1 /*
2 Copyright (c) 2009-2020, Intel Corporation
3 All rights reserved.
4 
5 Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
6 
7     * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
8     * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
9     * Neither the name of Intel Corporation nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
10 
11 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
12 */
13 // written by Roman Dementiev
14 //            Otto Bruggeman
15 //            Thomas Willhalm
16 //            Pat Fay
17 //            Austen Ott
18 //            Jim Harris (FreeBSD)
19 
20 /*!     \file cpucounters.cpp
21         \brief The bulk of PCM implementation
22   */
23 
24 //#define PCM_TEST_FALLBACK_TO_ATOM
25 
26 #include <stdio.h>
27 #include <assert.h>
28 #ifdef PCM_EXPORTS
29 // pcm-lib.h includes cpucounters.h
30 #include "PCM-Lib_Win\pcm-lib.h"
31 #else
32 #include "cpucounters.h"
33 #endif
34 #include "msr.h"
35 #include "pci.h"
36 #include "types.h"
37 #include "utils.h"
38 #include "topology.h"
39 
40 #if defined (__FreeBSD__) || defined(__DragonFly__)
41 #include <sys/param.h>
42 #include <sys/module.h>
43 #include <sys/types.h>
44 #include <sys/sysctl.h>
45 #include <sys/sem.h>
46 #include <sys/ioccom.h>
47 #include <sys/cpuctl.h>
48 #include <machine/cpufunc.h>
49 #endif
50 
51 #ifdef _MSC_VER
52 #include <intrin.h>
53 #include <windows.h>
54 #include <comdef.h>
55 #include <tchar.h>
56 #include "winring0/OlsApiInit.h"
57 #include "PCM_Win/windriver.h"
58 #else
59 #include <pthread.h>
60 #if defined(__FreeBSD__) || (defined(__DragonFly__) && __DragonFly_version >= 400707)
61 #include <pthread_np.h>
62 #endif
63 #include <errno.h>
64 #include <sys/time.h>
65 #ifdef __linux__
66 #include <sys/mman.h>
67 #endif
68 #endif
69 
70 #include <string.h>
71 #include <limits>
72 #include <map>
73 #include <algorithm>
74 #include <thread>
75 #include <future>
76 #include <functional>
77 #include <queue>
78 #include <condition_variable>
79 #include <mutex>
80 #include <atomic>
81 
82 #ifdef __APPLE__
83 #include <sys/types.h>
84 #include <sys/sysctl.h>
85 #include <sys/sem.h>
86 #endif
87 
88 namespace pcm {
89 
90 #ifdef __APPLE__
91 // convertUnknownToInt is used in the safe sysctl call to convert an unknown size to an int
92 int convertUnknownToInt(size_t size, char* value);
93 #endif
94 
95 #undef PCM_DEBUG_TOPOLOGY // debug of topology enumeration routine
96 
97 // FreeBSD is much more restrictive about names for semaphores
98 #if defined (__FreeBSD__)
99 #define PCM_INSTANCE_LOCK_SEMAPHORE_NAME "/PCM_inst_lock"
100 #define PCM_NUM_INSTANCES_SEMAPHORE_NAME "/num_PCM_inst"
101 #else
102 #define PCM_INSTANCE_LOCK_SEMAPHORE_NAME "PCM inst lock"
103 #define PCM_NUM_INSTANCES_SEMAPHORE_NAME "Num PCM insts"
104 #endif
105 
106 #ifdef _MSC_VER
107 
108 HMODULE hOpenLibSys = NULL;
109 
#ifndef NO_WINRING
// Loads and initializes the WinRing0 driver library used for low-level hardware
// access on Windows. On success, restricts access to the driver device and returns
// true; on failure, clears the library handle and returns false.
bool PCM::initWinRing0Lib()
{
    const BOOL result = InitOpenLibSys(&hOpenLibSys);

    if (result == FALSE)
    {
        // NOTE(review): hOpenLibSys is an HMODULE; CloseHandle on a module handle is
        // unusual (FreeLibrary would be conventional). Confirm against the OpenLibSys
        // API contract before changing.
        CloseHandle(hOpenLibSys);
        hOpenLibSys = NULL;
        return false;
    }

    BYTE major, minor, revision, release;
    GetDriverVersion(&major, &minor, &revision, &release);
    wchar_t buffer[128];
    // Build the versioned device name, e.g. \\.\WinRing0_1_2_0.
    swprintf_s(buffer, 128, _T("\\\\.\\WinRing0_%d_%d_%d"),(int)major,(int)minor, (int)revision);
    // Presumably tightens the ACL on the driver device (see PCM_Win/windriver.h) -- confirm.
    restrictDriverAccess(buffer);

    return true;
}
#endif // NO_WINRING
131 
// Scoped lock serializing PCM instance creation/destruction using a named Windows
// mutex: "Global\..." spans all sessions/processes, "Local\..." only this session.
// The constructor blocks until the mutex is acquired; the destructor releases it.
class InstanceLock
{
    HANDLE Mutex;   // named Win32 mutex handle

    InstanceLock(); // default construction forbidden
public:
    InstanceLock(const bool global)
    {
        Mutex = CreateMutex(NULL, FALSE,
            global?(L"Global\\Processor Counter Monitor instance create/destroy lock"):(L"Local\\Processor Counter Monitor instance create/destroy lock"));
        // lock
        WaitForSingleObject(Mutex, INFINITE);
    }
    ~InstanceLock()
    {
        // unlock
        ReleaseMutex(Mutex);
        CloseHandle(Mutex);
    }
};
152 #else // Linux or Apple
153 
// Guards in-process (non-global) PCM instance creation/destruction (see InstanceLock).
pthread_mutex_t processIntanceMutex = PTHREAD_MUTEX_INITIALIZER;

// Scoped lock serializing PCM instance creation/destruction. In "global" mode a named
// POSIX semaphore (initial count 1) provides cross-process mutual exclusion; otherwise
// the process-local pthread mutex above is used. The constructor blocks until the lock
// is held; the destructor releases it.
class InstanceLock
{
    const char * globalSemaphoreName;  // name of the cross-process semaphore
    sem_t * globalSemaphore;           // handle; meaningful only when global == true
    bool global;                       // true => cross-process semaphore lock

    InstanceLock();                    // default construction forbidden
public:
    InstanceLock(const bool global_) : globalSemaphoreName(PCM_INSTANCE_LOCK_SEMAPHORE_NAME), globalSemaphore(NULL), global(global_)
    {
        if(!global)
        {
            pthread_mutex_lock(&processIntanceMutex);
            return;
        }
        // Clear the umask so the semaphore is created world-accessible
        // (S_IRWXU | S_IRWXG | S_IRWXO below is honored as-is).
        umask(0);
        while (1)
        {
            //sem_unlink(globalSemaphoreName); // temporary
            // Create (or attach to) the named semaphore with initial count 1.
            globalSemaphore = sem_open(globalSemaphoreName, O_CREAT, S_IRWXU | S_IRWXG | S_IRWXO, 1);
            if (SEM_FAILED == globalSemaphore)
            {
              // Only EACCES is retried (another user may own the semaphore file);
              // other errors fall through and retry immediately.
              if (EACCES == errno)
                {
                    std::cerr << "PCM Error, do not have permissions to open semaphores in /dev/shm/. Waiting one second and retrying...\n";
                    sleep(1);
                }
            }
            else
            {
                /*
                if (sem_post(globalSemaphore)) {
                    perror("sem_post error");
                }
                */
                break;         // success
            }
        }
        // Acquire the cross-process lock (decrement the semaphore).
        if (sem_wait(globalSemaphore)) {
            perror("sem_wait error");
        }
    }
    ~InstanceLock()
    {
        if(!global)
        {
            pthread_mutex_unlock(&processIntanceMutex);
            return;
        }
        // Release the cross-process lock (increment the semaphore).
        if (sem_post(globalSemaphore)) {
            perror("sem_post error");
        }
    }
};
210 #endif // end of _MSC_VER else
211 
212 #if defined(__FreeBSD__)
213 #define cpu_set_t cpuset_t
214 #endif
215 
// RAII helper that temporarily pins the calling thread to a single core so that
// subsequent per-core operations execute on the intended CPU; the previous affinity
// mask is restored by the destructor. supported() reports whether pinning is
// implemented for this OS (it is not on OS X).
class TemporalThreadAffinity  // speedup trick for Linux, FreeBSD, DragonFlyBSD, Windows
{
    TemporalThreadAffinity(); // forbiden
#if defined(__FreeBSD__) || (defined(__DragonFly__) && __DragonFly_version >= 400707)
    cpu_set_t old_affinity;   // affinity mask to restore in the destructor

public:
    // Pins the thread to core_id; throws on failure unless checkStatus is false.
    TemporalThreadAffinity(uint32 core_id, bool checkStatus = true)
    {
        pthread_getaffinity_np(pthread_self(), sizeof(cpu_set_t), &old_affinity);

        cpu_set_t new_affinity;
        CPU_ZERO(&new_affinity);
        CPU_SET(core_id, &new_affinity);
        const auto res = pthread_setaffinity_np(pthread_self(), sizeof(cpu_set_t), &new_affinity);
        if (res != 0 && checkStatus)
        {
            std::cerr << "ERROR: pthread_setaffinity_np for core " << core_id << " failed with code " << res << "\n";
            throw std::exception();
        }
    }
    ~TemporalThreadAffinity()
    {
        pthread_setaffinity_np(pthread_self(), sizeof(cpu_set_t), &old_affinity);
    }
    bool supported() const { return true; }

#elif defined(__linux__)
    cpu_set_t * old_affinity;             // dynamically allocated mask, sized for maxCPUs
    static constexpr auto maxCPUs = 8192; // upper bound on CPUs handled by the dynamic API
    const size_t set_size;                // byte size of a maxCPUs-wide cpu_set_t

public:
    // Pins the thread to core_id using the dynamic CPU_*_S API (supports systems with
    // more CPUs than the static CPU_SETSIZE); throws on failure unless checkStatus is false.
    TemporalThreadAffinity(const uint32 core_id, bool checkStatus = true)
        : set_size(CPU_ALLOC_SIZE(maxCPUs))
    {
        old_affinity = CPU_ALLOC(maxCPUs);
        assert(old_affinity);
        pthread_getaffinity_np(pthread_self(), set_size, old_affinity);

        cpu_set_t * new_affinity = CPU_ALLOC(maxCPUs);
        assert(new_affinity);
        CPU_ZERO_S(set_size, new_affinity);
        CPU_SET_S(core_id, set_size, new_affinity);
        const auto res = pthread_setaffinity_np(pthread_self(), set_size, new_affinity);
        CPU_FREE(new_affinity);
        if (res != 0 && checkStatus)
        {
            std::cerr << "ERROR: pthread_setaffinity_np for core " << core_id << " failed with code " << res << "\n";
            throw std::exception();
        }
    }
    ~TemporalThreadAffinity()
    {
        pthread_setaffinity_np(pthread_self(), set_size, old_affinity);
        CPU_FREE(old_affinity);
    }
    bool supported() const { return true; }
#elif defined(_MSC_VER)
    // Windows: delegate to the processor-group-aware helper.
    ThreadGroupTempAffinity affinity;
public:
    TemporalThreadAffinity(uint32 core, bool checkStatus = true) : affinity(core, checkStatus) {}
    bool supported() const { return true; }
#else // not implemented for os x
public:
    TemporalThreadAffinity(uint32) { }
    TemporalThreadAffinity(uint32, bool) {}
    bool supported() const { return false;  }
#endif
};
286 
287 
// The process-wide PCM singleton, created lazily by PCM::getInstance().
PCM * PCM::instance = NULL;
289 
290 /*
291 static int bitCount(uint64 n)
292 {
293     int count = 0;
294     while (n)
295     {
296         count += static_cast<int>(n & 0x00000001);
297         n >>= static_cast<uint64>(1);
298     }
299     return count;
300 }
301 */
302 
getInstance()303 PCM * PCM::getInstance()
304 {
305     // no lock here
306     if (instance) return instance;
307 
308     InstanceLock lock(false);
309     if (instance) return instance;
310 
311     return instance = new PCM();
312 }
313 
extractCoreGenCounterValue(uint64 val)314 uint64 PCM::extractCoreGenCounterValue(uint64 val)
315 {
316     if (canUsePerf) return val;
317 
318     if(core_gen_counter_width)
319         return extract_bits(val, 0, core_gen_counter_width-1);
320 
321     return val;
322 }
323 
extractCoreFixedCounterValue(uint64 val)324 uint64 PCM::extractCoreFixedCounterValue(uint64 val)
325 {
326     if (canUsePerf) return val;
327 
328     if(core_fixed_counter_width)
329         return extract_bits(val, 0, core_fixed_counter_width-1);
330 
331     return val;
332 }
333 
extractUncoreGenCounterValue(uint64 val)334 uint64 PCM::extractUncoreGenCounterValue(uint64 val)
335 {
336     if(uncore_gen_counter_width)
337         return extract_bits(val, 0, uncore_gen_counter_width-1);
338 
339     return val;
340 }
341 
extractUncoreFixedCounterValue(uint64 val)342 uint64 PCM::extractUncoreFixedCounterValue(uint64 val)
343 {
344     if(uncore_fixed_counter_width)
345         return extract_bits(val, 0, uncore_fixed_counter_width-1);
346 
347     return val;
348 }
349 
extractQOSMonitoring(uint64 val)350 uint64 PCM::extractQOSMonitoring(uint64 val)
351 {
352     //Check if any of the error bit(63) or Unavailable bit(62) of the IA32_QM_CTR MSR are 1
353     if(val & (3ULL<<62))
354     {
355         // invalid reading
356         return static_cast<uint64>(PCM_INVALID_QOS_MONITORING_DATA);
357     }
358 
359     // valid reading
360     return extract_bits(val,0,61);
361 }
extractThermalHeadroom(uint64 val)362 int32 extractThermalHeadroom(uint64 val)
363 {
364     if(val & (1ULL<<31ULL))
365     {  // valid reading
366        return static_cast<int32>(extract_bits(val, 16, 22));
367     }
368 
369     // invalid reading
370     return static_cast<int32>(PCM_INVALID_THERMAL_HEADROOM);
371 }
372 
373 
374 uint64 get_frequency_from_cpuid();
375 
376 
377 
/* Adding the new version of cpuid with leaf and subleaf as an input */
// Executes the CPUID instruction with the given leaf (EAX) and subleaf (ECX)
// and stores the resulting EAX/EBX/ECX/EDX registers into info.
void pcm_cpuid(const unsigned leaf, const unsigned subleaf, PCM_CPUID_INFO & info)
{
    #ifdef _MSC_VER
    __cpuidex(info.array, leaf, subleaf);
    #else
    __asm__ __volatile__ ("cpuid" : \
                          "=a" (info.reg.eax), "=b" (info.reg.ebx), "=c" (info.reg.ecx), "=d" (info.reg.edx) : "a" (leaf), "c" (subleaf));
    #endif
}
388 
// Discovers core performance-monitoring capabilities from CPUID leaf 0xA:
// architectural perfmon version and the number and bit width of general-purpose
// and fixed-function counters. Also applies two quirks: if TSX force-abort mode
// is available and disabled, only 3 general-purpose counters are usable; on
// Linux VMs an AWS vPMU workaround caps the count at 3 unless overridden.
// complainAboutMSR: when true, print an error if MSR access is unavailable.
void PCM::readCoreCounterConfig(const bool complainAboutMSR)
{
    if (max_cpuid >= 0xa)
    {
        // get counter related info
        PCM_CPUID_INFO cpuinfo;
        pcm_cpuid(0xa, cpuinfo);
        // EAX[7:0] = perfmon version, EAX[15:8] = #gen counters, EAX[23:16] = gen counter width
        perfmon_version = extract_bits_ui(cpuinfo.array[0], 0, 7);
        core_gen_counter_num_max = extract_bits_ui(cpuinfo.array[0], 8, 15);
        core_gen_counter_width = extract_bits_ui(cpuinfo.array[0], 16, 23);
        if (perfmon_version > 1)
        {
            // EDX[4:0] = #fixed counters, EDX[12:5] = fixed counter width
            core_fixed_counter_num_max = extract_bits_ui(cpuinfo.array[3], 0, 4);
            core_fixed_counter_width = extract_bits_ui(cpuinfo.array[3], 5, 12);
        }
        else if (1 == perfmon_version)
        {
            // Perfmon v1 predates the fixed-counter fields: assume 3 fixed counters
            // with the same width as the general-purpose ones.
            core_fixed_counter_num_max = 3;
            core_fixed_counter_width = core_gen_counter_width;
        }
        if (isForceRTMAbortModeAvailable())
        {
            uint64 TSXForceAbort = 0;
            if (MSR.empty())
            {
                if (complainAboutMSR)
                {
                    std::cerr << "PCM Error: Can't determine the number of available counters reliably because of no access to MSR.\n";
                }
            }
            else if (MSR[0]->read(MSR_TSX_FORCE_ABORT, &TSXForceAbort) == sizeof(uint64))
            {
                TSXForceAbort &= 1;
                /*
                    TSXForceAbort is 0 (default mode) => the number of useful gen counters is 3
                    TSXForceAbort is 1                => the number of gen counters is unchanged
                */
                if (TSXForceAbort == 0)
                {
                    core_gen_counter_num_max = 3;
                }
            }
            else
            {
                std::cerr << "PCM Error: Can't determine the number of available counters reliably because reading MSR_TSX_FORCE_ABORT failed.\n";
            }
        }
#if defined(__linux__)
        // AWS vPMU workaround: in VMs with the arch_perfmon flag, cap the
        // programmable counters at 3 unless PCM_NO_AWS_WORKAROUND=1 is set.
        const auto env = std::getenv("PCM_NO_AWS_WORKAROUND");
        auto aws_workaround = true;
        if (env != nullptr && std::string(env) == std::string("1"))
        {
            aws_workaround = false;
        }
        if (aws_workaround == true && vm == true && linux_arch_perfmon == true && core_gen_counter_num_max > 3)
        {
            core_gen_counter_num_max = 3;
            std::cerr << "INFO: Reducing the number of programmable counters to 3 to workaround the fixed cycle counter virtualization issue on AWS.\n";
            std::cerr << "      You can disable the workaround by setting PCM_NO_AWS_WORKAROUND=1 environment variable\n";
        }
#endif
    }
}
452 
isFixedCounterSupported(unsigned c)453 bool PCM::isFixedCounterSupported(unsigned c)
454 {
455     if (max_cpuid >= 0xa)
456     {
457         PCM_CPUID_INFO cpuinfo;
458         pcm_cpuid(0xa, cpuinfo);
459 	return extract_bits_ui(cpuinfo.reg.ecx, c, c) || (extract_bits_ui(cpuinfo.reg.edx, 4, 0) > c);
460     }
461     return false;
462 }
463 
isHWTMAL1Supported() const464 bool PCM::isHWTMAL1Supported() const
465 {
466     static int supported = -1;
467     if (supported < 0)
468     {
469         supported = 0;
470         PCM_CPUID_INFO cpuinfo;
471         pcm_cpuid(1, cpuinfo);
472         if (extract_bits_ui(cpuinfo.reg.ecx, 15, 15) && MSR.size())
473         {
474             uint64 perf_cap;
475             if (MSR[0]->read(MSR_PERF_CAPABILITIES, &perf_cap) == sizeof(uint64))
476             {
477                 supported = (int)extract_bits(perf_cap, 15, 15);
478             }
479         }
480     }
481     return supported > 0;
482 }
483 
// Reads the CPU microcode update signature into cpu_microcode_level following the
// Intel SDM procedure: pin to a reference core, write 0 to MSR_IA32_BIOS_SIGN_ID,
// execute CPUID(1) (which refreshes the MSR), then read the signature from bits 63:32.
void PCM::readCPUMicrocodeLevel()
{
    if (MSR.empty()) return;
    const int ref_core = 0;
    // Pin the thread so CPUID and the MSR accesses all target the same core.
    TemporalThreadAffinity affinity(ref_core);
    if (affinity.supported() && isCoreOnline(ref_core))
    {   // see "Update Signature and Verification" and "Determining the Signature"
        // sections in Intel SDM how to read ucode level
        if (MSR[ref_core]->write(MSR_IA32_BIOS_SIGN_ID, 0) == sizeof(uint64))
        {
            PCM_CPUID_INFO cpuinfo;
            pcm_cpuid(1, cpuinfo); // cpuid instructions updates MSR_IA32_BIOS_SIGN_ID
            uint64 result = 0;
            if (MSR[ref_core]->read(MSR_IA32_BIOS_SIGN_ID, &result) == sizeof(uint64))
            {
                // Signature lives in the upper 32 bits of the MSR.
                cpu_microcode_level = result >> 32;
            }
        }
    }
}
504 
// Returns the number of programmable (general-purpose) core counters detected
// by readCoreCounterConfig() from CPUID leaf 0xA.
int32 PCM::getMaxCustomCoreEvents()
{
    return core_gen_counter_num_max;
}
509 
// Identifies the CPU via CPUID: verifies the GenuineIntel vendor string, extracts
// family/model/stepping, detects hypervisor presence, checks the Linux arch_perfmon
// flag (vPMU availability in VMs) and reports speculation-control feature bits.
// Returns false when the CPU is unsupported or a VM lacks a usable vPMU.
bool PCM::detectModel()
{
    char buffer[1024];
    // CPUID leaf 0 returns the vendor string split across EBX, EDX, ECX (in that order).
    union {
        char cbuf[16];
        int  ibuf[16 / sizeof(int)];
    } buf;
    PCM_CPUID_INFO cpuinfo;
    pcm_cpuid(0, cpuinfo);
    memset(buffer, 0, 1024);
    memset(buf.cbuf, 0, 16);
    buf.ibuf[0] = cpuinfo.array[1];
    buf.ibuf[1] = cpuinfo.array[3];
    buf.ibuf[2] = cpuinfo.array[2];
    if (strncmp(buf.cbuf, "GenuineIntel", 4 * 3) != 0)
    {
        std::cerr << getUnsupportedMessage() << "\n";
        return false;
    }
    max_cpuid = cpuinfo.array[0];

    pcm_cpuid(1, cpuinfo);
    // Combine the base and extended family/model fields from CPUID.1:EAX.
    cpu_family = (((cpuinfo.array[0]) >> 8) & 0xf) | ((cpuinfo.array[0] & 0xf00000) >> 16);
    cpu_model = (((cpuinfo.array[0]) & 0xf0) >> 4) | ((cpuinfo.array[0] & 0xf0000) >> 12);
    cpu_stepping = cpuinfo.array[0] & 0x0f;

    // CPUID.1:ECX[31] is the hypervisor-present bit.
    if (cpuinfo.reg.ecx & (1UL << 31UL)) {
        vm = true;
        std::cerr << "Detected a hypervisor/virtualization technology. Some metrics might not be available due to configuration or availability of virtual hardware features.\n";
    }

    readCoreCounterConfig();

    if (cpu_family != 6)
    {
        std::cerr << getUnsupportedMessage() << " CPU Family: " << cpu_family << "\n";
        return false;
    }

    pcm_cpuid(7, 0, cpuinfo);

#ifdef __linux__
    // Returns true if the given flag appears in the "flags" line of /proc/cpuinfo.
    auto checkLinuxCpuinfoFlag = [](const std::string& flag) -> bool
    {
        std::ifstream linuxCpuinfo("/proc/cpuinfo");
        if (linuxCpuinfo.is_open())
        {
            std::string line;
            while (std::getline(linuxCpuinfo, line))
            {
                auto tokens = split(line, ':');
                if (tokens.size() >= 2 && tokens[0].find("flags") == 0)
                {
                    for (auto curFlag : split(tokens[1], ' '))
                    {
                        if (flag == curFlag)
                        {
                            return true;
                        }
                    }
                }
            }
            linuxCpuinfo.close();
        }
        return false;
    };
    linux_arch_perfmon = checkLinuxCpuinfoFlag("arch_perfmon");
    std::cerr << "Linux arch_perfmon flag  : " << (linux_arch_perfmon ? "yes" : "no") << "\n";
    // A VM without arch_perfmon has no usable vPMU; abort unless the user overrides.
    if (vm == true && linux_arch_perfmon == false)
    {
        std::cerr << "ERROR: vPMU is not enabled in the hypervisor. Please see details in https://software.intel.com/content/www/us/en/develop/documentation/vtune-help/top/set-up-analysis-target/on-virtual-machine.html \n";
        std::cerr << "       you can force-continue by setting PCM_IGNORE_ARCH_PERFMON=1 environment variable.\n";
        auto env = std::getenv("PCM_IGNORE_ARCH_PERFMON");
        auto ignore_arch_perfmon = false;
        if (env != nullptr && std::string(env) == std::string("1"))
        {
            ignore_arch_perfmon = true;
        }
        if (!ignore_arch_perfmon)
        {
            return false;
        }
    }
#endif

    // CPUID.(EAX=07H,ECX=0):EDX speculation-control feature bits.
    std::cerr << "IBRS and IBPB supported  : " << ((cpuinfo.reg.edx & (1 << 26)) ? "yes" : "no") << "\n";
    std::cerr << "STIBP supported          : " << ((cpuinfo.reg.edx & (1 << 27)) ? "yes" : "no") << "\n";
    std::cerr << "Spec arch caps supported : " << ((cpuinfo.reg.edx & (1 << 29)) ? "yes" : "no") << "\n";

    return true;
}
601 
isRDTDisabled() const602 bool PCM::isRDTDisabled() const
603 {
604     static int flag = -1;
605     if (flag < 0)
606     {
607         // flag not yet initialized
608         const char * varname = "PCM_NO_RDT";
609         char* env = nullptr;
610 #ifdef _MSC_VER
611         _dupenv_s(&env, NULL, varname);
612 #else
613         env = std::getenv(varname);
614 #endif
615         if (env != nullptr && std::string(env) == std::string("1"))
616         {
617             std::cout << "Disabling RDT usage because PCM_NO_RDT=1 environment variable is set.\n";
618             flag = 1;
619         }
620         else
621         {
622             flag = 0;
623         }
624 #ifdef _MSC_VER
625         free(env);
626 #endif
627     }
628     return flag > 0;
629 }
630 
QOSMetricAvailable() const631 bool PCM::QOSMetricAvailable() const
632 {
633     if (isRDTDisabled()) return false;
634 #ifndef __linux__
635     if (isSecureBoot()) return false;
636 #endif
637     PCM_CPUID_INFO cpuinfo;
638     pcm_cpuid(0x7,0,cpuinfo);
639     return (cpuinfo.reg.ebx & (1<<12))?true:false;
640 }
641 
L3QOSMetricAvailable() const642 bool PCM::L3QOSMetricAvailable() const
643 {
644     if (isRDTDisabled()) return false;
645 #ifndef __linux__
646     if (isSecureBoot()) return false;
647 #endif
648     PCM_CPUID_INFO cpuinfo;
649     pcm_cpuid(0xf,0,cpuinfo);
650     return (cpuinfo.reg.edx & (1<<1))?true:false;
651 }
652 
L3CacheOccupancyMetricAvailable() const653 bool PCM::L3CacheOccupancyMetricAvailable() const
654 {
655     PCM_CPUID_INFO cpuinfo;
656     if (!(QOSMetricAvailable() && L3QOSMetricAvailable()))
657         return false;
658     pcm_cpuid(0xf,0x1,cpuinfo);
659     return (cpuinfo.reg.edx & 1)?true:false;
660 }
661 
CoreLocalMemoryBWMetricAvailable() const662 bool PCM::CoreLocalMemoryBWMetricAvailable() const
663 {
664     if (cpu_model == SKX && cpu_stepping < 5) return false; // SKZ4 errata
665     PCM_CPUID_INFO cpuinfo;
666     if (!(QOSMetricAvailable() && L3QOSMetricAvailable()))
667             return false;
668     pcm_cpuid(0xf,0x1,cpuinfo);
669     return (cpuinfo.reg.edx & 2)?true:false;
670 }
671 
CoreRemoteMemoryBWMetricAvailable() const672 bool PCM::CoreRemoteMemoryBWMetricAvailable() const
673 {
674     if (cpu_model == SKX && cpu_stepping < 5) return false; // SKZ4 errata
675     PCM_CPUID_INFO cpuinfo;
676     if (!(QOSMetricAvailable() && L3QOSMetricAvailable()))
677         return false;
678     pcm_cpuid(0xf, 0x1, cpuinfo);
679     return (cpuinfo.reg.edx & 4) ? true : false;
680 }
681 
getMaxRMID() const682 unsigned PCM::getMaxRMID() const
683 {
684     unsigned maxRMID = 0;
685     PCM_CPUID_INFO cpuinfo;
686     pcm_cpuid(0xf,0,cpuinfo);
687     maxRMID = (unsigned)cpuinfo.reg.ebx + 1;
688     return maxRMID;
689 }
690 
// Initializes RDT (Resource Director Technology) monitoring. On Linux, prefers the
// kernel resctrl driver when requested via PCM_USE_RESCTRL=1, when resctrl is already
// mounted, or when Secure Boot blocks direct MSR access. Otherwise assigns one RMID
// per core (counting down from the per-socket maximum) via IA32_PQR_ASSOC and
// IA32_QM_EVTSEL, and creates software-extended memory-bandwidth counters.
void PCM::initRDT()
{
    if (!(QOSMetricAvailable() && L3QOSMetricAvailable()))
        return;
#ifdef __linux__
    auto env = std::getenv("PCM_USE_RESCTRL");
    if (env != nullptr && std::string(env) == std::string("1"))
    {
        std::cout << "INFO: using Linux resctrl driver for RDT metrics (L3OCC, LMB, RMB) because environment variable PCM_USE_RESCTRL=1\n";
        resctrl.init();
        useResctrl = true;
        return;
    }
    if (resctrl.isMounted())
    {
        std::cout << "INFO: using Linux resctrl driver for RDT metrics (L3OCC, LMB, RMB) because resctrl driver is mounted.\n";
        resctrl.init();
        useResctrl = true;
        return;
    }
    if (isSecureBoot())
    {
        std::cout << "INFO: using Linux resctrl driver for RDT metrics (L3OCC, LMB, RMB) because Secure Boot mode is enabled.\n";
        resctrl.init();
        useResctrl = true;
        return;
    }
#endif
    std::cout << "Initializing RMIDs" << std::endl;
    unsigned maxRMID;
    /* Calculate maximum number of RMID supported by socket */
    maxRMID = getMaxRMID();
    // std::cout << "Maximum RMIDs per socket in the system : " << maxRMID << "\n";
    // Per-socket pool of RMIDs, handed out from the highest value downwards.
    std::vector<uint32> rmid(num_sockets);
    for(int32 i = 0; i < num_sockets; i ++)
            rmid[i] = maxRMID - 1;

    /* Associate each core with 1 RMID */
    for(int32 core = 0; core < num_cores; core ++ )
    {
        if(!isCoreOnline(core)) continue;

        uint64 msr_pqr_assoc = 0 ;
        uint64 msr_qm_evtsel = 0 ;
                MSR[core]->lock();
        //Read 0xC8F MSR for each core
        MSR[core]->read(IA32_PQR_ASSOC, &msr_pqr_assoc);
        //std::cout << "initRMID reading IA32_PQR_ASSOC 0x" << std::hex << msr_pqr_assoc << std::dec << "\n";

        //std::cout << "Socket Id : " << topology[core].socket;
        // Preserve the upper 32 bits and replace the 10-bit RMID field (bits 9:0).
        msr_pqr_assoc &= 0xffffffff00000000ULL;
        msr_pqr_assoc |= (uint64)(rmid[topology[core].socket] & ((1ULL<<10)-1ULL));
        //std::cout << "initRMID writing IA32_PQR_ASSOC 0x" << std::hex << msr_pqr_assoc << std::dec << "\n";
        //Write 0xC8F MSR with new RMID for each core
        MSR[core]->write(IA32_PQR_ASSOC,msr_pqr_assoc);

        // Select the same RMID in the event-selection MSR (RMID field starts at bit 32).
        msr_qm_evtsel = static_cast<uint64>(rmid[topology[core].socket] & ((1ULL<<10)-1ULL));
        msr_qm_evtsel <<= 32;
        //Write 0xC8D MSR with new RMID for each core
        //std::cout << "initRMID writing IA32_QM_EVTSEL 0x" << std::hex << msr_qm_evtsel << std::dec << "\n";
        MSR[core]->write(IA32_QM_EVTSEL,msr_qm_evtsel);
                MSR[core]->unlock();

        /* Initializing the memory bandwidth counters */
        if (CoreLocalMemoryBWMetricAvailable())
        {
            // 24-bit hardware counters extended in software by CounterWidthExtender;
            // 1000 is presumably the refresh period in ms -- confirm in its declaration.
            memory_bw_local.push_back(std::make_shared<CounterWidthExtender>(new CounterWidthExtender::MBLCounter(MSR[core]), 24, 1000));
            if (CoreRemoteMemoryBWMetricAvailable())
            {
                memory_bw_total.push_back(std::make_shared<CounterWidthExtender>(new CounterWidthExtender::MBTCounter(MSR[core]), 24, 1000));
            }
        }
        rmid[topology[core].socket] --;
        //std::cout << std::flush; // Explicitly flush after each iteration
    }
    /* Get The scaling factor by running CPUID.0xF.0x1 instruction */
    L3ScalingFactor = getL3ScalingFactor();
}
769 
// Programs an RDT monitoring event id into IA32_QM_EVTSEL on the given core,
// preserving the RMID configured by initRDT() in the upper bits.
// event: event id (only the low 8 bits are used); core: target logical core.
void PCM::initQOSevent(const uint64 event, const int32 core)
{
   if(!isCoreOnline(core)) return;
   uint64 msr_qm_evtsel = 0 ;
   //Write 0xC8D MSR with the event id
   MSR[core]->read(IA32_QM_EVTSEL, &msr_qm_evtsel);
   //std::cout << "initQOSevent reading IA32_QM_EVTSEL 0x" << std::hex << msr_qm_evtsel << std::dec << "\n";
   // Clear the low 4 bits (event id field), then set the new event id.
   msr_qm_evtsel &= 0xfffffffffffffff0ULL;
   msr_qm_evtsel |= event & ((1ULL<<8)-1ULL);
   //std::cout << "initQOSevent writing IA32_QM_EVTSEL 0x" << std::hex << msr_qm_evtsel << std::dec << "\n";
   MSR[core]->write(IA32_QM_EVTSEL,msr_qm_evtsel);
   //std::cout << std::flush;
}
783 
784 
// Fills pkgCStateMsr and coreCStateMsr with the residency-counter MSR address for
// each C-state index (0 means "no counter for that state") based on cpu_model.
// NOTE: the case labels intentionally fall through into the PCM_CSTATE_ARRAY macro
// below them; the macro body ends with 'break', so each group of models shares one
// statically allocated table.
void PCM::initCStateSupportTables()
{
#define PCM_PARAM_PROTECT(...) __VA_ARGS__
#define PCM_CSTATE_ARRAY(array_ , val ) \
    { \
        static uint64 tmp[] = val; \
        PCM_COMPILE_ASSERT(sizeof(tmp) / sizeof(uint64) == (static_cast<int>(MAX_C_STATE)+1)); \
        array_ = tmp; \
        break; \
    }

    // fill package C state array
    switch(cpu_model)
    {
        case ATOM:
        case ATOM_2:
        case CENTERTON:
        case AVOTON:
        case BAYTRAIL:
        case CHERRYTRAIL:
        case APOLLO_LAKE:
        case DENVERTON:
	case SNOWRIDGE:
            PCM_CSTATE_ARRAY(pkgCStateMsr, PCM_PARAM_PROTECT({0, 0, 0x3F8, 0, 0x3F9, 0, 0x3FA, 0, 0, 0, 0 }) );
        case NEHALEM_EP:
        case NEHALEM:
        case CLARKDALE:
        case WESTMERE_EP:
        case NEHALEM_EX:
        case WESTMERE_EX:
            PCM_CSTATE_ARRAY(pkgCStateMsr, PCM_PARAM_PROTECT({0, 0, 0, 0x3F8, 0, 0, 0x3F9, 0x3FA, 0, 0, 0}) );
        case SANDY_BRIDGE:
        case JAKETOWN:
        case IVY_BRIDGE:
        case IVYTOWN:
            PCM_CSTATE_ARRAY(pkgCStateMsr, PCM_PARAM_PROTECT({0, 0, 0x60D, 0x3F8, 0, 0, 0x3F9, 0x3FA, 0, 0, 0}) );
        case HASWELL:
        case HASWELL_2:
        case HASWELLX:
        case BDX_DE:
        case BDX:
        case KNL:
            PCM_CSTATE_ARRAY(pkgCStateMsr, PCM_PARAM_PROTECT({0, 0, 0x60D, 0x3F8, 0, 0, 0x3F9,  0x3FA, 0, 0, 0}) );
        case SKX:
        case ICX:
            PCM_CSTATE_ARRAY(pkgCStateMsr, PCM_PARAM_PROTECT({0, 0, 0x60D, 0, 0, 0, 0x3F9, 0, 0, 0, 0}) );
        case HASWELL_ULT:
        case BROADWELL:
        PCM_SKL_PATH_CASES
        case BROADWELL_XEON_E3:
            PCM_CSTATE_ARRAY(pkgCStateMsr, PCM_PARAM_PROTECT({0, 0, 0x60D, 0x3F8, 0, 0, 0x3F9, 0x3FA, 0x630, 0x631, 0x632}) );

        default:
            std::cerr << "PCM error: package C-states support array is not initialized. Package C-states metrics will not be shown.\n";
            PCM_CSTATE_ARRAY(pkgCStateMsr, PCM_PARAM_PROTECT({ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }) );
    };

    // fill core C state array
    switch(cpu_model)
    {
        case ATOM:
        case ATOM_2:
        case CENTERTON:
            PCM_CSTATE_ARRAY(coreCStateMsr, PCM_PARAM_PROTECT({ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }) );
        case NEHALEM_EP:
        case NEHALEM:
        case CLARKDALE:
        case WESTMERE_EP:
        case NEHALEM_EX:
        case WESTMERE_EX:
            PCM_CSTATE_ARRAY(coreCStateMsr, PCM_PARAM_PROTECT({0, 0, 0, 0x3FC, 0, 0, 0x3FD, 0, 0, 0, 0}) );
        case SANDY_BRIDGE:
        case JAKETOWN:
        case IVY_BRIDGE:
        case IVYTOWN:
        case HASWELL:
        case HASWELL_2:
        case HASWELL_ULT:
        case HASWELLX:
        case BDX_DE:
        case BDX:
        case BROADWELL:
        case BROADWELL_XEON_E3:
        case BAYTRAIL:
        case AVOTON:
        case CHERRYTRAIL:
        case APOLLO_LAKE:
        case DENVERTON:
        PCM_SKL_PATH_CASES
	case SNOWRIDGE:
        case ICX:
            PCM_CSTATE_ARRAY(coreCStateMsr, PCM_PARAM_PROTECT({0, 0, 0, 0x3FC, 0, 0, 0x3FD, 0x3FE, 0, 0, 0}) );
        case KNL:
            PCM_CSTATE_ARRAY(coreCStateMsr, PCM_PARAM_PROTECT({0, 0, 0, 0, 0, 0, 0x3FF, 0, 0, 0, 0}) );
        case SKX:
            PCM_CSTATE_ARRAY(coreCStateMsr, PCM_PARAM_PROTECT({0, 0, 0, 0, 0, 0, 0x3FD, 0, 0, 0, 0}) );
        default:
            std::cerr << "PCM error: core C-states support array is not initialized. Core C-states metrics will not be shown.\n";
            PCM_CSTATE_ARRAY(coreCStateMsr, PCM_PARAM_PROTECT({ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }) );
    };
}
886 
887 
888 #ifdef __linux__
// Opens 'path' with the given stdio mode, retrying the same path under a
// "/pcm" prefix if the first attempt fails (presumably for environments
// where the host filesystem is remounted under /pcm -- TODO confirm).
// Returns NULL when both attempts fail.
FILE * tryOpen(const char * path, const char * mode)
{
    FILE * handle = fopen(path, mode);
    if (handle == NULL)
    {
        const std::string prefixed = std::string("/pcm") + path;
        handle = fopen(prefixed.c_str(), mode);
    }
    return handle;
}
898 
readSysFS(const char * path,bool silent=false)899 std::string readSysFS(const char * path, bool silent = false)
900 {
901     FILE * f = tryOpen(path, "r");
902     if (!f)
903     {
904         if (silent == false) std::cerr << "ERROR: Can not open " << path << " file.\n";
905         return std::string();
906     }
907     char buffer[1024];
908     if(NULL == fgets(buffer, 1024, f))
909     {
910         if (silent == false) std::cerr << "ERROR: Can not read from " << path << ".\n";
911         fclose(f);
912         return std::string();
913     }
914     fclose(f);
915     return std::string(buffer);
916 }
917 
writeSysFS(const char * path,const std::string & value,bool silent=false)918 bool writeSysFS(const char * path, const std::string & value, bool silent = false)
919 {
920     FILE * f = tryOpen(path, "w");
921     if (!f)
922     {
923         if (silent == false) std::cerr << "ERROR: Can not open " << path << " file.\n";
924         return false;
925     }
926     if (fputs(value.c_str(), f) < 0)
927     {
928         if (silent == false) std::cerr << "ERROR: Can not write to " << path << ".\n";
929         fclose(f);
930         return false;
931     }
932     fclose(f);
933     return true;
934 }
935 
readMaxFromSysFS(const char * path)936 int readMaxFromSysFS(const char * path)
937 {
938     std::string content = readSysFS(path);
939     const char * buffer = content.c_str();
940     int result = -1;
941     pcm_sscanf(buffer) >> s_expect("0-") >> result;
942     if(result == -1)
943     {
944        pcm_sscanf(buffer) >> result;
945     }
946     return result;
947 }
948 
// sysfs locations of the kernel perf events implementing the fixed
// top-down (TMA level 1) metrics; their presence is probed by
// perfSupportsTopDown() below.
constexpr auto perfSlotsPath = "/sys/bus/event_source/devices/cpu/events/slots";
constexpr auto perfBadSpecPath = "/sys/bus/event_source/devices/cpu/events/topdown-bad-spec";
constexpr auto perfBackEndPath = "/sys/bus/event_source/devices/cpu/events/topdown-be-bound";
constexpr auto perfFrontEndPath = "/sys/bus/event_source/devices/cpu/events/topdown-fe-bound";
constexpr auto perfRetiringPath = "/sys/bus/event_source/devices/cpu/events/topdown-retiring";
954 
perfSupportsTopDown()955 bool perfSupportsTopDown()
956 {
957     static int yes = -1;
958     if (-1 == yes)
959     {
960         const auto slots = readSysFS(perfSlotsPath, true);
961         const auto bad = readSysFS(perfBadSpecPath, true);
962         const auto be = readSysFS(perfBackEndPath, true);
963         const auto fe = readSysFS(perfFrontEndPath, true);
964         const auto ret = readSysFS(perfRetiringPath, true);
965         yes = (slots.size() && bad.size() && be.size() && fe.size() && ret.size()) ? 1 : 0;
966     }
967     return 1 == yes;
968 }
969 
970 #endif
971 
// Enumerates the system's CPU topology: decomposes each logical
// processor's APIC id into thread/core/socket/tile fields (mask widths
// taken from cpuid leaf 0xB and the L2 sharing info of leaf 0x4), using
// an OS-specific mechanism to visit every logical CPU
// (GetLogicalProcessorInformationEx on Windows, /proc/cpuinfo on Linux,
// cpuctl on FreeBSD/DragonFly, an MSR-kext helper on macOS).
// Fills 'topology', 'systemTopology', socket/core counters and the
// per-core top-down slot vectors. Returns false on enumeration failure.
bool PCM::discoverSystemTopology()
{
    typedef std::map<uint32, uint32> socketIdMap_type;
    socketIdMap_type socketIdMap;

    PCM_CPUID_INFO cpuid_args;
    // init constants for CPU topology leaf 0xB
    // adapted from Topology Enumeration Reference code for Intel 64 Architecture
    // https://software.intel.com/en-us/articles/intel-64-architecture-processor-topology-enumeration
    int wasCoreReported = 0, wasThreadReported = 0;
    int subleaf = 0, levelType, levelShift;
    //uint32 coreSelectMask = 0, smtSelectMask = 0;
    uint32 smtMaskWidth = 0;
    //uint32 pkgSelectMask = (-1), pkgSelectMaskShift = 0;
    uint32 corePlusSMTMaskWidth = 0;
    uint32 coreMaskWidth = 0;

    {
        // walk cpuid 0xB subleaves (pinned to CPU 0) to obtain the SMT and
        // Core+SMT APIC-id mask widths
        TemporalThreadAffinity aff0(0);
        do
        {
            pcm_cpuid(0xb, subleaf, cpuid_args);
            if (cpuid_args.array[1] == 0)
            { // if EBX ==0 then this subleaf is not valid, we can exit the loop
                break;
            }
            levelType = extract_bits_ui(cpuid_args.array[2], 8, 15);
            levelShift = extract_bits_ui(cpuid_args.array[0], 0, 4);
            switch (levelType)
            {
            case 1: //level type is SMT, so levelShift is the SMT_Mask_Width
                smtMaskWidth = levelShift;
                wasThreadReported = 1;
                break;
            case 2: //level type is Core, so levelShift is the CorePlusSMT_Mask_Width
                corePlusSMTMaskWidth = levelShift;
                wasCoreReported = 1;
                break;
            default:
                break;
            }
            subleaf++;
        } while (1);
    }

    // derive how many APIC-id bits select the core within a package
    if (wasThreadReported && wasCoreReported)
    {
        coreMaskWidth = corePlusSMTMaskWidth - smtMaskWidth;
    }
    else if (!wasCoreReported && wasThreadReported)
    {
        coreMaskWidth = smtMaskWidth;
    }
    else
    {
        std::cerr << "ERROR: Major problem? No leaf 0 under cpuid function 11.\n";
        return false;
    }

    uint32 l2CacheMaskShift = 0;
#ifdef PCM_DEBUG_TOPOLOGY
    uint32 threadsSharingL2;
#endif
    uint32 l2CacheMaskWidth;

    pcm_cpuid(0x4, 2, cpuid_args); // get ID for L2 cache
    l2CacheMaskWidth = 1 + extract_bits_ui(cpuid_args.array[0],14,25); // number of APIC IDs sharing L2 cache
#ifdef PCM_DEBUG_TOPOLOGY
    threadsSharingL2 = l2CacheMaskWidth;
#endif
    // compute floor(log2(l2CacheMaskWidth)): the APIC-id shift of the L2 (tile) field
    for( ; l2CacheMaskWidth > 1; l2CacheMaskWidth >>= 1)
    {
        l2CacheMaskShift++;
    }
#ifdef PCM_DEBUG_TOPOLOGY
    std::cerr << "DEBUG: Number of threads sharing L2 cache = " << threadsSharingL2
              << " [the most significant bit = " << l2CacheMaskShift << "]\n";
#endif

    // decompose an APIC id into thread/core/socket/tile ids using the
    // mask widths computed above
    auto populateEntry = [&smtMaskWidth, &coreMaskWidth, &l2CacheMaskShift](TopologyEntry & entry, const int apic_id)
    {
        entry.thread_id = smtMaskWidth ? extract_bits_ui(apic_id, 0, smtMaskWidth - 1) : 0;
        entry.core_id = (smtMaskWidth + coreMaskWidth) ? extract_bits_ui(apic_id, smtMaskWidth, smtMaskWidth + coreMaskWidth - 1) : 0;
        entry.socket = extract_bits_ui(apic_id, smtMaskWidth + coreMaskWidth, 31);
        entry.tile_id = extract_bits_ui(apic_id, l2CacheMaskShift, 31);
    };

#ifdef _MSC_VER
// version for Windows 7 and later version

    // query processor relations; the buffer is grown and the call retried
    // on ERROR_INSUFFICIENT_BUFFER
    char * slpi = new char[sizeof(SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX)];
    DWORD len = (DWORD)sizeof(SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX);
    BOOL res = GetLogicalProcessorInformationEx(RelationAll, (PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX)slpi, &len);

    while (res == FALSE)
    {
        delete[] slpi;

        if (GetLastError() == ERROR_INSUFFICIENT_BUFFER)
        {
            slpi = new char[len];
            res = GetLogicalProcessorInformationEx(RelationAll, (PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX)slpi, &len);
        }
        else
        {
            std::wcerr << "Error in Windows function 'GetLogicalProcessorInformationEx': " <<
                GetLastError() << " ";
            const TCHAR * strError = _com_error(GetLastError()).ErrorMessage();
            if (strError) std::wcerr << strError;
            std::wcerr << "\n";
            return false;
        }
    }

    char * base_slpi = slpi;
    PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX pi = NULL;

    // walk the variable-size records, counting logical cores per physical core
    for ( ; slpi < base_slpi + len; slpi += (DWORD)pi->Size)
    {
        pi = (PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX)slpi;
        if (pi->Relationship == RelationProcessorCore)
        {
            threads_per_core = (pi->Processor.Flags == LTP_PC_SMT) ? 2 : 1;
            // std::cout << "thr per core: " << threads_per_core << "\n";
            num_cores += threads_per_core;
        }
    }
    // std::cout << std::flush;

    num_online_cores = num_cores;

    if (num_cores != GetActiveProcessorCount(ALL_PROCESSOR_GROUPS))
    {
        std::cerr << "Error in processor group size counting: " << num_cores << "!=" << GetActiveProcessorCount(ALL_PROCESSOR_GROUPS) << "\n";
        std::cerr << "Make sure your binary is compiled for 64-bit: using 'x64' platform configuration.\n";
        return false;
    }

    // read each core's APIC id by running cpuid pinned to that core
    for (int i = 0; i < (int)num_cores; i++)
    {
        ThreadGroupTempAffinity affinity(i);

        pcm_cpuid(0xb, 0x0, cpuid_args);

        int apic_id = cpuid_args.array[3];

        TopologyEntry entry;
        entry.os_id = i;

        populateEntry(entry, apic_id);

        topology.push_back(entry);
        socketIdMap[entry.socket] = 0;
    }

    delete[] base_slpi;

#else
    // for Linux, Mac OS, FreeBSD and DragonFlyBSD

    TopologyEntry entry;

#ifdef __linux__
    // "present" holds a range like "0-N"; highest present CPU index + 1 = core count
    num_cores = readMaxFromSysFS("/sys/devices/system/cpu/present");
    if(num_cores == -1)
    {
      std::cerr << "Cannot read number of present cores\n";
      return false;
    }
    ++num_cores;

    // open /proc/cpuinfo
    FILE * f_cpuinfo = fopen("/proc/cpuinfo", "r");
    if (!f_cpuinfo)
    {
        std::cerr << "Cannot open /proc/cpuinfo file.\n";
        return false;
    }

    // map with key=pkg_apic_id (not necessarily zero based or sequential) and
    // associated value=socket_id that should be 0 based and sequential
    std::map<int, int> found_pkg_ids;
    topology.resize(num_cores);
    char buffer[1024];
    while (0 != fgets(buffer, 1024, f_cpuinfo))
    {
        // one "processor : N" line per online logical CPU
        if (strncmp(buffer, "processor", sizeof("processor") - 1) == 0)
        {
            pcm_sscanf(buffer) >> s_expect("processor\t: ") >> entry.os_id;
            //std::cout << "os_core_id: " << entry.os_id << "\n";
            // pin to the CPU so cpuid reports this core's APIC id
            TemporalThreadAffinity _(entry.os_id);
            pcm_cpuid(0xb, 0x0, cpuid_args);
            int apic_id = cpuid_args.array[3];

            populateEntry(entry, apic_id);

            topology[entry.os_id] = entry;
            socketIdMap[entry.socket] = 0;
            ++num_online_cores;
        }
    }
    //std::cout << std::flush;
    fclose(f_cpuinfo);

    // produce debug output similar to Intel MPI cpuinfo
#ifdef PCM_DEBUG_TOPOLOGY
    std::cerr << "=====  Processor identification  =====\n";
    std::cerr << "Processor       Thread Id.      Core Id.        Tile Id.        Package Id.\n";
    std::map<uint32, std::vector<uint32> > os_id_by_core, os_id_by_tile, core_id_by_socket;
    for(auto it = topology.begin(); it != topology.end(); ++it)
    {
        std::cerr << std::left << std::setfill(' ')
                  << std::setw(16) << it->os_id
                  << std::setw(16) << it->thread_id
                  << std::setw(16) << it->core_id
                  << std::setw(16) << it->tile_id
                  << std::setw(16) << it->socket
                  << "\n";
        if(std::find(core_id_by_socket[it->socket].begin(), core_id_by_socket[it->socket].end(), it->core_id)
                == core_id_by_socket[it->socket].end())
            core_id_by_socket[it->socket].push_back(it->core_id);
        // add socket offset to distinguish cores and tiles from different sockets
        os_id_by_core[(it->socket << 15) + it->core_id].push_back(it->os_id);
        os_id_by_tile[(it->socket << 15) + it->tile_id].push_back(it->os_id);
    }
    std::cerr << "=====  Placement on packages  =====\n";
    std::cerr << "Package Id.    Core Id.     Processors\n";
    for(auto pkg = core_id_by_socket.begin(); pkg != core_id_by_socket.end(); ++pkg)
    {
        auto core_id = pkg->second.begin();
        std::cerr << std::left << std::setfill(' ') << std::setw(15) << pkg->first << *core_id;
        for(++core_id; core_id != pkg->second.end(); ++core_id)
        {
            std::cerr << "," << *core_id;
        }
        std::cerr << "\n";
    }
    std::cerr << "\n=====  Core/Tile sharing  =====\n";
    std::cerr << "Level      Processors\nCore       ";
    for(auto core = os_id_by_core.begin(); core != os_id_by_core.end(); ++core)
    {
        auto os_id = core->second.begin();
        std::cerr << "(" << *os_id;
        for(++os_id; os_id != core->second.end(); ++os_id) {
            std::cerr << "," << *os_id;
        }
        std::cerr << ")";
    }
    std::cerr << "\nTile / L2$ ";
    for(auto core = os_id_by_tile.begin(); core != os_id_by_tile.end(); ++core)
    {
        auto os_id = core->second.begin();
        std::cerr << "(" << *os_id;
        for(++os_id; os_id != core->second.end(); ++os_id) {
            std::cerr << "," << *os_id;
        }
        std::cerr << ")";
    }
    std::cerr << "\n";
#endif // PCM_DEBUG_TOPOLOGY
#elif defined(__FreeBSD__) || defined(__DragonFly__)

    size_t size = sizeof(num_cores);
    cpuctl_cpuid_args_t cpuid_args_freebsd;
    int fd;

    if(0 != sysctlbyname("hw.ncpu", &num_cores, &size, NULL, 0))
    {
        std::cerr << "Unable to get hw.ncpu from sysctl.\n";
        return false;
    }
    num_online_cores = num_cores;

    if (modfind("cpuctl") == -1)
    {
        std::cerr << "cpuctl(4) not loaded.\n";
        return false;
    }

    // run cpuid leaf 0xB on each core via the cpuctl(4) character devices
    for (int i = 0; i < num_cores; i++)
    {
        char cpuctl_name[64];
        int apic_id;

        snprintf(cpuctl_name, 64, "/dev/cpuctl%d", i);
        fd = ::open(cpuctl_name, O_RDWR);

        cpuid_args_freebsd.level = 0xb;

        ::ioctl(fd, CPUCTL_CPUID, &cpuid_args_freebsd);

        apic_id = cpuid_args_freebsd.data[3];

        entry.os_id = i;

        populateEntry(entry, apic_id);

        if (entry.socket == 0 && entry.core_id == 0) ++threads_per_core;

        topology.push_back(entry);
        socketIdMap[entry.socket] = 0;
    }

#else // Getting processor info for Mac OS
#define SAFE_SYSCTLBYNAME(message, ret_value)                                                              \
    {                                                                                                      \
        size_t size;                                                                                       \
        char *pParam;                                                                                      \
        if(0 != sysctlbyname(message, NULL, &size, NULL, 0))                                               \
        {                                                                                                  \
            std::cerr << "Unable to determine size of " << message << " sysctl return type.\n";            \
            return false;                                                                                  \
        }                                                                                                  \
        if(NULL == (pParam = (char *)malloc(size)))                                                        \
        {                                                                                                  \
            std::cerr << "Unable to allocate memory for " << message << "\n";                              \
            return false;                                                                                  \
        }                                                                                                  \
        if(0 != sysctlbyname(message, (void*)pParam, &size, NULL, 0))                                      \
        {                                                                                                  \
            std::cerr << "Unable to get " << message << " from sysctl.\n";                                 \
            return false;                                                                                  \
        }                                                                                                  \
        ret_value = convertUnknownToInt(size, pParam);                                                     \
        free(pParam);                                                                                      \
    }
// End SAFE_SYSCTLBYNAME

    // Using OSXs sysctl to get the number of CPUs right away
    SAFE_SYSCTLBYNAME("hw.logicalcpu", num_cores)
    num_online_cores = num_cores;

#undef SAFE_SYSCTLBYNAME

    // The OSX version needs the MSR handle earlier so that it can build the CPU topology.
    // This topology functionality should potentially go into a different KEXT
    for(int i = 0; i < num_cores; i++)
    {
        MSR.push_back(std::make_shared<SafeMsrHandle>(i));
    }

    TopologyEntry *entries = new TopologyEntry[num_cores];
    MSR[0]->buildTopology(num_cores, entries);
    for(int i = 0; i < num_cores; i++){
        socketIdMap[entries[i].socket] = 0;
        if(entries[i].os_id >= 0)
        {
            if(entries[i].core_id == 0 && entries[i].socket == 0) ++threads_per_core;
            topology.push_back(entries[i]);
        }
    }
    delete[] entries;
// End of OSX specific code
#endif // end of ifndef __APPLE__

#endif //end of ifdef _MSC_VER

    // common post-processing for all platforms
    if(num_cores == 0) {
        num_cores = (int32)topology.size();
    }
    if(num_sockets == 0) {
        num_sockets = (int32)(std::max)(socketIdMap.size(), (size_t)1);
    }

    // assign sequential logical socket ids (0..N-1) to the apic package ids
    socketIdMap_type::iterator s = socketIdMap.begin();
    for (uint32 sid = 0; s != socketIdMap.end(); ++s)
    {
        s->second = sid++;
        // first is apic id, second is logical socket id
        systemTopology->addSocket( s->first, s->second );
    }

    for (int32 cid = 0; cid < num_cores; ++cid)
    {
        //std::cerr << "Cid: " << cid << "\n";
        systemTopology->addThread( cid, topology[cid] );
    }

    // All threads are here now so we can set the refCore for a socket
    for ( auto socket : systemTopology->sockets() )
        socket->setRefCore();

    // use map to change apic socket id to the logical socket id
    for (int i = 0; (i < (int)num_cores) && (!socketIdMap.empty()); ++i)
    {
        if(isCoreOnline((int32)i))
          topology[i].socket = socketIdMap[topology[i].socket];
    }

#if 0
    std::cerr << "Number of socket ids: " << socketIdMap.size() << "\n";
    std::cerr << "Topology:\nsocket os_id core_id\n";
    for (int i = 0; i < num_cores; ++i)
    {
        std::cerr << topology[i].socket << " " << topology[i].os_id << " " << topology[i].core_id << "\n";
    }
#endif
    // fallback: count logical cores sharing the first core's (socket, core_id)
    if (threads_per_core == 0)
    {
        for (int i = 0; i < (int)num_cores; ++i)
        {
            if (topology[i].socket == topology[0].socket && topology[i].core_id == topology[0].core_id)
                ++threads_per_core;
        }
        assert(threads_per_core != 0);
    }
    if(num_phys_cores_per_socket == 0 && num_cores == num_online_cores) num_phys_cores_per_socket = num_cores / num_sockets / threads_per_core;
    if(num_online_cores == 0) num_online_cores = num_cores;

    int32 i = 0;

    // remember one online core per socket as the socket's reference core
    socketRefCore.resize(num_sockets, -1);
    for(i = 0; i < num_cores; ++i)
    {
        if(isCoreOnline(i))
        {
            socketRefCore[topology[i].socket] = i;
        }
    }

    num_online_sockets = 0;
    for(i = 0; i < num_sockets; ++i)
    {
        if(isSocketOnline(i))
        {
            ++num_online_sockets;
        }
    }

    // per-core accumulators for the top-down (TMA) slot metrics
    FrontendBoundSlots.resize(num_cores, 0);
    BadSpeculationSlots.resize(num_cores, 0);
    BackendBoundSlots.resize(num_cores, 0);
    RetiringSlots.resize(num_cores, 0);
    AllSlotsRaw.resize(num_cores, 0);

#if 0
    std::cerr << "Socket reference cores:\n";
    for(int32 i=0; i< num_sockets;++i)
    {
        std::cerr << "socketRefCore[" << i << "]=" << socketRefCore[i] << "\n";
    }
#endif

    return true;
}
1417 
printSystemTopology() const1418 void PCM::printSystemTopology() const
1419 {
1420     if(num_cores == num_online_cores)
1421     {
1422       std::cerr << "Number of physical cores: " << (num_cores/threads_per_core) << "\n";
1423     }
1424 
1425     std::cerr << "Number of logical cores: " << num_cores << "\n";
1426     std::cerr << "Number of online logical cores: " << num_online_cores << "\n";
1427 
1428     if(num_cores == num_online_cores)
1429     {
1430       std::cerr << "Threads (logical cores) per physical core: " << threads_per_core << "\n";
1431     }
1432     else
1433     {
1434         std::cerr << "Offlined cores: ";
1435         for (int i = 0; i < (int)num_cores; ++i)
1436             if(isCoreOnline((int32)i) == false)
1437                 std::cerr << i << " ";
1438         std::cerr << "\n";
1439     }
1440     std::cerr << "Num sockets: " << num_sockets << "\n";
1441     if (num_phys_cores_per_socket > 0)
1442     {
1443         std::cerr << "Physical cores per socket: " << num_phys_cores_per_socket << "\n";
1444     }
1445     std::cerr << "Last level cache slices per socket: " << getMaxNumOfCBoxes() << "\n";
1446     std::cerr << "Core PMU (perfmon) version: " << perfmon_version << "\n";
1447     std::cerr << "Number of core PMU generic (programmable) counters: " << core_gen_counter_num_max << "\n";
1448     std::cerr << "Width of generic (programmable) counters: " << core_gen_counter_width << " bits\n";
1449     if (perfmon_version > 0)
1450     {
1451         std::cerr << "Number of core PMU fixed counters: " << core_fixed_counter_num_max << "\n";
1452         std::cerr << "Width of fixed counters: " << core_fixed_counter_width << " bits\n";
1453     }
1454     if (perfmon_version < 2 && vm == true)
1455     {
1456         std::cerr << "Warning: detected an unsupported virtualized environment: the hypervisor has limited the core PMU (perfmon) version to " << perfmon_version << "\n";
1457     }
1458 }
1459 
initMSR()1460 bool PCM::initMSR()
1461 {
1462 #ifndef __APPLE__
1463     try
1464     {
1465         for (int i = 0; i < (int)num_cores; ++i)
1466         {
1467             if ( isCoreOnline( (int32)i ) ) {
1468                 MSR.push_back(std::make_shared<SafeMsrHandle>(i));
1469                 systemTopology->addMSRHandleToOSThread( MSR.back(), (uint32)i );
1470             } else { // the core is offlined, assign an invalid MSR handle
1471                 MSR.push_back(std::make_shared<SafeMsrHandle>());
1472                 systemTopology->addMSRHandleToOSThread( MSR.back(), (uint32)i );
1473             }
1474         }
1475     }
1476     catch (...)
1477     {
1478         // failed
1479         MSR.clear();
1480 
1481         std::cerr << "Can not access CPUs Model Specific Registers (MSRs).\n";
1482 #ifdef _MSC_VER
1483         std::cerr << "You must have signed msr.sys driver in your current directory and have administrator rights to run this program.\n";
1484 #elif defined(__linux__)
1485         std::cerr << "Try to execute 'modprobe msr' as root user and then\n";
1486         std::cerr << "you also must have read and write permissions for /dev/cpu/*/msr devices (/dev/msr* for Android). The 'chown' command can help.\n";
1487 #elif defined(__FreeBSD__) || defined(__DragonFly__)
1488         std::cerr << "Ensure cpuctl module is loaded and that you have read and write\n";
1489         std::cerr << "permissions for /dev/cpuctl* devices (the 'chown' command can help).\n";
1490 #endif
1491         return false;
1492     }
1493 #endif
1494     return true;
1495 }
1496 
detectNominalFrequency()1497 bool PCM::detectNominalFrequency()
1498 {
1499     if (MSR.size())
1500     {
1501         uint64 freq = 0;
1502         MSR[socketRefCore[0]]->read(PLATFORM_INFO_ADDR, &freq);
1503         const uint64 bus_freq = (
1504                   cpu_model == SANDY_BRIDGE
1505                || cpu_model == JAKETOWN
1506                || cpu_model == IVYTOWN
1507                || cpu_model == HASWELLX
1508                || cpu_model == BDX_DE
1509                || cpu_model == BDX
1510                || cpu_model == IVY_BRIDGE
1511                || cpu_model == HASWELL
1512                || cpu_model == BROADWELL
1513                || cpu_model == AVOTON
1514                || cpu_model == APOLLO_LAKE
1515                || cpu_model == DENVERTON
1516                || useSKLPath()
1517                || cpu_model == SNOWRIDGE
1518                || cpu_model == KNL
1519                || cpu_model == SKX
1520                || cpu_model == ICX
1521                ) ? (100000000ULL) : (133333333ULL);
1522 
1523         nominal_frequency = ((freq >> 8) & 255) * bus_freq;
1524 
1525         if(!nominal_frequency)
1526             nominal_frequency = get_frequency_from_cpuid();
1527 
1528         if(!nominal_frequency)
1529         {
1530             std::cerr << "Error: Can not detect core frequency.\n";
1531             destroyMSR();
1532             return false;
1533         }
1534 
1535 #ifndef PCM_SILENT
1536         std::cerr << "Nominal core frequency: " << nominal_frequency << " Hz\n";
1537 #endif
1538     }
1539 
1540     return true;
1541 }
1542 
initEnergyMonitoring()1543 void PCM::initEnergyMonitoring()
1544 {
1545     if(packageEnergyMetricsAvailable() && MSR.size())
1546     {
1547         uint64 rapl_power_unit = 0;
1548         MSR[socketRefCore[0]]->read(MSR_RAPL_POWER_UNIT,&rapl_power_unit);
1549         uint64 energy_status_unit = extract_bits(rapl_power_unit,8,12);
1550         if (cpu_model == PCM::CHERRYTRAIL || cpu_model == PCM::BAYTRAIL)
1551             joulesPerEnergyUnit = double(1ULL << energy_status_unit)/1000000.; // (2)^energy_status_unit microJoules
1552         else
1553             joulesPerEnergyUnit = 1./double(1ULL<<energy_status_unit); // (1/2)^energy_status_unit
1554         //std::cout << "MSR_RAPL_POWER_UNIT: " << energy_status_unit << "; Joules/unit " << joulesPerEnergyUnit << "\n";
1555         uint64 power_unit = extract_bits(rapl_power_unit,0,3);
1556         double wattsPerPowerUnit = 1./double(1ULL<<power_unit);
1557 
1558         uint64 package_power_info = 0;
1559         MSR[socketRefCore[0]]->read(MSR_PKG_POWER_INFO,&package_power_info);
1560         pkgThermalSpecPower = (int32) (double(extract_bits(package_power_info, 0, 14))*wattsPerPowerUnit);
1561         pkgMinimumPower = (int32) (double(extract_bits(package_power_info, 16, 30))*wattsPerPowerUnit);
1562         pkgMaximumPower = (int32) (double(extract_bits(package_power_info, 32, 46))*wattsPerPowerUnit);
1563 
1564 #ifndef PCM_SILENT
1565         std::cerr << "Package thermal spec power: " << pkgThermalSpecPower << " Watt; ";
1566         std::cerr << "Package minimum power: " << pkgMinimumPower << " Watt; ";
1567         std::cerr << "Package maximum power: " << pkgMaximumPower << " Watt;\n";
1568 #endif
1569 
1570         int i = 0;
1571 
1572         if(energy_status.empty())
1573             for (i = 0; i < (int)num_sockets; ++i)
1574                 energy_status.push_back(
1575                     std::make_shared<CounterWidthExtender>(
1576                         new CounterWidthExtender::MsrHandleCounter(MSR[socketRefCore[i]], MSR_PKG_ENERGY_STATUS), 32, 10000));
1577 
1578         if(dramEnergyMetricsAvailable() && dram_energy_status.empty())
1579             for (i = 0; i < (int)num_sockets; ++i)
1580                 dram_energy_status.push_back(
1581                     std::make_shared<CounterWidthExtender>(
1582                     new CounterWidthExtender::MsrHandleCounter(MSR[socketRefCore[i]], MSR_DRAM_ENERGY_STATUS), 32, 10000));
1583     }
1584 }
1585 
// PCI device ids recognized as the per-socket server UBOX0 device
// (0x3451: presumably the ICX/SNR UBOX -- confirm against initSocket2Bus callers)
static const uint32 UBOX0_DEV_IDS[] = {
    0x3451
};

// per-socket UBOX0 PCI location, filled by initSocket2Ubox0Bus();
// NOTE(review): pair members look like (PCI segment/group, bus) -- verify in initSocket2Bus
std::vector<std::pair<uint32, uint32> > socket2UBOX0bus;

// defined elsewhere in this file: scans PCI config space for the given
// device/function and ids, and fills 'socket2bus'
void initSocket2Bus(std::vector<std::pair<uint32, uint32> > & socket2bus, uint32 device, uint32 function, const uint32 DEV_IDS[], uint32 devIdsSize);
1593 
initSocket2Ubox0Bus()1594 void initSocket2Ubox0Bus()
1595 {
1596     initSocket2Bus(socket2UBOX0bus, SERVER_UBOX0_REGISTER_DEV_ADDR, SERVER_UBOX0_REGISTER_FUNC_ADDR,
1597         UBOX0_DEV_IDS, (uint32)sizeof(UBOX0_DEV_IDS) / sizeof(UBOX0_DEV_IDS[0]));
1598 }
1599 
// Create the per-socket uncore accessor objects. Server parts expose the uncore
// through PCI configuration space (ServerPCICFGUncore); client parts expose
// memory-controller counters through the MC hub (clientBW). Any failure here
// disables uncore counting but is not fatal for the rest of PCM.
void PCM::initUncoreObjects()
{
    if (hasPCICFGUncore() && MSR.size())
    {
        int i = 0;
        bool failed = false;
        try
        {
            // one ServerPCICFGUncore object per socket
            for (i = 0; i < (int)num_sockets; ++i)
            {
                server_pcicfg_uncore.push_back(std::make_shared<ServerPCICFGUncore>(i, this));
            }
        }
        catch (std::runtime_error & e)
        {
            std::cerr << e.what() << "\n";
            failed = true;
        }
        catch (...)
        {
            failed = true;
        }
        if (failed)
        {
            // roll back any partially constructed objects and continue without
            // server uncore support
            server_pcicfg_uncore.clear();
            std::cerr << "Can not access server uncore PCI configuration space. Access to uncore counters (memory and QPI bandwidth) is disabled.\n";
#ifdef _MSC_VER
            std::cerr << "You must have signed msr.sys driver in your current directory and have administrator rights to run this program.\n";
#else
            //std::cerr << "you must have read and write permissions for /proc/bus/pci/7f/10.* and /proc/bus/pci/ff/10.* devices (the 'chown' command can help).\n";
            //std::cerr << "you must have read and write permissions for /dev/mem device (the 'chown' command can help).\n";
            //std::cerr << "you must have read permission for /sys/firmware/acpi/tables/MCFG device (the 'chmod' command can help).\n";
            std::cerr << "You must be root to access server uncore counters in PCM.\n";
#endif
        }
    } else if(hasClientMCCounters() && MSR.size())
    {
       // initialize memory bandwidth counting
       try
       {
           // TGL has its own MC counter layout; all other client models use the
           // generic ClientBW accessor
           switch (cpu_model)
           {
           case TGL:
               clientBW = std::make_shared<TGLClientBW>();
               break;
           default:
               clientBW = std::make_shared<ClientBW>();
           }
           // wrap the raw 32-bit MC counters into software-extended 64-bit
           // counters (arguments: raw width in bits, refresh period — units not
           // visible here, TODO confirm against CounterWidthExtender)
           clientImcReads = std::make_shared<CounterWidthExtender>(
               new CounterWidthExtender::ClientImcReadsCounter(clientBW), 32, 10000);
           clientImcWrites = std::make_shared<CounterWidthExtender>(
               new CounterWidthExtender::ClientImcWritesCounter(clientBW), 32, 10000);
           clientIoRequests = std::make_shared<CounterWidthExtender>(
               new CounterWidthExtender::ClientIoRequestsCounter(clientBW), 32, 10000);

       } catch(...)
       {
           std::cerr << "Can not read memory controller counter information from PCI configuration space. Access to memory bandwidth counters is not possible.\n";
           #ifdef _MSC_VER
           // TODO: add message here
           #endif
           #ifdef __linux__
           std::cerr << "You must be root to access these SandyBridge/IvyBridge/Haswell counters in PCM. \n";
           #endif
       }
    }
    // ICX/SNR additionally need the UBOX0 bus mapping to build per-socket
    // free-running bandwidth (ServerBW) accessors
    if (cpu_model == ICX || cpu_model == SNOWRIDGE)
    {
        bool failed = false;
        try
        {
            initSocket2Ubox0Bus();
        }
        catch (std::exception & e)
        {
            std::cerr << e.what() << "\n";
            failed = true;
        }
        catch (...)
        {
            failed = true;
        }
        if (failed)
        {
            std::cerr << "Can not read PCI configuration space bus mapping. Access to uncore counters is disabled.\n";
        }
        // only build ServerBW objects for sockets that have both a bus mapping
        // and a ServerPCICFGUncore object
        for (size_t s = 0; s < (size_t)num_sockets && s < socket2UBOX0bus.size() && s < server_pcicfg_uncore.size(); ++s)
        {
            serverBW.push_back(std::make_shared<ServerBW>(server_pcicfg_uncore[s]->getNumMC(), socket2UBOX0bus[s].first, socket2UBOX0bus[s].second));
            // std::cout << " Added serverBW object server_pcicfg_uncore[s]->getNumMC() = " << server_pcicfg_uncore[s]->getNumMC() << std::endl;
        }
        if (socket2UBOX0bus.size() != (size_t)num_sockets)
        {
            std::cerr << "PCM warning: found " << socket2UBOX0bus.size() << " uboxes. Expected " << num_sockets << std::endl;
        }
    }

    // choose the uncore PMU programming backend
    if (useLinuxPerfForUncore())
    {
        initUncorePMUsPerf();
    }
    else
    {
        initUncorePMUsDirect();
    }
}
1706 
// Set up uncore PMU objects via direct MSR access (used when Linux perf is not
// selected for uncore programming). Builds UBOX, PCU, IIO and CBO/CHA PMU
// wrappers per socket; empty shared_ptr arguments stand for registers a given
// PMU does not provide.
void PCM::initUncorePMUsDirect()
{
    for (uint32 s = 0; s < (uint32)num_sockets; ++s)
    {
        auto & handle = MSR[socketRefCore[s]];
        // unfreeze uncore PMUs
        // (the unfreeze bit position differs per microarchitecture)
        switch (cpu_model)
        {
        case SKX:
            handle->write(MSR_UNCORE_PMON_GLOBAL_CTL, 1ULL << 61ULL);
            break;
        case HASWELLX:
        case BDX:
            handle->write(MSR_UNCORE_PMON_GLOBAL_CTL, 1ULL << 29ULL);
            break;
        case IVYTOWN:
            handle->write(IVT_MSR_UNCORE_PMON_GLOBAL_CTL, 1ULL << 29ULL);
            break;
        }
        // UBOX PMU: two general-purpose control/counter pairs plus the fixed
        // UCLK control/counter; JKT/IVT use different MSR addresses
        if (IVYTOWN == cpu_model || JAKETOWN == cpu_model)
        {
            uboxPMUs.push_back(
                UncorePMU(
                    std::shared_ptr<MSRRegister>(),   // no box control register
                    std::make_shared<MSRRegister>(handle, JKTIVT_UBOX_MSR_PMON_CTL0_ADDR),
                    std::make_shared<MSRRegister>(handle, JKTIVT_UBOX_MSR_PMON_CTL1_ADDR),
                    std::shared_ptr<MSRRegister>(),
                    std::shared_ptr<MSRRegister>(),
                    std::make_shared<MSRRegister>(handle, JKTIVT_UBOX_MSR_PMON_CTR0_ADDR),
                    std::make_shared<MSRRegister>(handle, JKTIVT_UBOX_MSR_PMON_CTR1_ADDR),
                    std::shared_ptr<MSRRegister>(),
                    std::shared_ptr<MSRRegister>(),
                    std::make_shared<MSRRegister>(handle, JKTIVT_UCLK_FIXED_CTL_ADDR),
                    std::make_shared<MSRRegister>(handle, JKTIVT_UCLK_FIXED_CTR_ADDR)
                )
            );
        }
        else
        {
            uboxPMUs.push_back(
                UncorePMU(
                    std::shared_ptr<MSRRegister>(),   // no box control register
                    std::make_shared<MSRRegister>(handle, UBOX_MSR_PMON_CTL0_ADDR),
                    std::make_shared<MSRRegister>(handle, UBOX_MSR_PMON_CTL1_ADDR),
                    std::shared_ptr<MSRRegister>(),
                    std::shared_ptr<MSRRegister>(),
                    std::make_shared<MSRRegister>(handle, UBOX_MSR_PMON_CTR0_ADDR),
                    std::make_shared<MSRRegister>(handle, UBOX_MSR_PMON_CTR1_ADDR),
                    std::shared_ptr<MSRRegister>(),
                    std::shared_ptr<MSRRegister>(),
                    std::make_shared<MSRRegister>(handle, UCLK_FIXED_CTL_ADDR),
                    std::make_shared<MSRRegister>(handle, UCLK_FIXED_CTR_ADDR)
                )
            );
        }
        // PCU PMU: box control, four control/counter pairs and a filter register
        switch (cpu_model)
        {
        case IVYTOWN:
        case JAKETOWN:
            pcuPMUs.push_back(
                UncorePMU(
                    std::make_shared<MSRRegister>(handle, JKTIVT_PCU_MSR_PMON_BOX_CTL_ADDR),
                    std::make_shared<MSRRegister>(handle, JKTIVT_PCU_MSR_PMON_CTL0_ADDR),
                    std::make_shared<MSRRegister>(handle, JKTIVT_PCU_MSR_PMON_CTL1_ADDR),
                    std::make_shared<MSRRegister>(handle, JKTIVT_PCU_MSR_PMON_CTL2_ADDR),
                    std::make_shared<MSRRegister>(handle, JKTIVT_PCU_MSR_PMON_CTL3_ADDR),
                    std::make_shared<MSRRegister>(handle, JKTIVT_PCU_MSR_PMON_CTR0_ADDR),
                    std::make_shared<MSRRegister>(handle, JKTIVT_PCU_MSR_PMON_CTR1_ADDR),
                    std::make_shared<MSRRegister>(handle, JKTIVT_PCU_MSR_PMON_CTR2_ADDR),
                    std::make_shared<MSRRegister>(handle, JKTIVT_PCU_MSR_PMON_CTR3_ADDR),
                    std::shared_ptr<MSRRegister>(),   // no fixed control/counter
                    std::shared_ptr<MSRRegister>(),
                    std::make_shared<MSRRegister>(handle, JKTIVT_PCU_MSR_PMON_BOX_FILTER_ADDR)
                )
            );
            break;
        case BDX_DE:
        case BDX:
        case KNL:
        case HASWELLX:
        case SKX:
        case ICX:
            pcuPMUs.push_back(
                UncorePMU(
                    std::make_shared<MSRRegister>(handle, HSX_PCU_MSR_PMON_BOX_CTL_ADDR),
                    std::make_shared<MSRRegister>(handle, HSX_PCU_MSR_PMON_CTL0_ADDR),
                    std::make_shared<MSRRegister>(handle, HSX_PCU_MSR_PMON_CTL1_ADDR),
                    std::make_shared<MSRRegister>(handle, HSX_PCU_MSR_PMON_CTL2_ADDR),
                    std::make_shared<MSRRegister>(handle, HSX_PCU_MSR_PMON_CTL3_ADDR),
                    std::make_shared<MSRRegister>(handle, HSX_PCU_MSR_PMON_CTR0_ADDR),
                    std::make_shared<MSRRegister>(handle, HSX_PCU_MSR_PMON_CTR1_ADDR),
                    std::make_shared<MSRRegister>(handle, HSX_PCU_MSR_PMON_CTR2_ADDR),
                    std::make_shared<MSRRegister>(handle, HSX_PCU_MSR_PMON_CTR3_ADDR),
                    std::shared_ptr<MSRRegister>(),   // no fixed control/counter
                    std::shared_ptr<MSRRegister>(),
                    std::make_shared<MSRRegister>(handle, HSX_PCU_MSR_PMON_BOX_FILTER_ADDR)
                )
            );
            break;
        }
    }
    // init IIO addresses
    // (IIO stack PMUs only exist on SKX/ICX/SNR; the register layout differs:
    //  SKX/SNR use a fixed stride between stacks, ICX uses a per-stack address table)
    if (getCPUModel() == PCM::SKX)
    {
        iioPMUs.resize(num_sockets);
        for (uint32 s = 0; s < (uint32)num_sockets; ++s)
        {
            auto & handle = MSR[socketRefCore[s]];
            for (int unit = 0; unit < SKX_IIO_STACK_COUNT; ++unit)
            {
                iioPMUs[s][unit] = UncorePMU(
                    std::make_shared<MSRRegister>(handle, SKX_IIO_CBDMA_UNIT_CTL + SKX_IIO_PM_REG_STEP * unit),
                    std::make_shared<MSRRegister>(handle, SKX_IIO_CBDMA_CTL0 + SKX_IIO_PM_REG_STEP * unit + 0),
                    std::make_shared<MSRRegister>(handle, SKX_IIO_CBDMA_CTL0 + SKX_IIO_PM_REG_STEP * unit + 1),
                    std::make_shared<MSRRegister>(handle, SKX_IIO_CBDMA_CTL0 + SKX_IIO_PM_REG_STEP * unit + 2),
                    std::make_shared<MSRRegister>(handle, SKX_IIO_CBDMA_CTL0 + SKX_IIO_PM_REG_STEP * unit + 3),
                    std::make_shared<MSRRegister>(handle, SKX_IIO_CBDMA_CTR0 + SKX_IIO_PM_REG_STEP * unit + 0),
                    std::make_shared<MSRRegister>(handle, SKX_IIO_CBDMA_CTR0 + SKX_IIO_PM_REG_STEP * unit + 1),
                    std::make_shared<MSRRegister>(handle, SKX_IIO_CBDMA_CTR0 + SKX_IIO_PM_REG_STEP * unit + 2),
                    std::make_shared<MSRRegister>(handle, SKX_IIO_CBDMA_CTR0 + SKX_IIO_PM_REG_STEP * unit + 3)
                );
            }
        }
    }
    else if (getCPUModel() == PCM::ICX)
    {
        iioPMUs.resize(num_sockets);
        for (uint32 s = 0; s < (uint32)num_sockets; ++s)
        {
            auto & handle = MSR[socketRefCore[s]];
            for (int unit = 0; unit < ICX_IIO_STACK_COUNT; ++unit)
            {
                iioPMUs[s][unit] = UncorePMU(
                    std::make_shared<MSRRegister>(handle, ICX_IIO_UNIT_CTL[unit]),
                    std::make_shared<MSRRegister>(handle, ICX_IIO_UNIT_CTL[unit] + ICX_IIO_CTL_REG_OFFSET + 0),
                    std::make_shared<MSRRegister>(handle, ICX_IIO_UNIT_CTL[unit] + ICX_IIO_CTL_REG_OFFSET + 1),
                    std::make_shared<MSRRegister>(handle, ICX_IIO_UNIT_CTL[unit] + ICX_IIO_CTL_REG_OFFSET + 2),
                    std::make_shared<MSRRegister>(handle, ICX_IIO_UNIT_CTL[unit] + ICX_IIO_CTL_REG_OFFSET + 3),
                    std::make_shared<MSRRegister>(handle, ICX_IIO_UNIT_CTL[unit] + ICX_IIO_CTR_REG_OFFSET + 0),
                    std::make_shared<MSRRegister>(handle, ICX_IIO_UNIT_CTL[unit] + ICX_IIO_CTR_REG_OFFSET + 1),
                    std::make_shared<MSRRegister>(handle, ICX_IIO_UNIT_CTL[unit] + ICX_IIO_CTR_REG_OFFSET + 2),
                    std::make_shared<MSRRegister>(handle, ICX_IIO_UNIT_CTL[unit] + ICX_IIO_CTR_REG_OFFSET + 3)
                );
            }
        }
    }
    else if (getCPUModel() == PCM::SNOWRIDGE)
    {
        iioPMUs.resize(num_sockets);
        for (uint32 s = 0; s < (uint32)num_sockets; ++s)
        {
            auto & handle = MSR[socketRefCore[s]];
            for (int unit = 0; unit < SNR_IIO_STACK_COUNT; ++unit)
            {
                iioPMUs[s][unit] = UncorePMU(
                    std::make_shared<MSRRegister>(handle, SNR_IIO_CBDMA_UNIT_CTL + SNR_IIO_PM_REG_STEP * unit),
                    std::make_shared<MSRRegister>(handle, SNR_IIO_CBDMA_CTL0 + SNR_IIO_PM_REG_STEP * unit + 0),
                    std::make_shared<MSRRegister>(handle, SNR_IIO_CBDMA_CTL0 + SNR_IIO_PM_REG_STEP * unit + 1),
                    std::make_shared<MSRRegister>(handle, SNR_IIO_CBDMA_CTL0 + SNR_IIO_PM_REG_STEP * unit + 2),
                    std::make_shared<MSRRegister>(handle, SNR_IIO_CBDMA_CTL0 + SNR_IIO_PM_REG_STEP * unit + 3),
                    std::make_shared<MSRRegister>(handle, SNR_IIO_CBDMA_CTR0 + SNR_IIO_PM_REG_STEP * unit + 0),
                    std::make_shared<MSRRegister>(handle, SNR_IIO_CBDMA_CTR0 + SNR_IIO_PM_REG_STEP * unit + 1),
                    std::make_shared<MSRRegister>(handle, SNR_IIO_CBDMA_CTR0 + SNR_IIO_PM_REG_STEP * unit + 2),
                    std::make_shared<MSRRegister>(handle, SNR_IIO_CBDMA_CTR0 + SNR_IIO_PM_REG_STEP * unit + 3)
                );
            }
        }
    }

    // CBO/CHA PMUs: counter MSRs are wrapped in CounterWidthExtender objects
    // (48-bit raw counters extended to 64 bits in software; 5555 is the refresh
    // period — units not visible here, TODO confirm against CounterWidthExtender)
    if (hasPCICFGUncore() && MSR.size())
    {
        cboPMUs.resize(num_sockets);
        for (uint32 s = 0; s < (uint32)num_sockets; ++s)
        {
            auto & handle = MSR[socketRefCore[s]];
            for (uint32 cbo = 0; cbo < getMaxNumOfCBoxes(); ++cbo)
            {
                // not every model has a second filter register; use an empty
                // handle when the address resolves to 0
                const auto filter1MSR = CX_MSR_PMON_BOX_FILTER1(cbo);
                std::shared_ptr<HWRegister> filter1MSRHandle = filter1MSR ? std::make_shared<MSRRegister>(handle, filter1MSR) : std::shared_ptr<HWRegister>();
                cboPMUs[s].push_back(
                    UncorePMU(
                        std::make_shared<MSRRegister>(handle, CX_MSR_PMON_BOX_CTL(cbo)),
                        std::make_shared<MSRRegister>(handle, CX_MSR_PMON_CTLY(cbo, 0)),
                        std::make_shared<MSRRegister>(handle, CX_MSR_PMON_CTLY(cbo, 1)),
                        std::make_shared<MSRRegister>(handle, CX_MSR_PMON_CTLY(cbo, 2)),
                        std::make_shared<MSRRegister>(handle, CX_MSR_PMON_CTLY(cbo, 3)),
                        std::make_shared<CounterWidthExtenderRegister>(
                            std::make_shared<CounterWidthExtender>(new CounterWidthExtender::MsrHandleCounter(MSR[socketRefCore[s]], CX_MSR_PMON_CTRY(cbo, 0)), 48, 5555)),
                        std::make_shared<CounterWidthExtenderRegister>(
                            std::make_shared<CounterWidthExtender>(new CounterWidthExtender::MsrHandleCounter(MSR[socketRefCore[s]], CX_MSR_PMON_CTRY(cbo, 1)), 48, 5555)),
                        std::make_shared<CounterWidthExtenderRegister>(
                            std::make_shared<CounterWidthExtender>(new CounterWidthExtender::MsrHandleCounter(MSR[socketRefCore[s]], CX_MSR_PMON_CTRY(cbo, 2)), 48, 5555)),
                        std::make_shared<CounterWidthExtenderRegister>(
                            std::make_shared<CounterWidthExtender>(new CounterWidthExtender::MsrHandleCounter(MSR[socketRefCore[s]], CX_MSR_PMON_CTRY(cbo, 3)), 48, 5555)),
                        std::shared_ptr<MSRRegister>(),   // no fixed control/counter
                        std::shared_ptr<MSRRegister>(),
                        std::make_shared<MSRRegister>(handle, CX_MSR_PMON_BOX_FILTER(cbo)),
                        filter1MSRHandle
                    )
                );
            }
        }
    }
}
1911 
#ifdef PCM_USE_PERF
// Forward declarations (definitions are elsewhere in this file):
// enumeratePerfPMUs  - presumably lists ids of perf uncore event sources of the
//                      given type (up to max_id) — see its definition
// populatePerfPMUs   - wraps the enumerated ids into UncorePMU objects;
//                      'fixed'/'filter0'/'filter1' select which registers exist
std::vector<int> enumeratePerfPMUs(const std::string & type, int max_id);
void populatePerfPMUs(unsigned socket_, const std::vector<int> & ids, std::vector<UncorePMU> & pmus, bool fixed, bool filter0 = false, bool filter1 = false);
#endif
1916 
initUncorePMUsPerf()1917 void PCM::initUncorePMUsPerf()
1918 {
1919 #ifdef PCM_USE_PERF
1920     iioPMUs.resize(num_sockets);
1921     cboPMUs.resize(num_sockets);
1922     for (uint32 s = 0; s < (uint32)num_sockets; ++s)
1923     {
1924         populatePerfPMUs(s, enumeratePerfPMUs("pcu", 100), pcuPMUs, false, true);
1925         populatePerfPMUs(s, enumeratePerfPMUs("ubox", 100), uboxPMUs, true);
1926         populatePerfPMUs(s, enumeratePerfPMUs("cbox", 100), cboPMUs[s], false, true, true);
1927         populatePerfPMUs(s, enumeratePerfPMUs("cha", 200), cboPMUs[s], false, true, true);
1928         std::vector<UncorePMU> iioPMUVector;
1929         populatePerfPMUs(s, enumeratePerfPMUs("iio", 100), iioPMUVector, false);
1930         for (size_t i = 0; i < iioPMUVector.size(); ++i)
1931         {
1932             iioPMUs[s][i] = iioPMUVector[i];
1933         }
1934     }
1935 #endif
1936 }
1937 
1938 #ifdef __linux__
1939 
1940 #define PCM_NMI_WATCHDOG_PATH "/proc/sys/kernel/nmi_watchdog"
1941 
isNMIWatchdogEnabled()1942 bool isNMIWatchdogEnabled()
1943 {
1944     const auto watchdog = readSysFS(PCM_NMI_WATCHDOG_PATH);
1945     if (watchdog.length() == 0)
1946     {
1947         return false;
1948     }
1949 
1950     return (std::atoi(watchdog.c_str()) == 1);
1951 }
1952 
disableNMIWatchdog(const bool silent)1953 void disableNMIWatchdog(const bool silent)
1954 {
1955     if (!silent) std::cerr << "Disabling NMI watchdog since it consumes one hw-PMU counter.\n";
1956     writeSysFS(PCM_NMI_WATCHDOG_PATH, "0");
1957 }
1958 
enableNMIWatchdog(const bool silent)1959 void enableNMIWatchdog(const bool silent)
1960 {
1961     if (!silent) std::cerr << " Re-enabling NMI watchdog.\n";
1962     writeSysFS(PCM_NMI_WATCHDOG_PATH, "1");
1963 }
1964 #endif
1965 
1966 class CoreTaskQueue
1967 {
1968     std::queue<std::packaged_task<void()> > wQueue;
1969     std::mutex m;
1970     std::condition_variable condVar;
1971     std::thread worker;
1972     CoreTaskQueue() = delete;
1973     CoreTaskQueue(CoreTaskQueue &) = delete;
1974 public:
CoreTaskQueue(int32 core)1975     CoreTaskQueue(int32 core) :
1976         worker([=]() {
1977             TemporalThreadAffinity tempThreadAffinity(core, false);
1978             std::unique_lock<std::mutex> lock(m);
1979             while (1) {
1980                 while (wQueue.empty()) {
1981                     condVar.wait(lock);
1982                 }
1983                 while (!wQueue.empty()) {
1984                     wQueue.front()();
1985                     wQueue.pop();
1986                 }
1987             }
1988         })
1989     {}
push(std::packaged_task<void ()> & task)1990     void push(std::packaged_task<void()> & task)
1991     {
1992         std::unique_lock<std::mutex> lock(m);
1993         wQueue.push(std::move(task));
1994         condVar.notify_one();
1995     }
1996 };
1997 
// PCM constructor: detects the CPU, discovers the system topology, opens MSR
// handles and initializes the counter infrastructure. Each step that fails
// returns early, leaving the object in a degraded state (check with good()).
PCM::PCM() :
    cpu_family(-1),
    cpu_model(-1),
    cpu_stepping(-1),
    cpu_microcode_level(-1),
    max_cpuid(-1),
    threads_per_core(0),
    num_cores(0),
    num_sockets(0),
    num_phys_cores_per_socket(0),
    num_online_cores(0),
    num_online_sockets(0),
    core_gen_counter_num_max(0),
    core_gen_counter_num_used(0), // 0 means no core gen counters used
    core_gen_counter_width(0),
    core_fixed_counter_num_max(0),
    core_fixed_counter_num_used(0),
    core_fixed_counter_width(0),
    // uncore counter defaults; refined later when the PMUs are initialized
    uncore_gen_counter_num_max(8),
    uncore_gen_counter_num_used(0),
    uncore_gen_counter_width(48),
    uncore_fixed_counter_num_max(1),
    uncore_fixed_counter_num_used(0),
    uncore_fixed_counter_width(48),
    perfmon_version(0),
    perfmon_config_anythread(1),
    nominal_frequency(0),
    max_qpi_speed(0),
    L3ScalingFactor(0),
    pkgThermalSpecPower(-1),
    pkgMinimumPower(-1),
    pkgMaximumPower(-1),
    systemTopology(new SystemRoot(this)), // owned; freed in ~PCM() when this is the active instance
    allow_multiple_instances(false),
    programmed_pmu(false),
    joulesPerEnergyUnit(0),
#ifdef __linux__
    resctrl(*this),
#endif
    useResctrl(false),
    disable_JKT_workaround(false),
    blocked(false),
    coreCStateMsr(NULL),
    pkgCStateMsr(NULL),
    // capability flags; set during model/counter detection
    L2CacheHitRatioAvailable(false),
    L3CacheHitRatioAvailable(false),
    L3CacheMissesAvailable(false),
    L2CacheMissesAvailable(false),
    L2CacheHitsAvailable(false),
    L3CacheHitsNoSnoopAvailable(false),
    L3CacheHitsSnoopAvailable(false),
    L3CacheHitsAvailable(false),
    forceRTMAbortMode(false),
    mode(INVALID_MODE),
    numInstancesSemaphore(NULL),
    canUsePerf(false),
    outfile(NULL),
    backup_ofile(NULL),
    run_state(1),
    needToRestoreNMIWatchdog(false)
{
#ifdef _MSC_VER
    // WARNING: This driver code (msr.sys) is only for testing purposes, not for production use
    Driver drv(Driver::msrLocalPath());
    // drv.stop();     // restart driver (usually not needed)
    if (!drv.start())
    {
        std::wcerr << "Cannot access CPU counters\n";
        std::wcerr << "You must have a signed  driver at " << drv.driverPath() << " and have administrator rights to run this program\n";
        return;
    }
#endif

    // initialization steps in dependency order; each step can abort construction
    if(!detectModel()) return;

    if(!checkModel()) return;

    initCStateSupportTables();

    if(!discoverSystemTopology()) return;

    if(!initMSR()) return;

    readCoreCounterConfig(true);

#ifndef PCM_SILENT
    printSystemTopology();
#endif

    if(!detectNominalFrequency()) return;

    showSpecControlMSRs();

    initEnergyMonitoring();

    initUncoreObjects();

    initRDT();

    readCPUMicrocodeLevel();

#ifdef PCM_USE_PERF
    canUsePerf = true;
    // per-core table of perf event file descriptors, -1 = not opened
    std::vector<int> dummy(PERF_MAX_COUNTERS, -1);
    perfEventHandle.resize(num_cores, dummy);
#endif

    // one task queue (with a pinned worker thread) per core
    for (int32 i = 0; i < num_cores; ++i)
    {
        coreTaskQueues.push_back(std::make_shared<CoreTaskQueue>(i));
    }
}
2110 
enableJKTWorkaround(bool enable)2111 void PCM::enableJKTWorkaround(bool enable)
2112 {
2113     if(disable_JKT_workaround) return;
2114     std::cerr << "Using PCM on your system might have a performance impact as per http://software.intel.com/en-us/articles/performance-impact-when-sampling-certain-llc-events-on-snb-ep-with-vtune\n";
2115     std::cerr << "You can avoid the performance impact by using the option --noJKTWA, however the cache metrics might be wrong then.\n";
2116     if(MSR.size())
2117     {
2118         for(int32 i = 0; i < num_cores; ++i)
2119         {
2120             uint64 val64 = 0;
2121             MSR[i]->read(0x39C, &val64);
2122             if(enable)
2123                 val64 |= 1ULL;
2124             else
2125                 val64 &= (~1ULL);
2126             MSR[i]->write(0x39C, val64);
2127         }
2128     }
2129     for (size_t i = 0; i < (size_t)server_pcicfg_uncore.size(); ++i)
2130     {
2131             if(server_pcicfg_uncore[i].get()) server_pcicfg_uncore[i]->enableJKTWorkaround(enable);
2132     }
2133 }
2134 
showSpecControlMSRs()2135 void PCM::showSpecControlMSRs()
2136 {
2137     PCM_CPUID_INFO cpuinfo;
2138     pcm_cpuid(7, 0, cpuinfo);
2139 
2140     if (MSR.size())
2141     {
2142         if ((cpuinfo.reg.edx & (1 << 26)) || (cpuinfo.reg.edx & (1 << 27)))
2143         {
2144             uint64 val64 = 0;
2145             MSR[0]->read(MSR_IA32_SPEC_CTRL, &val64);
2146             std::cerr << "IBRS enabled in the kernel   : " << ((val64 & 1) ? "yes" : "no") << "\n";
2147             std::cerr << "STIBP enabled in the kernel  : " << ((val64 & 2) ? "yes" : "no") << "\n";
2148         }
2149         if (cpuinfo.reg.edx & (1 << 29))
2150         {
2151             uint64 val64 = 0;
2152             MSR[0]->read(MSR_IA32_ARCH_CAPABILITIES, &val64);
2153             std::cerr << "The processor is not susceptible to Rogue Data Cache Load: " << ((val64 & 1) ? "yes" : "no") << "\n";
2154             std::cerr << "The processor supports enhanced IBRS                     : " << ((val64 & 2) ? "yes" : "no") << "\n";
2155         }
2156     }
2157 }
2158 
isCoreOnline(int32 os_core_id) const2159 bool PCM::isCoreOnline(int32 os_core_id) const
2160 {
2161     return (topology[os_core_id].os_id != -1) && (topology[os_core_id].core_id != -1) && (topology[os_core_id].socket != -1);
2162 }
2163 
isSocketOnline(int32 socket_id) const2164 bool PCM::isSocketOnline(int32 socket_id) const
2165 {
2166     return socketRefCore[socket_id] != -1;
2167 }
2168 
isCPUModelSupported(const int model_)2169 bool PCM::isCPUModelSupported(const int model_)
2170 {
2171     return (   model_ == NEHALEM_EP
2172             || model_ == NEHALEM_EX
2173             || model_ == WESTMERE_EP
2174             || model_ == WESTMERE_EX
2175             || isAtom(model_)
2176             || model_ == SNOWRIDGE
2177             || model_ == CLARKDALE
2178             || model_ == SANDY_BRIDGE
2179             || model_ == JAKETOWN
2180             || model_ == IVY_BRIDGE
2181             || model_ == HASWELL
2182             || model_ == IVYTOWN
2183             || model_ == HASWELLX
2184             || model_ == BDX_DE
2185             || model_ == BDX
2186             || model_ == BROADWELL
2187             || model_ == KNL
2188             || model_ == SKL
2189             || model_ == SKL_UY
2190             || model_ == KBL
2191             || model_ == KBL_1
2192             || model_ == CML
2193             || model_ == ICL
2194             || model_ == RKL
2195             || model_ == TGL
2196             || model_ == SKX
2197             || model_ == ICX
2198            );
2199 }
2200 
checkModel()2201 bool PCM::checkModel()
2202 {
2203     if (cpu_model == NEHALEM) cpu_model = NEHALEM_EP;
2204     if (cpu_model == ATOM_2) cpu_model = ATOM;
2205     if (cpu_model == HASWELL_ULT || cpu_model == HASWELL_2) cpu_model = HASWELL;
2206     if (cpu_model == BROADWELL_XEON_E3) cpu_model = BROADWELL;
2207     if (cpu_model == ICX_D) cpu_model = ICX;
2208     if (cpu_model == CML_1) cpu_model = CML;
2209     if (cpu_model == ICL_1) cpu_model = ICL;
2210     if (cpu_model == TGL_1) cpu_model = TGL;
2211 
2212     if(!isCPUModelSupported((int)cpu_model))
2213     {
2214         std::cerr << getUnsupportedMessage() << " CPU model number: " << cpu_model << " Brand: \"" << getCPUBrandString().c_str() << "\"\n";
2215 /* FOR TESTING PURPOSES ONLY */
2216 #ifdef PCM_TEST_FALLBACK_TO_ATOM
2217         std::cerr << "Fall back to ATOM functionality.\n";
2218         cpu_model = ATOM;
2219         return true;
2220 #endif
2221         return false;
2222     }
2223     return true;
2224 }
2225 
// Drop all MSR handles; each underlying handle is destroyed when its last
// shared_ptr reference goes away.
void PCM::destroyMSR()
{
    MSR.clear();
}
2230 
// Destructor: releases MSR handles and the topology tree, but only when this
// object is the registered singleton ('instance' is set elsewhere).
PCM::~PCM()
{
    InstanceLock lock(allow_multiple_instances);
    if (instance)
    {
        destroyMSR();
        instance = NULL;
        // NOTE(review): systemTopology is allocated unconditionally in the
        // constructor but freed only on this path — presumably 'instance' is
        // always set for a fully constructed PCM; verify against getInstance().
        delete systemTopology;
    }
}
2241 
good()2242 bool PCM::good()
2243 {
2244     return !MSR.empty();
2245 }
2246 
2247 #ifdef PCM_USE_PERF
// Build a perf_event_attr pre-initialized with PCM's defaults: pinned,
// counting (not sampling), no exclusions. 'type' and 'config' are sentinel
// values and must be filled in by the caller before perf_event_open().
// When 'group' is true the counter reports in PERF_FORMAT_GROUP layout.
perf_event_attr PCM_init_perf_event_attr(bool group = true)
{
    // Value-initialization zeroes the whole struct, including any fields
    // added by newer kernel headers (replaces the deprecated bzero(),
    // which was removed in POSIX.1-2008).
    perf_event_attr e = {};
    e.type = -1; // must be set up later
    e.size = sizeof(e);
    e.config = -1; // must be set up later
    e.sample_period = 0;
    e.sample_type = 0;
    e.read_format = group ? PERF_FORMAT_GROUP : 0; /* PERF_FORMAT_TOTAL_TIME_ENABLED | PERF_FORMAT_TOTAL_TIME_RUNNING |
                          PERF_FORMAT_ID | PERF_FORMAT_GROUP ; */
    e.disabled = 0;
    e.inherit = 0;
    e.pinned = 1;  // fail programming rather than multiplex the counter
    e.exclusive = 0;
    e.exclude_user = 0;
    e.exclude_kernel = 0;
    e.exclude_hv = 0;
    e.exclude_idle = 0;
    e.mmap = 0;
    e.comm = 0;
    e.freq = 0;
    e.inherit_stat = 0;
    e.enable_on_exec = 0;
    e.task = 0;
    e.watermark = 0;
    e.wakeup_events = 0;
    return e;
}
2277 #endif
2278 
program(const PCM::ProgramMode mode_,const void * parameter_,const bool silent)2279 PCM::ErrorCode PCM::program(const PCM::ProgramMode mode_, const void * parameter_, const bool silent)
2280 {
2281 #ifdef __linux__
2282     if (isNMIWatchdogEnabled())
2283     {
2284         disableNMIWatchdog(silent);
2285         needToRestoreNMIWatchdog = true;
2286     }
2287 #endif
2288 
2289     if(allow_multiple_instances && (EXT_CUSTOM_CORE_EVENTS == mode_ || CUSTOM_CORE_EVENTS == mode_))
2290     {
2291         allow_multiple_instances = false;
2292         std::cerr << "Warning: multiple PCM instance mode is not allowed with custom events.\n";
2293     }
2294 
2295     InstanceLock lock(allow_multiple_instances);
2296     if (MSR.empty()) return PCM::MSRAccessDenied;
2297 
2298     ExtendedCustomCoreEventDescription * pExtDesc = (ExtendedCustomCoreEventDescription *)parameter_;
2299 
2300 #ifdef PCM_USE_PERF
2301     if (!silent) std::cerr << "Trying to use Linux perf events...\n";
2302     const char * no_perf_env = std::getenv("PCM_NO_PERF");
2303     if (no_perf_env != NULL && std::string(no_perf_env) == std::string("1"))
2304     {
2305         canUsePerf = false;
2306         if (!silent) std::cerr << "Usage of Linux perf events is disabled through PCM_NO_PERF environment variable. Using direct PMU programming...\n";
2307     }
2308 /*
2309     if(num_online_cores < num_cores)
2310     {
2311         canUsePerf = false;
2312         std::cerr << "PCM does not support using Linux perf API on systems with offlined cores. Falling-back to direct PMU programming.\n";
2313     }
2314 */
2315     else if(PERF_COUNT_HW_MAX <= PCM_PERF_COUNT_HW_REF_CPU_CYCLES)
2316     {
2317         canUsePerf = false;
2318         if (!silent) std::cerr << "Can not use Linux perf because your Linux kernel does not support PERF_COUNT_HW_REF_CPU_CYCLES event. Falling-back to direct PMU programming.\n";
2319     }
2320     else if(EXT_CUSTOM_CORE_EVENTS == mode_ && pExtDesc && pExtDesc->fixedCfg)
2321     {
2322         canUsePerf = false;
2323         if (!silent) std::cerr << "Can not use Linux perf because non-standard fixed counter configuration requested. Falling-back to direct PMU programming.\n";
2324     }
2325     else if(EXT_CUSTOM_CORE_EVENTS == mode_ && pExtDesc && (pExtDesc->OffcoreResponseMsrValue[0] || pExtDesc->OffcoreResponseMsrValue[1]))
2326     {
2327         const std::string offcore_rsp_format = readSysFS("/sys/bus/event_source/devices/cpu/format/offcore_rsp");
2328         if (offcore_rsp_format != "config1:0-63\n")
2329         {
2330             canUsePerf = false;
2331             if (!silent) std::cerr << "Can not use Linux perf because OffcoreResponse usage is not supported. Falling-back to direct PMU programming.\n";
2332         }
2333     }
2334     if (isHWTMAL1Supported() == true && perfSupportsTopDown() == false)
2335     {
2336         canUsePerf = false;
2337         if (!silent) std::cerr << "Installed Linux kernel perf does not support hardware top-down level-1 counters. Using direct PMU programming instead.\n";
2338     }
2339 #endif
2340 
2341     if(allow_multiple_instances)
2342     {
2343         //std::cerr << "Checking for other instances of PCM...\n";
2344 #ifdef _MSC_VER
2345 
2346         numInstancesSemaphore = CreateSemaphore(NULL, 0, 1 << 20, L"Global\\Number of running Processor Counter Monitor instances");
2347         if (!numInstancesSemaphore)
2348         {
2349             _com_error error(GetLastError());
2350             std::wcerr << "Error in Windows function 'CreateSemaphore': " << GetLastError() << " ";
2351             const TCHAR * strError = _com_error(GetLastError()).ErrorMessage();
2352             if (strError) std::wcerr << strError;
2353             std::wcerr << "\n";
2354             return PCM::UnknownError;
2355         }
2356         LONG prevValue = 0;
2357         if (!ReleaseSemaphore(numInstancesSemaphore, 1, &prevValue))
2358         {
2359             _com_error error(GetLastError());
2360             std::wcerr << "Error in Windows function 'ReleaseSemaphore': " << GetLastError() << " ";
2361             const TCHAR * strError = _com_error(GetLastError()).ErrorMessage();
2362             if (strError) std::wcerr << strError;
2363             std::wcerr << "\n";
2364             return PCM::UnknownError;
2365         }
2366         if (prevValue > 0)  // already programmed since another instance exists
2367         {
2368             if (!silent) std::cerr << "Number of PCM instances: " << (prevValue + 1) << "\n";
2369             if (hasPCICFGUncore() && max_qpi_speed==0)
2370             for (size_t i = 0; i < (size_t)server_pcicfg_uncore.size(); ++i)
2371                 if (server_pcicfg_uncore[i].get())
2372                     max_qpi_speed = (std::max)(server_pcicfg_uncore[i]->computeQPISpeed(socketRefCore[i], cpu_model), max_qpi_speed); // parenthesis to avoid macro expansion on Windows
2373 
2374             reportQPISpeed();
2375             return PCM::Success;
2376         }
2377 
2378     #else // if linux, apple, freebsd or dragonflybsd
2379         numInstancesSemaphore = sem_open(PCM_NUM_INSTANCES_SEMAPHORE_NAME, O_CREAT, S_IRWXU | S_IRWXG | S_IRWXO, 0);
2380         if (SEM_FAILED == numInstancesSemaphore)
2381         {
2382             if (EACCES == errno)
2383                 std::cerr << "PCM Error, do not have permissions to open semaphores in /dev/shm/. Clean up them.\n";
2384             return PCM::UnknownError;
2385         }
2386     #ifndef __APPLE__
2387         sem_post(numInstancesSemaphore);
2388         int curValue = 0;
2389         sem_getvalue(numInstancesSemaphore, &curValue);
2390     #else //if it is apple
2391         uint32 curValue = PCM::incrementNumInstances();
2392         sem_post(numInstancesSemaphore);
2393     #endif // end ifndef __APPLE__
2394 
2395         if (curValue > 1)  // already programmed since another instance exists
2396         {
2397             if (!silent) std::cerr << "Number of PCM instances: " << curValue << "\n";
2398             if (hasPCICFGUncore() && max_qpi_speed==0)
2399             for (int i = 0; i < (int)server_pcicfg_uncore.size(); ++i) {
2400                 if(server_pcicfg_uncore[i].get())
2401                     max_qpi_speed = std::max(server_pcicfg_uncore[i]->computeQPISpeed(socketRefCore[i],cpu_model), max_qpi_speed);
2402                 reportQPISpeed();
2403             }
2404             if(!canUsePerf) return PCM::Success;
2405         }
2406 
2407     #endif // end ifdef _MSC_VER
2408 
2409     #ifdef PCM_USE_PERF
2410     /*
2411     numInst>1 &&  canUsePerf==false -> not reachable, already PMU programmed in another PCM instance
2412     numInst>1 &&  canUsePerf==true  -> perf programmed in different PCM, is not allowed
2413     numInst<=1 && canUsePerf==false -> we are first, perf cannot be used, *check* if PMU busy
2414     numInst<=1 && canUsePerf==true -> we are first, perf will be used, *dont check*, this is now perf business
2415     */
2416         if(curValue > 1 && (canUsePerf == true))
2417         {
2418             std::cerr << "Running several clients using the same counters is not possible with Linux perf. Recompile PCM without Linux Perf support to allow such usage. \n";
2419             decrementInstanceSemaphore();
2420             return PCM::UnknownError;
2421         }
2422 
2423         if((curValue <= 1) && (canUsePerf == false) && PMUinUse())
2424         {
2425             decrementInstanceSemaphore();
2426             return PCM::PMUBusy;
2427         }
2428     #else
2429         if (PMUinUse())
2430         {
2431             decrementInstanceSemaphore();
2432             return PCM::PMUBusy;
2433         }
2434     #endif
2435     }
2436     else
2437     {
2438         if((canUsePerf == false) && PMUinUse())
2439         {
2440             return PCM::PMUBusy;
2441         }
2442     }
2443 
2444     mode = mode_;
2445 
2446     // copy custom event descriptions
2447     if (mode == CUSTOM_CORE_EVENTS)
2448     {
2449         if (!parameter_)
2450         {
2451             std::cerr << "PCM Internal Error: data structure for custom event not initialized\n";
2452             return PCM::UnknownError;
2453         }
2454         CustomCoreEventDescription * pDesc = (CustomCoreEventDescription *)parameter_;
2455         coreEventDesc[0] = pDesc[0];
2456         coreEventDesc[1] = pDesc[1];
2457         if (isAtom() == false && cpu_model != KNL)
2458         {
2459             coreEventDesc[2] = pDesc[2];
2460             core_gen_counter_num_used = 3;
2461             if (core_gen_counter_num_max > 3) {
2462                 coreEventDesc[3] = pDesc[3];
2463                 core_gen_counter_num_used = 4;
2464             }
2465         }
2466         else
2467             core_gen_counter_num_used = 2;
2468     }
2469     else if (mode != EXT_CUSTOM_CORE_EVENTS)
2470     {
2471         if (isAtom() || cpu_model == KNL)
2472         {
2473             coreEventDesc[0].event_number = ARCH_LLC_MISS_EVTNR;
2474             coreEventDesc[0].umask_value = ARCH_LLC_MISS_UMASK;
2475             coreEventDesc[1].event_number = ARCH_LLC_REFERENCE_EVTNR;
2476             coreEventDesc[1].umask_value = ARCH_LLC_REFERENCE_UMASK;
2477             L2CacheHitRatioAvailable = true;
2478             L2CacheMissesAvailable = true;
2479             L2CacheHitsAvailable = true;
2480             core_gen_counter_num_used = 2;
2481         }
2482         else
2483         switch ( cpu_model ) {
2484             case SNOWRIDGE:
2485                 coreEventDesc[0].event_number = ARCH_LLC_MISS_EVTNR;
2486                 coreEventDesc[0].umask_value = ARCH_LLC_MISS_UMASK;
2487                 coreEventDesc[1].event_number = ARCH_LLC_REFERENCE_EVTNR;
2488                 coreEventDesc[1].umask_value = ARCH_LLC_REFERENCE_UMASK;
2489                 coreEventDesc[2].event_number = SKL_MEM_LOAD_RETIRED_L2_MISS_EVTNR;
2490                 coreEventDesc[2].umask_value = SKL_MEM_LOAD_RETIRED_L2_MISS_UMASK;
2491                 coreEventDesc[3].event_number = SKL_MEM_LOAD_RETIRED_L2_HIT_EVTNR;
2492                 coreEventDesc[3].umask_value = SKL_MEM_LOAD_RETIRED_L2_HIT_UMASK;
2493                 L2CacheHitRatioAvailable = true;
2494                 L3CacheHitRatioAvailable = true;
2495                 L3CacheMissesAvailable = true;
2496                 L2CacheMissesAvailable = true;
2497                 L2CacheHitsAvailable = true;
2498                 L3CacheHitsSnoopAvailable = true;
2499                 L3CacheHitsAvailable = true;
2500                 core_gen_counter_num_used = 4;
2501                 break;
2502             PCM_SKL_PATH_CASES
2503             case SKX:
2504             case ICX:
2505                 assert(useSkylakeEvents());
2506                 coreEventDesc[0].event_number = SKL_MEM_LOAD_RETIRED_L3_MISS_EVTNR;
2507                 coreEventDesc[0].umask_value = SKL_MEM_LOAD_RETIRED_L3_MISS_UMASK;
2508                 coreEventDesc[1].event_number = SKL_MEM_LOAD_RETIRED_L3_HIT_EVTNR;
2509                 coreEventDesc[1].umask_value = SKL_MEM_LOAD_RETIRED_L3_HIT_UMASK;
2510                 coreEventDesc[2].event_number = SKL_MEM_LOAD_RETIRED_L2_MISS_EVTNR;
2511                 coreEventDesc[2].umask_value = SKL_MEM_LOAD_RETIRED_L2_MISS_UMASK;
2512                 coreEventDesc[3].event_number = SKL_MEM_LOAD_RETIRED_L2_HIT_EVTNR;
2513                 coreEventDesc[3].umask_value = SKL_MEM_LOAD_RETIRED_L2_HIT_UMASK;
2514                 if (core_gen_counter_num_max == 3)
2515                 {
2516                     L3CacheHitRatioAvailable = true;
2517                     L3CacheMissesAvailable = true;
2518                     L2CacheMissesAvailable = true;
2519                     L3CacheHitsSnoopAvailable = true;
2520                     L3CacheHitsAvailable = true;
2521                     core_gen_counter_num_used = 3;
2522                     break;
2523                 }
2524                 L2CacheHitRatioAvailable = true;
2525                 L3CacheHitRatioAvailable = true;
2526                 L3CacheMissesAvailable = true;
2527                 L2CacheMissesAvailable = true;
2528                 L2CacheHitsAvailable = true;
2529                 L3CacheHitsSnoopAvailable = true;
2530                 L3CacheHitsAvailable = true;
2531                 core_gen_counter_num_used = 4;
2532                 break;
2533             case SANDY_BRIDGE:
2534             case JAKETOWN:
2535             case IVYTOWN:
2536             case IVY_BRIDGE:
2537             case HASWELL:
2538             case HASWELLX:
2539             case BROADWELL:
2540             case BDX_DE:
2541             case BDX:
2542                 coreEventDesc[0].event_number = ARCH_LLC_MISS_EVTNR;
2543                 coreEventDesc[0].umask_value = ARCH_LLC_MISS_UMASK;
2544                 coreEventDesc[1].event_number = MEM_LOAD_UOPS_LLC_HIT_RETIRED_XSNP_NONE_EVTNR;
2545                 coreEventDesc[1].umask_value = MEM_LOAD_UOPS_LLC_HIT_RETIRED_XSNP_NONE_UMASK;
2546                 coreEventDesc[2].event_number = MEM_LOAD_UOPS_LLC_HIT_RETIRED_XSNP_EVTNR;
2547                 coreEventDesc[2].umask_value = MEM_LOAD_UOPS_LLC_HIT_RETIRED_XSNP_UMASK;
2548                 coreEventDesc[3].event_number = MEM_LOAD_UOPS_RETIRED_L2_HIT_EVTNR;
2549                 coreEventDesc[3].umask_value = MEM_LOAD_UOPS_RETIRED_L2_HIT_UMASK;
2550                 if (core_gen_counter_num_max == 3)
2551                 {
2552                     L3CacheHitRatioAvailable = true;
2553                     L3CacheMissesAvailable = true;
2554                     L2CacheMissesAvailable = true;
2555                     L3CacheHitsNoSnoopAvailable = true;
2556                     L3CacheHitsSnoopAvailable = true;
2557                     L3CacheHitsAvailable = true;
2558                     core_gen_counter_num_used = 3;
2559                     break;
2560                 }
2561                 L2CacheHitRatioAvailable = true;
2562                 L3CacheHitRatioAvailable = true;
2563                 L3CacheMissesAvailable = true;
2564                 L2CacheMissesAvailable = true;
2565                 L2CacheHitsAvailable = true;
2566                 L3CacheHitsNoSnoopAvailable = true;
2567                 L3CacheHitsSnoopAvailable = true;
2568                 L3CacheHitsAvailable = true;
2569                 core_gen_counter_num_used = 4;
2570                 break;
2571             case NEHALEM_EP:
2572             case WESTMERE_EP:
2573             case CLARKDALE:
2574                 coreEventDesc[0].event_number = MEM_LOAD_RETIRED_L3_MISS_EVTNR;
2575                 coreEventDesc[0].umask_value = MEM_LOAD_RETIRED_L3_MISS_UMASK;
2576                 coreEventDesc[1].event_number = MEM_LOAD_RETIRED_L3_UNSHAREDHIT_EVTNR;
2577                 coreEventDesc[1].umask_value = MEM_LOAD_RETIRED_L3_UNSHAREDHIT_UMASK;
2578                 coreEventDesc[2].event_number = MEM_LOAD_RETIRED_L2_HITM_EVTNR;
2579                 coreEventDesc[2].umask_value = MEM_LOAD_RETIRED_L2_HITM_UMASK;
2580                 coreEventDesc[3].event_number = MEM_LOAD_RETIRED_L2_HIT_EVTNR;
2581                 coreEventDesc[3].umask_value = MEM_LOAD_RETIRED_L2_HIT_UMASK;
2582                 L2CacheHitRatioAvailable = true;
2583                 L3CacheHitRatioAvailable = true;
2584                 L3CacheMissesAvailable = true;
2585                 L2CacheMissesAvailable = true;
2586                 L2CacheHitsAvailable = true;
2587                 L3CacheHitsNoSnoopAvailable = true;
2588                 L3CacheHitsSnoopAvailable = true;
2589                 L3CacheHitsAvailable = true;
2590                 core_gen_counter_num_used = 4;
2591                 break;
2592             default:
2593                 assert(!useSkylakeEvents());
2594                 coreEventDesc[0].event_number = ARCH_LLC_MISS_EVTNR;
2595                 coreEventDesc[0].umask_value = ARCH_LLC_MISS_UMASK;
2596                 coreEventDesc[1].event_number = MEM_LOAD_RETIRED_L3_UNSHAREDHIT_EVTNR;
2597                 coreEventDesc[1].umask_value = MEM_LOAD_RETIRED_L3_UNSHAREDHIT_UMASK;
2598                 coreEventDesc[2].event_number = MEM_LOAD_RETIRED_L2_HITM_EVTNR;
2599                 coreEventDesc[2].umask_value = MEM_LOAD_RETIRED_L2_HITM_UMASK;
2600                 coreEventDesc[3].event_number = MEM_LOAD_RETIRED_L2_HIT_EVTNR;
2601                 coreEventDesc[3].umask_value = MEM_LOAD_RETIRED_L2_HIT_UMASK;
2602                 L2CacheHitRatioAvailable = true;
2603                 L3CacheHitRatioAvailable = true;
2604                 L3CacheMissesAvailable = true;
2605                 L2CacheMissesAvailable = true;
2606                 L2CacheHitsAvailable = true;
2607                 L3CacheHitsNoSnoopAvailable = true;
2608                 L3CacheHitsSnoopAvailable = true;
2609                 L3CacheHitsAvailable = true;
2610                 core_gen_counter_num_used = 4;
2611         }
2612     }
2613 
2614     core_fixed_counter_num_used = 3;
2615 
2616     if(EXT_CUSTOM_CORE_EVENTS == mode_ && pExtDesc && pExtDesc->gpCounterCfg)
2617     {
2618         core_gen_counter_num_used = pExtDesc->nGPCounters;
2619     }
2620 
2621     if(cpu_model == JAKETOWN)
2622     {
2623         bool enableWA = false;
2624         for(uint32 i = 0; i< core_gen_counter_num_used; ++i)
2625         {
2626             if(coreEventDesc[i].event_number == MEM_LOAD_UOPS_LLC_HIT_RETIRED_XSNP_EVTNR)
2627                 enableWA = true;
2628         }
2629         enableJKTWorkaround(enableWA); // this has a performance penalty on memory access
2630     }
2631 
2632     if (core_gen_counter_num_used > core_gen_counter_num_max)
2633     {
2634         std::cerr << "PCM ERROR: Trying to program " << core_gen_counter_num_used << " general purpose counters with only "
2635             << core_gen_counter_num_max << " available\n";
2636         return PCM::UnknownError;
2637     }
2638     if (core_fixed_counter_num_used > core_fixed_counter_num_max)
2639     {
2640         std::cerr << "PCM ERROR: Trying to program " << core_fixed_counter_num_used << " fixed counters with only "
2641             << core_fixed_counter_num_max << " available\n";
2642         return PCM::UnknownError;
2643     }
2644 
2645     programmed_pmu = true;
2646 
2647     lastProgrammedCustomCounters.clear();
2648     lastProgrammedCustomCounters.resize(num_cores);
2649     // Version for linux/windows/freebsd/dragonflybsd
2650     for (int i = 0; i < (int)num_cores; ++i)
2651     {
2652         if (isCoreOnline(i) == false) continue;
2653         TemporalThreadAffinity tempThreadAffinity(i, false); // speedup trick for Linux
2654 
2655         const auto status = programCoreCounters(i, mode_, pExtDesc, lastProgrammedCustomCounters[i]);
2656         if (status != PCM::Success)
2657         {
2658             return status;
2659         }
2660 
2661         // program uncore counters
2662 
2663         if (cpu_model == NEHALEM_EP || cpu_model == WESTMERE_EP || cpu_model == CLARKDALE)
2664         {
2665             programNehalemEPUncore(i);
2666         }
2667         else if (hasBecktonUncore())
2668         {
2669             programBecktonUncore(i);
2670         }
2671     }
2672 
2673     if (canUsePerf && !silent)
2674     {
2675         std::cerr << "Successfully programmed on-core PMU using Linux perf\n";
2676     }
2677 
2678     if (hasPCICFGUncore())
2679     {
2680         std::vector<std::future<uint64>> qpi_speeds;
2681         for (size_t i = 0; i < (size_t)server_pcicfg_uncore.size(); ++i)
2682         {
2683             server_pcicfg_uncore[i]->program();
2684             qpi_speeds.push_back(std::async(std::launch::async,
2685                 &ServerPCICFGUncore::computeQPISpeed, server_pcicfg_uncore[i].get(), socketRefCore[i], cpu_model));
2686         }
2687         for (size_t i = 0; i < (size_t)server_pcicfg_uncore.size(); ++i)
2688         {
2689             max_qpi_speed = (std::max)(qpi_speeds[i].get(), max_qpi_speed);
2690         }
2691 
2692 	programCbo();
2693     }
2694 
2695     if (!silent) reportQPISpeed();
2696 
2697     return PCM::Success;
2698 }
2699 
/// \brief Programs the fixed and general-purpose core PMU counters of one core.
///
/// Depending on \c canUsePerf the counters are set up either through the Linux
/// perf_event_open API or by writing the PMU MSRs directly. The event-select
/// registers actually programmed are appended to \c result (which is cleared
/// first) so they can be re-applied later (e.g. after a core comes back online).
///
/// \param i        logical core to program
/// \param mode_    programming mode (default, custom or extended-custom events)
/// \param pExtDesc extended custom event description (may be nullptr unless
///                 mode_ == EXT_CUSTOM_CORE_EVENTS)
/// \param result   out: event select register values programmed on this core
/// \return PCM::Success, or PCM::UnknownError if a perf syscall/programming step fails
PCM::ErrorCode PCM::programCoreCounters(const int i /* core */,
    const PCM::ProgramMode mode_,
    const ExtendedCustomCoreEventDescription * pExtDesc,
    std::vector<EventSelectRegister> & result)
{
    // program core counters

    result.clear();
    FixedEventControlRegister ctrl_reg;
#ifdef PCM_USE_PERF
    // fd of the perf event group leader; -1 means the next opened event becomes the leader
    int leader_counter = -1;
    perf_event_attr e = PCM_init_perf_event_attr();
    // Opens one perf event on core 'i' (attributes taken from the captured 'e')
    // and stores the fd in perfEventHandle[i][eventPos]. Returns false on failure
    // after printing a diagnostic and releasing the instance semaphore.
    auto programPerfEvent = [this, &e, &leader_counter, &i](const int eventPos, const std::string & eventName) -> bool
    {
        // if (i == 0) std::cerr << "DEBUG: programming event "<< std::hex << e.config << std::dec << "\n";
        if ((perfEventHandle[i][eventPos] = syscall(SYS_perf_event_open, &e, -1,
            i /* core id */, leader_counter /* group leader */, 0)) <= 0)
        {
            std::cerr << "Linux Perf: Error when programming " << eventName << ", error: " << strerror(errno) << "\n";
            if (24 == errno) // EMFILE: per-process open file descriptor limit reached
            {
                std::cerr << "try executing 'ulimit -n 10000' to increase the limit on the number of open files.\n";
            }
            else
            {
                std::cerr << "try running with environment variable PCM_NO_PERF=1\n";
            }
            decrementInstanceSemaphore();
            return false;
        }
        return true;
    };
    if (canUsePerf)
    {
        // program the three "fixed" events via generic perf hardware events;
        // INST_RETIRED is opened first and pinned, becoming the group leader
        e.type = PERF_TYPE_HARDWARE;
        e.config = PERF_COUNT_HW_INSTRUCTIONS;
        if (programPerfEvent(PERF_INST_RETIRED_POS, "INST_RETIRED") == false)
        {
            return PCM::UnknownError;
        }
        leader_counter = perfEventHandle[i][PERF_INST_RETIRED_POS];
        e.pinned = 0; // all following counter are not leaders, thus need not be pinned explicitly
        e.config = PERF_COUNT_HW_CPU_CYCLES;
        if (programPerfEvent(PERF_CPU_CLK_UNHALTED_THREAD_POS, "CPU_CLK_UNHALTED_THREAD") == false)
        {
            return PCM::UnknownError;
        }
        e.config = PCM_PERF_COUNT_HW_REF_CPU_CYCLES;
        if (programPerfEvent(PERF_CPU_CLK_UNHALTED_REF_POS, "CPU_CLK_UNHALTED_REF") == false)
        {
            return PCM::UnknownError;
        }
    }
    else
#endif
    {
        // direct MSR programming path
        // disable counters while programming
        MSR[i]->write(IA32_CR_PERF_GLOBAL_CTRL, 0);
        MSR[i]->read(IA32_CR_FIXED_CTR_CTRL, &ctrl_reg.value);


        if (EXT_CUSTOM_CORE_EVENTS == mode_ && pExtDesc && pExtDesc->fixedCfg)
        {
            // caller supplied a non-standard fixed-counter configuration
            ctrl_reg = *(pExtDesc->fixedCfg);
        }
        else
        {
            // default: count fixed counters 0..2 in both OS and user mode
            ctrl_reg.value = 0;

            ctrl_reg.fields.os0 = 1;
            ctrl_reg.fields.usr0 = 1;

            ctrl_reg.fields.os1 = 1;
            ctrl_reg.fields.usr1 = 1;

            ctrl_reg.fields.os2 = 1;
            ctrl_reg.fields.usr2 = 1;

            // fixed counter 3 (TOPDOWN.SLOTS) only where the CPU supports it
            if (isFixedCounterSupported(3))
            {
                ctrl_reg.fields.os3 = 1;
                ctrl_reg.fields.usr3 = 1;
            }
        }

        // zero the fixed counters before enabling them
        MSR[i]->write(INST_RETIRED_ADDR, 0);
        MSR[i]->write(CPU_CLK_UNHALTED_THREAD_ADDR, 0);
        MSR[i]->write(CPU_CLK_UNHALTED_REF_ADDR, 0);
        MSR[i]->write(IA32_CR_FIXED_CTR_CTRL, ctrl_reg.value);
    }

    if (EXT_CUSTOM_CORE_EVENTS == mode_ && pExtDesc)
    {
        if (pExtDesc->OffcoreResponseMsrValue[0]) // still need to do also if perf API is used due to a bug in perf
            MSR[i]->write(MSR_OFFCORE_RSP0, pExtDesc->OffcoreResponseMsrValue[0]);
        if (pExtDesc->OffcoreResponseMsrValue[1])
            MSR[i]->write(MSR_OFFCORE_RSP1, pExtDesc->OffcoreResponseMsrValue[1]);
    }

    // Fills an event select register with the given event code/umask and the
    // standard flags (count OS+user, counter enabled, no edge/cmask/invert etc.)
    auto setEvent = [] (EventSelectRegister & reg, const uint64 event,  const uint64 umask)
    {
            reg.fields.event_select = event;
            reg.fields.umask = umask;
            reg.fields.usr = 1;
            reg.fields.os = 1;
            reg.fields.edge = 0;
            reg.fields.pin_control = 0;
            reg.fields.apic_int = 0;
            reg.fields.any_thread = 0;
            reg.fields.enable = 1;
            reg.fields.invert = 0;
            reg.fields.cmask = 0;
            reg.fields.in_tx = 0;
            reg.fields.in_txcp = 0;
    };
    EventSelectRegister event_select_reg;
    // program the general-purpose (programmable) counters
    for (uint32 j = 0; j < core_gen_counter_num_used; ++j)
    {
        if (EXT_CUSTOM_CORE_EVENTS == mode_ && pExtDesc && pExtDesc->gpCounterCfg)
        {
            // use the caller-provided raw event select value
            event_select_reg = pExtDesc->gpCounterCfg[j];
            event_select_reg.fields.enable = 1;
        }
        else
        {
            MSR[i]->read(IA32_PERFEVTSEL0_ADDR + j, &event_select_reg.value); // read-only also safe for perf

            setEvent(event_select_reg, coreEventDesc[j].event_number, coreEventDesc[j].umask_value);
        }
        result.push_back(event_select_reg);
#ifdef PCM_USE_PERF
        if (canUsePerf)
        {
            // pass the raw event select register value to perf; bit 63 marks
            // the PCM raw-encoding convention for config
            e.type = PERF_TYPE_RAW;
            e.config = (1ULL << 63ULL) + event_select_reg.value;
            // offcore-response events additionally need the MSR value in config1
            if (event_select_reg.fields.event_select == OFFCORE_RESPONSE_0_EVTNR)
                e.config1 = pExtDesc->OffcoreResponseMsrValue[0];
            if (event_select_reg.fields.event_select == OFFCORE_RESPONSE_1_EVTNR)
                e.config1 = pExtDesc->OffcoreResponseMsrValue[1];
            if (programPerfEvent(PERF_GEN_EVENT_0_POS + j, std::string("generic event #") + std::to_string(i)) == false)
            {
                return PCM::UnknownError;
            }
        }
        else
#endif
        {
            // zero the counter, then install the event selection
            MSR[i]->write(IA32_PMC0 + j, 0);
            MSR[i]->write(IA32_PERFEVTSEL0_ADDR + j, event_select_reg.value);
        }
    }

    if (!canUsePerf)
    {
        // start counting, enable all (4 programmable + 3 fixed) counters
        uint64 value = (1ULL << 0) + (1ULL << 1) + (1ULL << 2) + (1ULL << 3) + (1ULL << 32) + (1ULL << 33) + (1ULL << 34);

        if (isFixedCounterSupported(3))
        {
            value |= (1ULL << 35); // enable fixed counter 3 (TOPDOWN.SLOTS)
            MSR[i]->write(TOPDOWN_SLOTS_ADDR, 0);
        }

        if (isHWTMAL1Supported())
        {
            value |= (1ULL << 48); // enable PERF_METRICS
            MSR[i]->write(PERF_METRICS_ADDR, 0);
        }

        if (isAtom() || cpu_model == KNL)       // KNL and Atom have 3 fixed + only 2 programmable counters
            value = (1ULL << 0) + (1ULL << 1) + (1ULL << 32) + (1ULL << 33) + (1ULL << 34);

        for (uint32 j = 0; j < core_gen_counter_num_used; ++j)
        {
            value |= (1ULL << j); // enable all custom counters (if > 4)
        }

        // clear pending overflow indications, then globally enable the counters
        MSR[i]->write(IA32_PERF_GLOBAL_OVF_CTRL, value);
        MSR[i]->write(IA32_CR_PERF_GLOBAL_CTRL, value);
    }
#ifdef PCM_USE_PERF
    else
    {
        // perf path: additionally program the hardware top-down level-1 metric
        // events (slots/bad-spec/backend/frontend/retiring) when supported
        if (isFixedCounterSupported(3) && isHWTMAL1Supported() && perfSupportsTopDown())
        {
            const auto topDownEvents = {  std::make_pair(perfSlotsPath, PERF_TOPDOWN_SLOTS_POS),
                                          std::make_pair(perfBadSpecPath, PERF_TOPDOWN_BADSPEC_POS),
                                          std::make_pair(perfBackEndPath, PERF_TOPDOWN_BACKEND_POS),
                                          std::make_pair(perfFrontEndPath, PERF_TOPDOWN_FRONTEND_POS),
                                          std::make_pair(perfRetiringPath, PERF_TOPDOWN_RETIRING_POS)};
            // top-down values are read after the fixed and generic counters
            int readPos = core_fixed_counter_num_used + core_gen_counter_num_used;
            leader_counter = -1; // the slots event opened first becomes the new group leader
            for (auto event : topDownEvents)
            {
                uint64 eventSel = 0, umask = 0;
                // parse "event=0x..,umask=0x.." description exported by the kernel in sysfs
                const auto eventDesc = readSysFS(event.first);
                const auto tokens = split(eventDesc, ',');
                for (auto token : tokens)
                {
                    if (match(token, "event=", &eventSel)) {}
                    else if (match(token, "umask=", &umask)) {}
                    else
                    {
                        std::cerr << "ERROR: unknown token " << token << " in event description \"" << eventDesc << "\" from " << event.first << "\n";
                        decrementInstanceSemaphore();
                        return PCM::UnknownError;
                    }
                }
                EventSelectRegister reg;
                setEvent(reg, eventSel, umask);
                e.type = PERF_TYPE_RAW;
                e.config = (1ULL << 63ULL) + reg.value;
                // std::cerr << "Programming perf event " << std::hex << e.config << "\n";
                if (programPerfEvent(event.second, std::string("event ") + event.first + " " + eventDesc) == false)
                {
                    return PCM::UnknownError;
                }
                leader_counter = perfEventHandle[i][PERF_TOPDOWN_SLOTS_POS];
                perfTopDownPos[event.second] = readPos++;
            }
        }
    }
#endif
    return PCM::Success;
}
2925 
reportQPISpeed() const2926 void PCM::reportQPISpeed() const
2927 {
2928     if (!max_qpi_speed) return;
2929 
2930     if (hasPCICFGUncore()) {
2931         for (size_t i = 0; i < (size_t)server_pcicfg_uncore.size(); ++i)
2932         {
2933             std::cerr << "Socket " << i << "\n";
2934             if(server_pcicfg_uncore[i].get()) server_pcicfg_uncore[i]->reportQPISpeed();
2935         }
2936     } else {
2937         std::cerr << "Max QPI speed: " << max_qpi_speed / (1e9) << " GBytes/second (" << max_qpi_speed / (1e9*getBytesPerLinkTransfer()) << " GT/second)\n";
2938     }
2939 
2940 }
2941 
programNehalemEPUncore(int32 core)2942 void PCM::programNehalemEPUncore(int32 core)
2943 {
2944 
2945 #define CPUCNT_INIT_THE_REST_OF_EVTCNT \
2946     unc_event_select_reg.fields.occ_ctr_rst = 1; \
2947     unc_event_select_reg.fields.edge = 0; \
2948     unc_event_select_reg.fields.enable_pmi = 0; \
2949     unc_event_select_reg.fields.enable = 1; \
2950     unc_event_select_reg.fields.invert = 0; \
2951     unc_event_select_reg.fields.cmask = 0;
2952 
2953     uncore_gen_counter_num_used = 8;
2954 
2955     UncoreEventSelectRegister unc_event_select_reg;
2956 
2957     MSR[core]->read(MSR_UNCORE_PERFEVTSEL0_ADDR, &unc_event_select_reg.value);
2958 
2959     unc_event_select_reg.fields.event_select = UNC_QMC_WRITES_FULL_ANY_EVTNR;
2960     unc_event_select_reg.fields.umask = UNC_QMC_WRITES_FULL_ANY_UMASK;
2961 
2962     CPUCNT_INIT_THE_REST_OF_EVTCNT
2963 
2964         MSR[core]->write(MSR_UNCORE_PERFEVTSEL0_ADDR, unc_event_select_reg.value);
2965 
2966 
2967     MSR[core]->read(MSR_UNCORE_PERFEVTSEL1_ADDR, &unc_event_select_reg.value);
2968 
2969     unc_event_select_reg.fields.event_select = UNC_QMC_NORMAL_READS_ANY_EVTNR;
2970     unc_event_select_reg.fields.umask = UNC_QMC_NORMAL_READS_ANY_UMASK;
2971 
2972     CPUCNT_INIT_THE_REST_OF_EVTCNT
2973 
2974         MSR[core]->write(MSR_UNCORE_PERFEVTSEL1_ADDR, unc_event_select_reg.value);
2975 
2976 
2977     MSR[core]->read(MSR_UNCORE_PERFEVTSEL2_ADDR, &unc_event_select_reg.value);
2978     unc_event_select_reg.fields.event_select = UNC_QHL_REQUESTS_EVTNR;
2979     unc_event_select_reg.fields.umask = UNC_QHL_REQUESTS_IOH_READS_UMASK;
2980     CPUCNT_INIT_THE_REST_OF_EVTCNT
2981         MSR[core]->write(MSR_UNCORE_PERFEVTSEL2_ADDR, unc_event_select_reg.value);
2982 
2983     MSR[core]->read(MSR_UNCORE_PERFEVTSEL3_ADDR, &unc_event_select_reg.value);
2984     unc_event_select_reg.fields.event_select = UNC_QHL_REQUESTS_EVTNR;
2985     unc_event_select_reg.fields.umask = UNC_QHL_REQUESTS_IOH_WRITES_UMASK;
2986     CPUCNT_INIT_THE_REST_OF_EVTCNT
2987         MSR[core]->write(MSR_UNCORE_PERFEVTSEL3_ADDR, unc_event_select_reg.value);
2988 
2989     MSR[core]->read(MSR_UNCORE_PERFEVTSEL4_ADDR, &unc_event_select_reg.value);
2990     unc_event_select_reg.fields.event_select = UNC_QHL_REQUESTS_EVTNR;
2991     unc_event_select_reg.fields.umask = UNC_QHL_REQUESTS_REMOTE_READS_UMASK;
2992     CPUCNT_INIT_THE_REST_OF_EVTCNT
2993         MSR[core]->write(MSR_UNCORE_PERFEVTSEL4_ADDR, unc_event_select_reg.value);
2994 
2995     MSR[core]->read(MSR_UNCORE_PERFEVTSEL5_ADDR, &unc_event_select_reg.value);
2996     unc_event_select_reg.fields.event_select = UNC_QHL_REQUESTS_EVTNR;
2997     unc_event_select_reg.fields.umask = UNC_QHL_REQUESTS_REMOTE_WRITES_UMASK;
2998     CPUCNT_INIT_THE_REST_OF_EVTCNT
2999         MSR[core]->write(MSR_UNCORE_PERFEVTSEL5_ADDR, unc_event_select_reg.value);
3000 
3001     MSR[core]->read(MSR_UNCORE_PERFEVTSEL6_ADDR, &unc_event_select_reg.value);
3002     unc_event_select_reg.fields.event_select = UNC_QHL_REQUESTS_EVTNR;
3003     unc_event_select_reg.fields.umask = UNC_QHL_REQUESTS_LOCAL_READS_UMASK;
3004     CPUCNT_INIT_THE_REST_OF_EVTCNT
3005         MSR[core]->write(MSR_UNCORE_PERFEVTSEL6_ADDR, unc_event_select_reg.value);
3006 
3007     MSR[core]->read(MSR_UNCORE_PERFEVTSEL7_ADDR, &unc_event_select_reg.value);
3008     unc_event_select_reg.fields.event_select = UNC_QHL_REQUESTS_EVTNR;
3009     unc_event_select_reg.fields.umask = UNC_QHL_REQUESTS_LOCAL_WRITES_UMASK;
3010     CPUCNT_INIT_THE_REST_OF_EVTCNT
3011         MSR[core]->write(MSR_UNCORE_PERFEVTSEL7_ADDR, unc_event_select_reg.value);
3012 
3013 
3014 #undef CPUCNT_INIT_THE_REST_OF_EVTCNT
3015 
3016     // start uncore counting
3017     uint64 value = 255 + (1ULL << 32);           // enable all counters
3018     MSR[core]->write(MSR_UNCORE_PERF_GLOBAL_CTRL_ADDR, value);
3019 
3020     // synchronise counters
3021     MSR[core]->write(MSR_UNCORE_PMC0, 0);
3022     MSR[core]->write(MSR_UNCORE_PMC1, 0);
3023     MSR[core]->write(MSR_UNCORE_PMC2, 0);
3024     MSR[core]->write(MSR_UNCORE_PMC3, 0);
3025     MSR[core]->write(MSR_UNCORE_PMC4, 0);
3026     MSR[core]->write(MSR_UNCORE_PMC5, 0);
3027     MSR[core]->write(MSR_UNCORE_PMC6, 0);
3028     MSR[core]->write(MSR_UNCORE_PMC7, 0);
3029 }
3030 
// Program the Beckton (Nehalem-EX / Westmere-EX) uncore PMUs of the socket
// that 'core' belongs to:
//  * M-Boxes (memory controllers): FVC bcmd/resp match events; counters 0/1
//    count FVC_EV0/FVC_EV1.
//  * B-Boxes: counter 1 counts IMT_INSERTS_WR.
//  * R-Box: QPI ports 0/1/4/5 count incoming data responses (IPERF_CFG0)
//    and null idle flits sent (IPERF_CFG1).
//  * W-Box: fixed counter acts as the uncore TSC.
void PCM::programBecktonUncore(int32 core)
{
    // program Beckton uncore
    // measure the QPI speed once, using the reference core of the first socket
    if (core == socketRefCore[0]) computeQPISpeedBeckton((int)core);

    uint64 value = 1 << 29ULL;           // reset all counters
    MSR[core]->write(U_MSR_PMON_GLOBAL_CTL, value);

    BecktonUncorePMUZDPCTLFVCRegister FVCreg;
    FVCreg.value = 0;
    // the ZDP_CTL_FVC register layout differs between Nehalem-EX and Westmere-EX
    if (cpu_model == NEHALEM_EX)
    {
        FVCreg.fields.bcmd = 0;             // rd_bcmd
        FVCreg.fields.resp = 0;             // ack_resp
        FVCreg.fields.evnt0 = 5;            // bcmd_match
        FVCreg.fields.evnt1 = 6;            // resp_match
        FVCreg.fields.pbox_init_err = 0;
    }
    else
    {
        FVCreg.fields_wsm.bcmd = 0;             // rd_bcmd
        FVCreg.fields_wsm.resp = 0;             // ack_resp
        FVCreg.fields_wsm.evnt0 = 5;            // bcmd_match
        FVCreg.fields_wsm.evnt1 = 6;            // resp_match
        FVCreg.fields_wsm.pbox_init_err = 0;
    }
    MSR[core]->write(MB0_MSR_PMU_ZDP_CTL_FVC, FVCreg.value);
    MSR[core]->write(MB1_MSR_PMU_ZDP_CTL_FVC, FVCreg.value);

    // identical counter-control setup for both memory controllers (M-Boxes)
    BecktonUncorePMUCNTCTLRegister CNTCTLreg;
    CNTCTLreg.value = 0;
    CNTCTLreg.fields.en = 1;
    CNTCTLreg.fields.pmi_en = 0;
    CNTCTLreg.fields.count_mode = 0;
    CNTCTLreg.fields.storage_mode = 0;
    CNTCTLreg.fields.wrap_mode = 1;
    CNTCTLreg.fields.flag_mode = 0;
    CNTCTLreg.fields.inc_sel = 0x0d;           // FVC_EV0
    MSR[core]->write(MB0_MSR_PMU_CNT_CTL_0, CNTCTLreg.value);
    MSR[core]->write(MB1_MSR_PMU_CNT_CTL_0, CNTCTLreg.value);
    CNTCTLreg.fields.inc_sel = 0x0e;           // FVC_EV1
    MSR[core]->write(MB0_MSR_PMU_CNT_CTL_1, CNTCTLreg.value);
    MSR[core]->write(MB1_MSR_PMU_CNT_CTL_1, CNTCTLreg.value);

    value = 1 + ((0x0C) << 1ULL);              // enable bit + (event select IMT_INSERTS_WR)
    MSR[core]->write(BB0_MSR_PERF_CNT_CTL_1, value);
    MSR[core]->write(BB1_MSR_PERF_CNT_CTL_1, value);

    MSR[core]->write(MB0_MSR_PERF_GLOBAL_CTL, 3); // enable two counters
    MSR[core]->write(MB1_MSR_PERF_GLOBAL_CTL, 3); // enable two counters

    MSR[core]->write(BB0_MSR_PERF_GLOBAL_CTL, 2); // enable second counter
    MSR[core]->write(BB1_MSR_PERF_GLOBAL_CTL, 2); // enable second counter

    // program R-Box to monitor QPI traffic

    // enable counting on all counters on the left side (port 0-3)
    MSR[core]->write(R_MSR_PMON_GLOBAL_CTL_7_0, 255);
    // ... on the right side (port 4-7)
    MSR[core]->write(R_MSR_PMON_GLOBAL_CTL_15_8, 255);

    // pick the event
    value = (1 << 7ULL) + (1 << 6ULL) + (1 << 2ULL); // count any (incoming) data responses
    MSR[core]->write(R_MSR_PORT0_IPERF_CFG0, value);
    MSR[core]->write(R_MSR_PORT1_IPERF_CFG0, value);
    MSR[core]->write(R_MSR_PORT4_IPERF_CFG0, value);
    MSR[core]->write(R_MSR_PORT5_IPERF_CFG0, value);

    // pick the event
    value = (1ULL << 30ULL); // count null idle flits sent
    MSR[core]->write(R_MSR_PORT0_IPERF_CFG1, value);
    MSR[core]->write(R_MSR_PORT1_IPERF_CFG1, value);
    MSR[core]->write(R_MSR_PORT4_IPERF_CFG1, value);
    MSR[core]->write(R_MSR_PORT5_IPERF_CFG1, value);

    // R-Box counter controls: written value = enable bit (1) + (config select * 2)
    // choose counter 0 to monitor R_MSR_PORT0_IPERF_CFG0
    MSR[core]->write(R_MSR_PMON_CTL0, 1 + 2 * (0));
    // choose counter 1 to monitor R_MSR_PORT1_IPERF_CFG0
    MSR[core]->write(R_MSR_PMON_CTL1, 1 + 2 * (6));
    // choose counter 8 to monitor R_MSR_PORT4_IPERF_CFG0
    MSR[core]->write(R_MSR_PMON_CTL8, 1 + 2 * (0));
    // choose counter 9 to monitor R_MSR_PORT5_IPERF_CFG0
    MSR[core]->write(R_MSR_PMON_CTL9, 1 + 2 * (6));

    // choose counter 2 to monitor R_MSR_PORT0_IPERF_CFG1
    MSR[core]->write(R_MSR_PMON_CTL2, 1 + 2 * (1));
    // choose counter 3 to monitor R_MSR_PORT1_IPERF_CFG1
    MSR[core]->write(R_MSR_PMON_CTL3, 1 + 2 * (7));
    // choose counter 10 to monitor R_MSR_PORT4_IPERF_CFG1
    MSR[core]->write(R_MSR_PMON_CTL10, 1 + 2 * (1));
    // choose counter 11 to monitor R_MSR_PORT5_IPERF_CFG1
    MSR[core]->write(R_MSR_PMON_CTL11, 1 + 2 * (7));

    // enable uncore TSC counter (fixed one)
    MSR[core]->write(W_MSR_PMON_GLOBAL_CTL, 1ULL << 31ULL);
    MSR[core]->write(W_MSR_PMON_FIXED_CTR_CTL, 1ULL);

    value = (1 << 28ULL) + 1;                  // enable all counters
    MSR[core]->write(U_MSR_PMON_GLOBAL_CTL, value);
}
3131 
3132 uint64 RDTSC();
3133 
computeNominalFrequency()3134 void PCM::computeNominalFrequency()
3135 {
3136     const int ref_core = 0;
3137     uint64 before = 0, after = 0;
3138     MSR[ref_core]->read(IA32_TIME_STAMP_COUNTER, &before);
3139     MySleepMs(1000);
3140     MSR[ref_core]->read(IA32_TIME_STAMP_COUNTER, &after);
3141     nominal_frequency = after-before;
3142 }
getCPUBrandString()3143 std::string PCM::getCPUBrandString()
3144 {
3145     char buffer[sizeof(int)*4*3+1];
3146     PCM_CPUID_INFO * info = (PCM_CPUID_INFO *) buffer;
3147     pcm_cpuid(0x80000002, *info);
3148     ++info;
3149     pcm_cpuid(0x80000003, *info);
3150     ++info;
3151     pcm_cpuid(0x80000004, *info);
3152     buffer[sizeof(int)*4*3] = 0;
3153     std::string result(buffer);
3154     while(result[0]==' ') result.erase(0,1);
3155     std::string::size_type i;
3156     while((i = result.find("  ")) != std::string::npos) result.replace(i,2," "); // remove duplicate spaces
3157     return result;
3158 }
3159 
getCPUFamilyModelString()3160 std::string PCM::getCPUFamilyModelString()
3161 {
3162     char buffer[sizeof(int)*4*3+6];
3163     memset(buffer,0,sizeof(buffer));
3164 #ifdef _MSC_VER
3165     sprintf_s(buffer,sizeof(buffer),"GenuineIntel-%d-%2X-%X",this->cpu_family,this->cpu_model,this->cpu_stepping);
3166 #else
3167     snprintf(buffer,sizeof(buffer),"GenuineIntel-%d-%2X-%X",this->cpu_family,this->cpu_model,this->cpu_stepping);
3168 #endif
3169     std::string result(buffer);
3170     return result;
3171 }
3172 
// Write 1 to MSR_TSX_FORCE_ABORT on every core, which is expected to make a
// 4th programmable core counter available (core_gen_counter_num_max is
// re-read from CPUID afterwards and a warning is printed if it did not grow).
// No-op when the mode is already on, unsupported, or 4+ counters exist.
// 'silent' suppresses the informational (not the warning) message.
void PCM::enableForceRTMAbortMode(const bool silent)
{
    // std::cout << "enableForceRTMAbortMode(): forceRTMAbortMode=" << forceRTMAbortMode << "\n";
    if (!forceRTMAbortMode)
    {
        if (isForceRTMAbortModeAvailable() && (core_gen_counter_num_max < 4))
        {
            for (auto m : MSR)
            {
                const auto res = m->write(MSR_TSX_FORCE_ABORT, 1);
                if (res != sizeof(uint64))
                {
                    // failure on one core is reported but does not abort the loop
                    std::cerr << "Warning: writing 1 to MSR_TSX_FORCE_ABORT failed with error "
                        << res << " on core " << m->getCoreId() << "\n";
                }
            }
            readCoreCounterConfig(true); // re-read core_gen_counter_num_max from CPUID
            if (!silent) std::cerr << "The number of custom counters is now " << core_gen_counter_num_max << "\n";
            if (core_gen_counter_num_max < 4)
            {
                std::cerr << "PCM Warning: the number of custom counters did not increase (" << core_gen_counter_num_max << ")\n";
            }
            forceRTMAbortMode = true;
        }
    }
}
3199 
isForceRTMAbortModeEnabled() const3200 bool PCM::isForceRTMAbortModeEnabled() const
3201 {
3202     return forceRTMAbortMode;
3203 }
3204 
// Write 0 to MSR_TSX_FORCE_ABORT on every core, reverting the effect of
// enableForceRTMAbortMode(); the number of programmable counters is expected
// to drop back to 3 (a warning is printed otherwise). No-op if the mode is
// not currently enabled. 'silent' suppresses the informational message.
void PCM::disableForceRTMAbortMode(const bool silent)
{
    // std::cout << "disableForceRTMAbortMode(): forceRTMAbortMode=" << forceRTMAbortMode << "\n";
    if (forceRTMAbortMode)
    {
        for (auto m : MSR)
        {
            const auto res = m->write(MSR_TSX_FORCE_ABORT, 0);
            if (res != sizeof(uint64))
            {
                // failure on one core is reported but does not abort the loop
                std::cerr << "Warning: writing 0 to MSR_TSX_FORCE_ABORT failed with error "
                    << res << " on core " << m->getCoreId() << "\n";
            }
        }
        readCoreCounterConfig(true); // re-read core_gen_counter_num_max from CPUID
        if (!silent) std::cerr << "The number of custom counters is now " << core_gen_counter_num_max << "\n";
        if (core_gen_counter_num_max != 3)
        {
            std::cerr << "PCM Warning: the number of custom counters is not 3 (" << core_gen_counter_num_max << ")\n";
        }
        forceRTMAbortMode = false;
    }
}
3228 
isForceRTMAbortModeAvailable() const3229 bool PCM::isForceRTMAbortModeAvailable() const
3230 {
3231     PCM_CPUID_INFO info;
3232     pcm_cpuid(7, 0, info); // leaf 7, subleaf 0
3233     return (info.reg.edx & (0x1 << 13)) ? true : false;
3234 }
3235 
get_frequency_from_cpuid()3236 uint64 get_frequency_from_cpuid() // from Pat Fay (Intel)
3237 {
3238     double speed=0;
3239     std::string brand = PCM::getCPUBrandString();
3240     if (brand.length() > std::string::size_type(0))
3241     {
3242         std::string::size_type unitsg = brand.find("GHz");
3243         if(unitsg != std::string::npos)
3244         {
3245             std::string::size_type atsign = brand.rfind(' ', unitsg);
3246             if(atsign != std::string::npos)
3247             {
3248                 std::istringstream(brand.substr(atsign)) >> speed;
3249                 speed *= 1000;
3250             }
3251         }
3252         else
3253         {
3254             std::string::size_type unitsg = brand.find("MHz");
3255             if(unitsg != std::string::npos)
3256             {
3257                 std::string::size_type atsign = brand.rfind(' ', unitsg);
3258                 if(atsign != std::string::npos)
3259                 {
3260                     std::istringstream(brand.substr(atsign)) >> speed;
3261                 }
3262             }
3263         }
3264     }
3265     return (uint64)(speed * 1000. * 1000.);
3266 }
3267 
getSupportedUarchCodenames() const3268 std::string PCM::getSupportedUarchCodenames() const
3269 {
3270     std::ostringstream ostr;
3271     for(int32 i=0; i < static_cast<int32>(PCM::END_OF_MODEL_LIST) ; ++i)
3272         if(isCPUModelSupported((int)i))
3273             ostr << getUArchCodename(i) << ", ";
3274     return std::string(ostr.str().substr(0, ostr.str().length() - 2));
3275 }
3276 
getUnsupportedMessage() const3277 std::string PCM::getUnsupportedMessage() const
3278 {
3279     std::ostringstream ostr;
3280     ostr << "Error: unsupported processor. Only Intel(R) processors are supported (Atom(R) and microarchitecture codename " << getSupportedUarchCodenames() << ").";
3281     return std::string(ostr.str());
3282 }
3283 
// Measure the QPI link speed on Beckton (Nehalem-EX/Westmere-EX) uncores:
// count flits sent on R-Box port 0 during a ~200 ms busy wait and convert
// the observed flit rate into bytes/second (x 8). The result is stored in
// the 'max_qpi_speed' member.
void PCM::computeQPISpeedBeckton(int core_nr)
{
    uint64 startFlits = 0;
    // reset all counters
    MSR[core_nr]->write(U_MSR_PMON_GLOBAL_CTL, 1 << 29ULL);

    // enable counting on all counters on the left side (port 0-3)
    MSR[core_nr]->write(R_MSR_PMON_GLOBAL_CTL_7_0, 255);
    // disable on the right side (port 4-7)
    MSR[core_nr]->write(R_MSR_PMON_GLOBAL_CTL_15_8, 0);

    // count flits sent
    MSR[core_nr]->write(R_MSR_PORT0_IPERF_CFG0, 1ULL << 31ULL);

    // choose counter 0 to monitor R_MSR_PORT0_IPERF_CFG0
    MSR[core_nr]->write(R_MSR_PMON_CTL0, 1 + 2 * (0));

    // enable all counters
    MSR[core_nr]->write(U_MSR_PMON_GLOBAL_CTL, (1 << 28ULL) + 1);

    MSR[core_nr]->read(R_MSR_PMON_CTR0, &startFlits);

    const uint64 timerGranularity = 1000000ULL; // mks
    uint64 startTSC = getTickCount(timerGranularity, (uint32) core_nr);
    uint64 endTSC;
    do
    {
        endTSC = getTickCount(timerGranularity, (uint32) core_nr);
    } while (endTSC - startTSC < 200000ULL); // spin for 200 ms

    uint64 endFlits = 0;
    MSR[core_nr]->read(R_MSR_PMON_CTR0, &endFlits);
    // flits/second * 8 = bytes/second
    max_qpi_speed = (endFlits - startFlits) * 8ULL * timerGranularity / (endTSC - startTSC);

}
3319 
// Verify that the general-purpose (custom) core counters of the core behind
// 'msr' still hold the event selections PCM programmed last
// (lastProgrammedCustomCounters). Returns a bitmask with bit i set for each
// counter i whose configuration was altered by another agent; 0 when nothing
// changed or the check is not applicable.
uint32 PCM::checkCustomCoreProgramming(std::shared_ptr<SafeMsrHandle> msr)
{
    const auto core = msr->getCoreId();
    if (size_t(core) >= lastProgrammedCustomCounters.size() || canUsePerf)
    {
        // checking 'canUsePerf' because corruption detection currently works
        // only if perf is not used, see https://github.com/opcm/pcm/issues/106
        return 0;
    }
    uint32 corruptedCountersMask = 0;

    for (size_t ctr = 0; ctr < lastProgrammedCustomCounters[core].size(); ++ctr)
    {
        EventSelectRegister current;
        if (msr->read(IA32_PERFEVTSEL0_ADDR + ctr, &current.value) != sizeof(current.value))
        {
            // read failure: report and skip this counter rather than flag it as corrupted
            std::cerr << "PCM Error: can not read MSR 0x" << std::hex << (IA32_PERFEVTSEL0_ADDR + ctr) <<
                " on core " << std::dec << core << "\n";
            continue;
        }
        if (canUsePerf)
        {
            // NOTE(review): currently unreachable — the early return above
            // already exits when canUsePerf is set; kept for when that
            // early return is lifted.
            current.fields.apic_int = 0; // perf sets this bit
        }
        if (current.value != lastProgrammedCustomCounters[core][ctr].value)
        {
            std::cerr << "PCM Error: someone has corrupted custom counter " << ctr << " on core " << core
                << " expected value " << lastProgrammedCustomCounters[core][ctr].value << " value read "
                << current.value << "\n";

            corruptedCountersMask |= (1<<ctr);
        }
    }
    return corruptedCountersMask;
}
3355 
// Heuristically detect whether another agent is already using the core PMU.
// Returns true if, on any core, a general-purpose event select MSR is
// non-zero (other than the NMI-watchdog event when PCM itself disabled the
// watchdog) or a PMI handler is armed on the fixed counters; false otherwise.
bool PCM::PMUinUse()
{
    // follow the "Performance Monitoring Unit Sharing Guide" by P. Irelan and Sh. Kuo
    for (int i = 0; i < (int)num_cores; ++i)
    {
        //std::cout << "Core " << i << " examine registers\n";
        uint64 value = 0;
        if (perfmon_version >= 4)
        {
            // perfmon v4 exposes per-counter "in use" bits in MSR_PERF_GLOBAL_INUSE
            MSR[i]->read(MSR_PERF_GLOBAL_INUSE, &value);
            for (uint32 j = 0; j < core_gen_counter_num_max; ++j)
            {
                if (value & (1ULL << j))
                {
                    std::cerr << "WARNING: Custom counter " << j << " is in use. MSR_PERF_GLOBAL_INUSE on core " << i << ": 0x" << std::hex << value << std::dec << "\n";
                    /*
                    Testing MSR_PERF_GLOBAL_INUSE mechanism for a moment. At a later point in time will report BUSY.
                    return true;
                    */
                }
            }
        }

        MSR[i]->read(IA32_CR_PERF_GLOBAL_CTRL, &value);
        // std::cout << "Core " << i << " IA32_CR_PERF_GLOBAL_CTRL is " << std::hex << value << std::dec << "\n";

        EventSelectRegister event_select_reg;
        event_select_reg.value = 0xFFFFFFFFFFFFFFFF;

        // any non-zero event select or armed APIC interrupt indicates another agent
        for (uint32 j = 0; j < core_gen_counter_num_max; ++j)
        {
            MSR[i]->read(IA32_PERFEVTSEL0_ADDR + j, &event_select_reg.value);

            if (event_select_reg.fields.event_select != 0 || event_select_reg.fields.apic_int != 0)
            {
                std::cerr << "WARNING: Core " << i <<" IA32_PERFEVTSEL" << j << "_ADDR is not zeroed " << event_select_reg.value << "\n";

                // event 0x3C / umask 0 is the event left behind by the NMI watchdog
                if (needToRestoreNMIWatchdog == true && event_select_reg.fields.event_select == 0x3C && event_select_reg.fields.umask == 0)
                {
                    // NMI watchdog did not clear its event, ignore it
                    continue;
                }
                return true;
            }
        }

        FixedEventControlRegister ctrl_reg;
        ctrl_reg.value = 0xffffffffffffffff;

        MSR[i]->read(IA32_CR_FIXED_CTR_CTRL, &ctrl_reg.value);

        // Check if someone has installed pmi handler on counter overflow.
        // If so, that agent might potentially need to change counter value
        // for the "sample after"-mode messing up PCM measurements
        if(ctrl_reg.fields.enable_pmi0 || ctrl_reg.fields.enable_pmi1 || ctrl_reg.fields.enable_pmi2)
        {
            std::cerr << "WARNING: Core " << i << " fixed ctrl:" << ctrl_reg.value << "\n";
            if (needToRestoreNMIWatchdog == false) // if NMI watchdog did not clear the fields, ignore it
            {
                return true;
            }
        }
#if 0
        // either os=0,usr=0 (not running) or os=1,usr=1 (fits PCM modus) are ok, other combinations are not
        if(ctrl_reg.fields.os0 != ctrl_reg.fields.usr0 ||
           ctrl_reg.fields.os1 != ctrl_reg.fields.usr1 ||
           ctrl_reg.fields.os2 != ctrl_reg.fields.usr2)
        {
           std::cerr << "WARNING: Core " << i << " fixed ctrl:" << ctrl_reg.value << "\n";
           return true;
        }
#endif
    }
    //std::cout << std::flush
    return false;
}
3432 
// Map a CPU model id to a human-readable microarchitecture codename.
// cpu_model_param < 0 means "use the model of this machine", which also
// enables stepping-based refinement for SKX (Cascade Lake / Cooper Lake).
// Returns "unknown" for unrecognized models.
const char * PCM::getUArchCodename(const int32 cpu_model_param) const
{
    auto cpu_model_ = cpu_model_param;
    if(cpu_model_ < 0)
        cpu_model_ = this->cpu_model ;

    switch(cpu_model_)
    {
        case CENTERTON:
            return "Centerton";
        case BAYTRAIL:
            return "Baytrail";
        case AVOTON:
            return "Avoton";
        case CHERRYTRAIL:
            return "Cherrytrail";
        case APOLLO_LAKE:
            return "Apollo Lake";
        case DENVERTON:
            return "Denverton";
        case SNOWRIDGE:
            return "Snowridge";
        case NEHALEM_EP:
        case NEHALEM:
            return "Nehalem/Nehalem-EP";
        case ATOM:
            return "Atom(tm)";
        case CLARKDALE:
            return "Westmere/Clarkdale";
        case WESTMERE_EP:
            return "Westmere-EP";
        case NEHALEM_EX:
            return "Nehalem-EX";
        case WESTMERE_EX:
            return "Westmere-EX";
        case SANDY_BRIDGE:
            return "Sandy Bridge";
        case JAKETOWN:
            return "Sandy Bridge-EP/Jaketown";
        case IVYTOWN:
            return "Ivy Bridge-EP/EN/EX/Ivytown";
        case HASWELLX:
            return "Haswell-EP/EN/EX";
        case BDX_DE:
            return "Broadwell-DE";
        case BDX:
            return "Broadwell-EP/EX";
        case KNL:
            return "Knights Landing";
        case IVY_BRIDGE:
            return "Ivy Bridge";
        case HASWELL:
            return "Haswell";
        case BROADWELL:
            return "Broadwell";
        case SKL:
            return "Skylake";
        case SKL_UY:
            return "Skylake U/Y";
        case KBL:
            return "Kabylake";
        case KBL_1:
            return "Kabylake/Whiskey Lake";
        case CML:
            return "Comet Lake";
        case ICL:
            return "Icelake";
        case RKL:
            return "Rocket Lake";
        case TGL:
            return "Tiger Lake";
        case SKX:
            if (cpu_model_param >= 0)
            {
                // query for specified cpu_model_param, stepping not provided
                return "Skylake-SP, Cascade Lake-SP";
            }
            // for the local machine the stepping disambiguates the SKX family
            if (isCLX())
            {
                return "Cascade Lake-SP";
            }
            if (isCPX())
            {
                return "Cooper Lake";
            }
            return "Skylake-SP";
        case ICX:
            return "Icelake-SP";
    }
    return "unknown";
}
3524 
// Release core PMU resources acquired by program(): closes the perf event
// file descriptors when perf is used; otherwise disables the generic
// counters and zeroes the event select MSRs on every core (fixed counters
// keep free-running). 'silent' suppresses the final status message.
void PCM::cleanupPMU(const bool silent)
{
#ifdef PCM_USE_PERF
    if(canUsePerf)
    {
      for (int i = 0; i < num_cores; ++i)
        for(int c = 0; c < PERF_MAX_COUNTERS; ++c)
            ::close(perfEventHandle[i][c]);

      return;
    }
#endif

    // follow the "Performance Monitoring Unit Sharing Guide" by P. Irelan and Sh. Kuo
    for (int i = 0; i < (int)num_cores; ++i)
    {
        // disable generic counters and continue free running counting for fixed counters
        MSR[i]->write(IA32_CR_PERF_GLOBAL_CTRL, (1ULL << 32) + (1ULL << 33) + (1ULL << 34));

        for (uint32 j = 0; j < core_gen_counter_num_max; ++j)
        {
            MSR[i]->write(IA32_PERFEVTSEL0_ADDR + j, 0);
        }
    }

    // undo the Jaketown-specific workaround enabled during programming
    if(cpu_model == JAKETOWN)
        enableJKTWorkaround(false);

#ifndef PCM_SILENT
    if (!silent) std::cerr << " Zeroed PMU registers\n";
#endif
}
3557 
cleanupUncorePMUs(const bool silent)3558 void PCM::cleanupUncorePMUs(const bool silent)
3559 {
3560     for (auto & sPMUs : iioPMUs)
3561     {
3562         for (auto & pmu : sPMUs)
3563         {
3564             pmu.second.cleanup();
3565         }
3566     }
3567     for (auto & sCBOPMUs : cboPMUs)
3568     {
3569         for (auto & pmu : sCBOPMUs)
3570         {
3571             pmu.cleanup();
3572         }
3573     }
3574     for (auto & pmu : pcuPMUs)
3575     {
3576         pmu.cleanup();
3577     }
3578     for (auto & uncore : server_pcicfg_uncore)
3579     {
3580         uncore->cleanupPMUs();
3581     }
3582 #ifndef PCM_SILENT
3583     if (!silent) std::cerr << " Zeroed uncore PMU registers\n";
3584 #endif
3585 }
3586 
// Unconditionally zero the core PMU on every core with an MSR handle:
// disable all counters via IA32_CR_PERF_GLOBAL_CTRL, clear the generic
// event select MSRs, and clear the fixed counter control register if any of
// its os/usr/PMI bits are set. Unlike cleanupPMU() this never uses perf.
void PCM::resetPMU()
{
    for (int i = 0; i < (int)MSR.size(); ++i)
    {
        // disable all counters
        MSR[i]->write(IA32_CR_PERF_GLOBAL_CTRL, 0);

        for (uint32 j = 0; j < core_gen_counter_num_max; ++j)
        {
            MSR[i]->write(IA32_PERFEVTSEL0_ADDR + j, 0);
        }


        FixedEventControlRegister ctrl_reg;
        ctrl_reg.value = 0xffffffffffffffff;

        MSR[i]->read(IA32_CR_FIXED_CTR_CTRL, &ctrl_reg.value);
        // write back only when at least one control bit is actually set
        if ((ctrl_reg.fields.os0 ||
             ctrl_reg.fields.usr0 ||
             ctrl_reg.fields.enable_pmi0 ||
             ctrl_reg.fields.os1 ||
             ctrl_reg.fields.usr1 ||
             ctrl_reg.fields.enable_pmi1 ||
             ctrl_reg.fields.os2 ||
             ctrl_reg.fields.usr2 ||
             ctrl_reg.fields.enable_pmi2)
            != 0)
            MSR[i]->write(IA32_CR_FIXED_CTR_CTRL, 0);
    }

#ifndef PCM_SILENT
    std::cerr << " Zeroed PMU registers\n";
#endif
}
// Release RDT (QoS monitoring) resources: reset each online core's RMID
// association (IA32_PQR_ASSOC) and its QM event selection (IA32_QM_EVTSEL)
// back to 0. On Linux with resctrl, the kernel interface does the cleanup.
// No-op when QoS/L3-QoS metrics are not available.
void PCM::cleanupRDT(const bool silent)
{
    if(!(QOSMetricAvailable() && L3QOSMetricAvailable())) {
        return;
    }
#ifdef __linux__
    if (useResctrl)
    {
        resctrl.cleanup();
        return;
    }
#endif

    for(int32 core = 0; core < num_cores; core ++ )
    {
                if(!isCoreOnline(core)) continue;
        uint64 msr_pqr_assoc = 0 ;
        uint64 msr_qm_evtsel = 0;
        int32 rmid = 0;  // RMID 0 = default/unassigned
        int32 event = 0; // event id 0 = no event selected

        //Read 0xC8F MSR for each core
        MSR[core]->read(IA32_PQR_ASSOC, &msr_pqr_assoc);
        // keep the upper (class-of-service) half, zero the RMID in the lower half
        msr_pqr_assoc &= 0xffffffff00000000ULL;

        //Write 0xC8F MSR with RMID 0
        MSR[core]->write(IA32_PQR_ASSOC,msr_pqr_assoc);

        // compose IA32_QM_EVTSEL: RMID in bits 41:32, event id in bits 7:0
        msr_qm_evtsel = rmid & ((1ULL<<10)-1ULL) ;
        msr_qm_evtsel <<= 32 ;
        msr_qm_evtsel |= event & ((1ULL<<8)-1ULL);

        //Write Event Id as 0 and RMID 0 to the MSR for each core
        MSR[core]->write(IA32_QM_EVTSEL,msr_qm_evtsel);

    }


    if (!silent) std::cerr << " Freeing up all RMIDs\n";
}
3661 
// Redirect std::cout into 'filename'. The original stream buffer is saved in
// 'backup_ofile' so that restoreOutput() can undo the redirection.
// NOTE(review): 'outfile' is allocated with new and never deleted —
// restoreOutput() only closes it; presumably an accepted leak-until-exit.
// Confirm before changing the ownership model.
void PCM::setOutput(const std::string filename)
{
     outfile = new std::ofstream(filename.c_str());
     backup_ofile = std::cout.rdbuf();
     std::cout.rdbuf(outfile->rdbuf());
}
3668 
restoreOutput()3669 void PCM::restoreOutput()
3670 {
3671     // restore cout back to what it was originally
3672     if(backup_ofile)
3673         std::cout.rdbuf(backup_ofile);
3674 
3675 // close output file
3676     if(outfile)
3677         outfile->close();
3678 }
3679 
// Top-level cleanup entry point. Performs the core PMU cleanup only when
// this process held the last PCM instance (decrementInstanceSemaphore());
// force-RTM-abort, uncore and RDT cleanup plus the Linux NMI-watchdog
// restore run unconditionally. No-op if nothing was ever programmed.
void PCM::cleanup(const bool silent)
{
    InstanceLock lock(allow_multiple_instances);

    if (MSR.empty()) return; // no MSR handles -> nothing was programmed

    if (!silent) std::cerr << "Cleaning up\n";

    if (decrementInstanceSemaphore())
        cleanupPMU(silent);

    disableForceRTMAbortMode(silent);

    cleanupUncorePMUs(silent);
    cleanupRDT(silent);
#ifdef __linux__
    if (needToRestoreNMIWatchdog)
    {
        enableNMIWatchdog(silent);
        needToRestoreNMIWatchdog = false;
    }
#endif
}
3703 
3704 // hle is only available when cpuid has this:
3705 // HLE: CPUID.07H.EBX.HLE [bit 4]  = 1
supportsHLE() const3706 bool PCM::supportsHLE() const
3707 {
3708     PCM_CPUID_INFO info;
3709     pcm_cpuid(7, 0, info); // leaf 7, subleaf 0
3710 
3711    return (info.reg.ebx & (0x1 << 4)) ? true : false;
3712 }
3713 
3714 // rtm is only available when cpuid has this:
3715 // RTM: CPUID.07H.EBX.RTM [bit 11] = 1
supportsRTM() const3716 bool PCM::supportsRTM() const
3717 {
3718     PCM_CPUID_INFO info;
3719     pcm_cpuid(7, 0, info); // leaf 7, subleaf 0
3720 
3721     return (info.reg.ebx & (0x1 << 11)) ? true : false;
3722 }
3723 
3724 #ifdef __APPLE__
3725 
// Return the current number of PCM instances (Apple only: the MSR driver
// keeps the shared instance count).
uint32 PCM::getNumInstances()
{
    return MSR[0]->getNumInstances();
}
3730 
3731 
// Increment and return the number of PCM instances (Apple only: the MSR
// driver keeps the shared instance count).
uint32 PCM::incrementNumInstances()
{
    return MSR[0]->incrementNumInstances();
}
3736 
decrementNumInstances()3737 uint32 PCM::decrementNumInstances()
3738 {
3739     return MSR[0]->decrementNumInstances();;
3740 }
3741 
// Interpret the raw byte buffer 'value' (e.g. a sysctl result) as a signed
// integer of width 'size' bytes and narrow it to int. Uses memcpy instead of
// pointer casts so unaligned buffers and strict-aliasing rules are respected.
// Unknown widths fall back to reading an int, matching the historic behavior.
int convertUnknownToInt(size_t size, char* value)
{
    if (sizeof(int) == size)
    {
        int result = 0;
        memcpy(&result, value, sizeof(result));
        return result;
    }
    else if (sizeof(long) == size)
    {
        long result = 0;
        memcpy(&result, value, sizeof(result));
        return (int)result; // explicit narrowing, same value semantics as before
    }
    else if (sizeof(long long) == size)
    {
        long long result = 0;
        memcpy(&result, value, sizeof(result));
        return (int)result; // explicit narrowing, same value semantics as before
    }
    else
    {
        // In this case, we don't know what it is so we guess int
        int result = 0;
        memcpy(&result, value, sizeof(result));
        return result;
    }
}
3762 
3763 #endif
3764 
// Decrement the global instance semaphore and return true if this process
// held the last PCM instance (i.e. the caller may perform global cleanup).
// Returns false when other instances remain; returns true immediately when
// the semaphore was never created (decrement before program()).
bool PCM::decrementInstanceSemaphore()
{
    if(allow_multiple_instances == false)
    {
        return programmed_pmu;
    }
    bool isLastInstance = false;
    // when decrement was called before program() the numInstancesSemaphore
    // may not be initialized, causing SIGSEGV. This fixes it.
    if(numInstancesSemaphore == NULL)
        return true;

                #ifdef _MSC_VER
    // NOTE(review): WaitForSingleObject is called twice with a zero timeout;
    // presumably the first call releases this process' own count and the
    // second probes whether any other instance remains — confirm the intent
    // before changing.
    WaitForSingleObject(numInstancesSemaphore, 0);

    DWORD res = WaitForSingleObject(numInstancesSemaphore, 0);
    if (res == WAIT_TIMEOUT)
    {
        // I have the last instance of monitor

        isLastInstance = true;

        CloseHandle(numInstancesSemaphore);
    }
    else if (res == WAIT_OBJECT_0)
    {
        // another instance still holds a count: give the probe decrement back
        ReleaseSemaphore(numInstancesSemaphore, 1, NULL);

        // std::cerr << "Someone else is running monitor instance, no cleanup needed\n";
    }
    else
    {
        // unknown error
        std::cerr << "ERROR: Bad semaphore. Performed cleanup twice?\n";
    }

        #elif __APPLE__
    // the semaphore only guards the driver-side instance counter here
    sem_wait(numInstancesSemaphore);
    uint32 oldValue = PCM::getNumInstances();
    sem_post(numInstancesSemaphore);
    if(oldValue == 0)
    {
    // see same case for linux
    return false;
    }
    sem_wait(numInstancesSemaphore);
    uint32 currValue = PCM::decrementNumInstances();
    sem_post(numInstancesSemaphore);
    if(currValue == 0){
    isLastInstance = true;
    }

    #else // if linux
    int oldValue = -1;
    sem_getvalue(numInstancesSemaphore, &oldValue);
    if(oldValue == 0)
    {
       // the current value is already zero, somewhere the semaphore has been already decremented (and thus the clean up has been done if needed)
       // that means logically we are do not own the last instance anymore, thus returning false
       return false;
    }
    sem_wait(numInstancesSemaphore);
    int curValue = -1;
    sem_getvalue(numInstancesSemaphore, &curValue);
    if (curValue == 0)
    {
        // I have the last instance of monitor

        isLastInstance = true;

        // std::cerr << "I am the last one\n";
    }
        #endif // end ifdef _MSC_VER

    return isLastInstance;
}
3841 
getTickCount(uint64 multiplier,uint32 core)3842 uint64 PCM::getTickCount(uint64 multiplier, uint32 core)
3843 {
3844     return (multiplier * getInvariantTSC(CoreCounterState(), getCoreCounterState(core))) / getNominalFrequency();
3845 }
3846 
getTickCountRDTSCP(uint64 multiplier)3847 uint64 PCM::getTickCountRDTSCP(uint64 multiplier)
3848 {
3849     return (multiplier*RDTSCP())/getNominalFrequency();
3850 }
3851 
getSystemCounterState()3852 SystemCounterState getSystemCounterState()
3853 {
3854     PCM * inst = PCM::getInstance();
3855     SystemCounterState result;
3856     if (inst) result = inst->getSystemCounterState();
3857     return result;
3858 }
3859 
getSocketCounterState(uint32 socket)3860 SocketCounterState getSocketCounterState(uint32 socket)
3861 {
3862     PCM * inst = PCM::getInstance();
3863     SocketCounterState result;
3864     if (inst) result = inst->getSocketCounterState(socket);
3865     return result;
3866 }
3867 
getCoreCounterState(uint32 core)3868 CoreCounterState getCoreCounterState(uint32 core)
3869 {
3870     PCM * inst = PCM::getInstance();
3871     CoreCounterState result;
3872     if (inst) result = inst->getCoreCounterState(core);
3873     return result;
3874 }
3875 
3876 #ifdef PCM_USE_PERF
// Read the core counter values for 'core' through the Linux perf group-read
// interface into outData. A perf group read returns [nr, counter0, ...].
// Fixed+generic counters come from the main group leader; when hardware
// top-down (TMA) is available and perf supports it, the top-down metrics are
// read from their own group and stored after the core counters in outData.
void PCM::readPerfData(uint32 core, std::vector<uint64> & outData)
{
    auto readPerfDataHelper = [this](const uint32 core, std::vector<uint64>& outData, const uint32 leader, const uint32 num_counters)
    {
        // a negative leader fd means this group was never opened for the core
        if (perfEventHandle[core][leader] < 0)
        {
            std::fill(outData.begin(), outData.end(), 0);
            return;
        }
        uint64 data[1 + PERF_MAX_COUNTERS];
        const int32 bytes2read = sizeof(uint64) * (1 + num_counters);
        int result = ::read(perfEventHandle[core][leader], data, bytes2read);
        // data layout: nr counters; counter 0, counter 1, counter 2,...
        if (result != bytes2read)
        {
            std::cerr << "Error while reading perf data. Result is " << result << "\n";
            std::cerr << "Check if you run other competing Linux perf clients.\n";
        }
        else if (data[0] != num_counters)
        {
            std::cerr << "Number of counters read from perf is wrong. Elements read: " << data[0] << "\n";
        }
        else
        {  // copy all counters, they start from position 1 in data
            std::copy((data + 1), (data + 1) + data[0], outData.begin());
        }
    };
    readPerfDataHelper(core, outData, PERF_GROUP_LEADER_COUNTER, core_fixed_counter_num_used + core_gen_counter_num_used);
    if (isHWTMAL1Supported() && perfSupportsTopDown())
    {
        std::vector<uint64> outTopDownData(outData.size(), 0);
        readPerfDataHelper(core, outTopDownData, PERF_TOPDOWN_GROUP_LEADER_COUNTER, PERF_TOPDOWN_COUNTERS);
        // append the top-down metrics after the fixed+generic counter values
        std::copy(outTopDownData.begin(), outTopDownData.begin() + PERF_TOPDOWN_COUNTERS, outData.begin() + core_fixed_counter_num_used + core_gen_counter_num_used);
    }
}
3912 #endif
3913 
readAndAggregateTSC(std::shared_ptr<SafeMsrHandle> msr)3914 void BasicCounterState::readAndAggregateTSC(std::shared_ptr<SafeMsrHandle> msr)
3915 {
3916     uint64 cInvariantTSC = 0;
3917     PCM * m = PCM::getInstance();
3918     const auto cpu_model = m->getCPUModel();
3919     if(m->isAtom() == false || cpu_model == PCM::AVOTON) msr->read(IA32_TIME_STAMP_COUNTER, &cInvariantTSC);
3920     else
3921     {
3922 #ifdef _MSC_VER
3923         cInvariantTSC = ((static_cast<uint64>(GetTickCount()/1000ULL)))*m->getNominalFrequency();
3924 #else
3925         struct timeval tp;
3926         gettimeofday(&tp, NULL);
3927         cInvariantTSC = (double(tp.tv_sec) + tp.tv_usec / 1000000.)*m->getNominalFrequency();
3928 #endif
3929     }
3930     InvariantTSC += cInvariantTSC;
3931 }
3932 
// Reads all core PMU counters for the core behind \p msr and accumulates the
// values into this BasicCounterState: fixed counters (instructions, thread
// cycles, reference cycles), programmable (custom) counters, topdown (TMA
// level-1) slots, L3 occupancy, memory bandwidth, invariant TSC, core C-state
// residencies, thermal headroom and SMI count.
// Reads go through Linux perf when available (m->canUsePerf), otherwise
// directly through the MSR interface.
void BasicCounterState::readAndAggregate(std::shared_ptr<SafeMsrHandle> msr)
{
    uint64 cInstRetiredAny = 0, cCpuClkUnhaltedThread = 0, cCpuClkUnhaltedRef = 0;
    uint64 cL3Occupancy = 0;
    uint64 cCustomEvents[PERF_MAX_CUSTOM_COUNTERS] = {0ULL, 0ULL, 0ULL, 0ULL, 0ULL, 0ULL, 0ULL, 0ULL };
    uint64 cCStateResidency[PCM::MAX_C_STATE + 1];
    memset(cCStateResidency, 0, sizeof(cCStateResidency));
    uint64 thermStatus = 0;
    uint64 cSMICount = 0;
    uint64 cFrontendBoundSlots = 0;
    uint64 cBadSpeculationSlots = 0;
    uint64 cBackendBoundSlots = 0;
    uint64 cRetiringSlots = 0;
    uint64 cAllSlotsRaw = 0;
    const int32 core_id = msr->getCoreId();
    TemporalThreadAffinity tempThreadAffinity(core_id); // speedup trick for Linux

    PCM * m = PCM::getInstance();
    const int32 core_gen_counter_num_max = m->getMaxCustomCoreEvents();
    uint64 overflows = 0;

    // detect counters whose programming was changed by a third party
    const auto corruptedCountersMask = m->checkCustomCoreProgramming(msr);
    // reading core PMU counters
#ifdef PCM_USE_PERF
    if(m->canUsePerf)
    {
        // perf path: all counters come back in one buffer (see readPerfData)
        std::vector<uint64> perfData(PERF_MAX_COUNTERS, 0ULL);
        m->readPerfData(msr->getCoreId(), perfData);
        cInstRetiredAny =       perfData[PCM::PERF_INST_RETIRED_POS];
        cCpuClkUnhaltedThread = perfData[PCM::PERF_CPU_CLK_UNHALTED_THREAD_POS];
        cCpuClkUnhaltedRef =    perfData[PCM::PERF_CPU_CLK_UNHALTED_REF_POS];
        for (int i = 0; i < core_gen_counter_num_max; ++i)
        {
            cCustomEvents[i] = perfData[PCM::PERF_GEN_EVENT_0_POS + i];
        }
        if (m->isHWTMAL1Supported() && perfSupportsTopDown())
        {
            cFrontendBoundSlots =   perfData[m->perfTopDownPos[PCM::PERF_TOPDOWN_FRONTEND_POS]];
            cBadSpeculationSlots =  perfData[m->perfTopDownPos[PCM::PERF_TOPDOWN_BADSPEC_POS]];
            cBackendBoundSlots =    perfData[m->perfTopDownPos[PCM::PERF_TOPDOWN_BACKEND_POS]];
            cRetiringSlots =        perfData[m->perfTopDownPos[PCM::PERF_TOPDOWN_RETIRING_POS]];
            cAllSlotsRaw =          perfData[m->perfTopDownPos[PCM::PERF_TOPDOWN_SLOTS_POS]];
//          if (core_id == 0) std::cout << "DEBUG: "<< cAllSlotsRaw << " " << cFrontendBoundSlots << " " << cBadSpeculationSlots << " " << cBackendBoundSlots << " " << cRetiringSlots << std::endl;
        }
    }
    else
#endif
    {
        // direct MSR path: re-read all counters until no overflow happens
        // in the middle of the read sequence, so the set is consistent
        uint64 overflows_after = 0;

        do
        {
            msr->read(IA32_PERF_GLOBAL_STATUS, &overflows); // read overflows
            // std::cerr << "Debug " << core_id << " IA32_PERF_GLOBAL_STATUS: " << overflows << std::endl;

            msr->read(INST_RETIRED_ADDR, &cInstRetiredAny);
            msr->read(CPU_CLK_UNHALTED_THREAD_ADDR, &cCpuClkUnhaltedThread);
            msr->read(CPU_CLK_UNHALTED_REF_ADDR, &cCpuClkUnhaltedRef);
            for (int i = 0; i < core_gen_counter_num_max; ++i)
            {
                msr->read(IA32_PMC0 + i, &cCustomEvents[i]);
            }

            msr->read(IA32_PERF_GLOBAL_STATUS, &overflows_after); // read overflows again
            // std::cerr << "Debug " << core_id << " IA32_PERF_GLOBAL_STATUS: " << overflows << std::endl;

        } while (overflows != overflows_after); // repeat the reading if an overflow happened during the reading

        msr->write(IA32_PERF_GLOBAL_OVF_CTRL, overflows); // clear overflows
        if (m->isHWTMAL1Supported())
        {
            // PERF_METRICS reports the four TMA level-1 metrics as 8-bit
            // fractions; scale each by the raw slot count and accumulate
            // in the PCM-side per-core arrays (reading is destructive:
            // both MSRs are reset to 0 after the read)
            uint64 perfMetrics = 0, slots = 0;
            msr->lock(); // keep read+reset atomic w.r.t. other PCM threads
            msr->read(PERF_METRICS_ADDR, &perfMetrics);
            msr->read(TOPDOWN_SLOTS_ADDR, &slots);
            msr->write(PERF_METRICS_ADDR, 0);
            msr->write(TOPDOWN_SLOTS_ADDR, 0);
            cFrontendBoundSlots = extract_bits(perfMetrics, 16, 23);
            cBadSpeculationSlots = extract_bits(perfMetrics, 8, 15);
            cBackendBoundSlots = extract_bits(perfMetrics, 24, 31);
            cRetiringSlots = extract_bits(perfMetrics, 0, 7);
            const double total = double(cFrontendBoundSlots + cBadSpeculationSlots + cBackendBoundSlots + cRetiringSlots);
            cFrontendBoundSlots = m->FrontendBoundSlots[core_id] += uint64((double(cFrontendBoundSlots) / total) * double(slots));
            cBadSpeculationSlots = m->BadSpeculationSlots[core_id] += uint64((double(cBadSpeculationSlots) / total) * double(slots));
            cBackendBoundSlots = m->BackendBoundSlots[core_id] += uint64((double(cBackendBoundSlots) / total) * double(slots));
            cRetiringSlots = m->RetiringSlots[core_id] += uint64((double(cRetiringSlots) / total) * double(slots));
            cAllSlotsRaw = m->AllSlotsRaw[core_id] += slots;
            // std::cout << "DEBUG: "<< slots << " " << cFrontendBoundSlots << " " << cBadSpeculationSlots << " " << cBackendBoundSlots << " " << cRetiringSlots << std::endl;
            msr->unlock();
        }
    }

    // invalidate counters that were reprogrammed behind our back
    for (int i = 0; i < core_gen_counter_num_max; ++i)
    {
        if (corruptedCountersMask & (1<<i)) cCustomEvents[i] = ~0ULL;
    }

    // std::cout << "DEBUG1: " << msr->getCoreId() << " " << cInstRetiredAny << " \n";
    if (m->L3CacheOccupancyMetricAvailable() && m->useResctrl == false)
    {
        // read L3 occupancy via the QoS monitoring MSR interface
        msr->lock();
        uint64 event = 1;
        m->initQOSevent(event, core_id);
        msr->read(IA32_QM_CTR, &cL3Occupancy);
        //std::cout << "readAndAggregate reading IA32_QM_CTR " << std::dec << cL3Occupancy << std::dec << "\n";
        msr->unlock();
    }

    m->readAndAggregateMemoryBWCounters(static_cast<uint32>(core_id), *this);

    readAndAggregateTSC(msr);

    // reading core C state counters
    for(int i=0; i <= (int)(PCM::MAX_C_STATE) ;++i)
        if(m->coreCStateMsr && m->coreCStateMsr[i])
                msr->read(m->coreCStateMsr[i], &(cCStateResidency[i]));

    // reading temperature
    msr->read(MSR_IA32_THERM_STATUS, &thermStatus);

    msr->read(MSR_SMI_COUNT, &cSMICount);

    // accumulate into the state; checked_uint64 carries the corresponding
    // overflow bit from IA32_PERF_GLOBAL_STATUS along with the value
    InstRetiredAny += checked_uint64(m->extractCoreFixedCounterValue(cInstRetiredAny), extract_bits(overflows, 32, 32));
    CpuClkUnhaltedThread += checked_uint64(m->extractCoreFixedCounterValue(cCpuClkUnhaltedThread), extract_bits(overflows, 33, 33));
    CpuClkUnhaltedRef += checked_uint64(m->extractCoreFixedCounterValue(cCpuClkUnhaltedRef), extract_bits(overflows, 34, 34));
    for (int i = 0; i < core_gen_counter_num_max; ++i)
    {
        Event[i] += checked_uint64(m->extractCoreGenCounterValue(cCustomEvents[i]), extract_bits(overflows, i, i));
    }
#ifdef __linux__
    if (m->useResctrl)
    {
        // resctrl reports bytes; store KBytes
        L3Occupancy = m->resctrl.getL3OCC(core_id) / 1024;
    }
    else
#endif
    {
        //std::cout << "Scaling Factor " << m->L3ScalingFactor;
        // scale the QoS counter reading to KBytes, propagating the
        // "invalid data" marker unscaled
        cL3Occupancy = m->extractQOSMonitoring(cL3Occupancy);
        L3Occupancy = (cL3Occupancy==PCM_INVALID_QOS_MONITORING_DATA)? PCM_INVALID_QOS_MONITORING_DATA : (uint64)((double)(cL3Occupancy * m->L3ScalingFactor) / 1024.0);
    }
    for(int i=0; i <= int(PCM::MAX_C_STATE);++i)
        CStateResidency[i] += cCStateResidency[i];
    ThermalHeadroom = extractThermalHeadroom(thermStatus);
    SMICount += cSMICount;
    FrontendBoundSlots  += cFrontendBoundSlots;
    BadSpeculationSlots += cBadSpeculationSlots;
    BackendBoundSlots   += cBackendBoundSlots;
    RetiringSlots       += cRetiringSlots;
    AllSlotsRaw         += cAllSlotsRaw;
}
4084 
programServerUncoreLatencyMetrics(bool enable_pmm)4085 PCM::ErrorCode PCM::programServerUncoreLatencyMetrics(bool enable_pmm)
4086 {
4087     uint32 DDRConfig[4] = {0,0,0,0};
4088 
4089     if (enable_pmm == false)
4090     {   //DDR is false
4091         if (ICX == cpu_model)
4092 	{
4093             DDRConfig[0] = MC_CH_PCI_PMON_CTL_EVENT(0x80) + MC_CH_PCI_PMON_CTL_UMASK(1);  // DRAM RPQ occupancy
4094             DDRConfig[1] = MC_CH_PCI_PMON_CTL_EVENT(0x10) + MC_CH_PCI_PMON_CTL_UMASK(1);  // DRAM RPQ Insert
4095             DDRConfig[2] = MC_CH_PCI_PMON_CTL_EVENT(0x81) + MC_CH_PCI_PMON_CTL_UMASK(0);  // DRAM WPQ Occupancy
4096             DDRConfig[3] = MC_CH_PCI_PMON_CTL_EVENT(0x20) + MC_CH_PCI_PMON_CTL_UMASK(0);  // DRAM WPQ Insert
4097 
4098 	} else {
4099 
4100             DDRConfig[0] = MC_CH_PCI_PMON_CTL_EVENT(0x80) + MC_CH_PCI_PMON_CTL_UMASK(0);  // DRAM RPQ occupancy
4101             DDRConfig[1] = MC_CH_PCI_PMON_CTL_EVENT(0x10) + MC_CH_PCI_PMON_CTL_UMASK(0);  // DRAM RPQ Insert
4102             DDRConfig[2] = MC_CH_PCI_PMON_CTL_EVENT(0x81) + MC_CH_PCI_PMON_CTL_UMASK(0);  // DRAM WPQ Occupancy
4103             DDRConfig[3] = MC_CH_PCI_PMON_CTL_EVENT(0x20) + MC_CH_PCI_PMON_CTL_UMASK(0);  // DRAM WPQ Insert
4104 	}
4105     } else {
4106         DDRConfig[0] = MC_CH_PCI_PMON_CTL_EVENT(0xe0) + MC_CH_PCI_PMON_CTL_UMASK(1);  // PMM RDQ occupancy
4107         DDRConfig[1] = MC_CH_PCI_PMON_CTL_EVENT(0xe3) + MC_CH_PCI_PMON_CTL_UMASK(0);  // PMM RDQ Insert
4108         DDRConfig[2] = MC_CH_PCI_PMON_CTL_EVENT(0xe4) + MC_CH_PCI_PMON_CTL_UMASK(1);  // PMM WPQ Occupancy
4109         DDRConfig[3] = MC_CH_PCI_PMON_CTL_EVENT(0xe7) + MC_CH_PCI_PMON_CTL_UMASK(0);  // PMM WPQ Insert
4110     }
4111 
4112     if (DDRLatencyMetricsAvailable())
4113     {
4114         for (size_t i = 0; i < (size_t)server_pcicfg_uncore.size(); ++i)
4115         {
4116             server_pcicfg_uncore[i]->programIMC(DDRConfig);
4117         }
4118     }
4119     return PCM::Success;
4120 }
4121 
programServerUncoreMemoryMetrics(const ServerUncoreMemoryMetrics & metrics,int rankA,int rankB)4122 PCM::ErrorCode PCM::programServerUncoreMemoryMetrics(const ServerUncoreMemoryMetrics & metrics, int rankA, int rankB)
4123 {
4124     if(MSR.empty() || server_pcicfg_uncore.empty())  return PCM::MSRAccessDenied;
4125 
4126     for (int i = 0; (i < (int)server_pcicfg_uncore.size()) && MSR.size(); ++i)
4127     {
4128         server_pcicfg_uncore[i]->programServerUncoreMemoryMetrics(metrics, rankA, rankB);
4129     }
4130 
4131     return PCM::Success;
4132 }
4133 
// Programs the server uncore power-related PMUs:
// - the PCU PMON counters according to \p pcu_profile (0..8, selecting
//   frequency-band residency, C-state occupancy, throttling causes,
//   frequency/C-state transitions or UFS transitions; availability is
//   model-specific),
// - the iMC power metrics according to \p mc_profile,
// - the PCU frequency-band filter from \p freq_bands (3 entries, in units of
//   100 MHz), or 1000/2000/3000 MHz defaults when NULL.
// Returns MSRAccessDenied when MSR or PCICFG uncore access is unavailable.
PCM::ErrorCode PCM::programServerUncorePowerMetrics(int mc_profile, int pcu_profile, int * freq_bands)
{
    if(MSR.empty() || server_pcicfg_uncore.empty())  return PCM::MSRAccessDenied;

    uint32 PCUCntConf[4] = {0,0,0,0};

    PCUCntConf[0] = PCU_MSR_PMON_CTL_EVENT(0); // clock ticks

    switch(pcu_profile)
    {
    case 0:
         PCUCntConf[1] =  PCU_MSR_PMON_CTL_EVENT(0xB); // FREQ_BAND0_CYCLES
         PCUCntConf[2] =  PCU_MSR_PMON_CTL_EVENT(0xC); // FREQ_BAND1_CYCLES
         PCUCntConf[3] =  PCU_MSR_PMON_CTL_EVENT(0xD); // FREQ_BAND2_CYCLES
         break;
    case 1:
         PCUCntConf[1] =  PCU_MSR_PMON_CTL_EVENT(0x80) + PCU_MSR_PMON_CTL_OCC_SEL(1); // POWER_STATE_OCCUPANCY.C0 using CLOCKTICKS + 8th-bit
         PCUCntConf[2] =  PCU_MSR_PMON_CTL_EVENT(0x80) + PCU_MSR_PMON_CTL_OCC_SEL(2); // POWER_STATE_OCCUPANCY.C3 using CLOCKTICKS + 8th-bit
         PCUCntConf[3] =  PCU_MSR_PMON_CTL_EVENT(0x80) + PCU_MSR_PMON_CTL_OCC_SEL(3); // POWER_STATE_OCCUPANCY.C6 using CLOCKTICKS + 8th-bit
         break;
    case 2:
         PCUCntConf[1] =  PCU_MSR_PMON_CTL_EVENT(0x09); // PROCHOT_INTERNAL_CYCLES
         PCUCntConf[2] =  PCU_MSR_PMON_CTL_EVENT(0x0A); // PROCHOT_EXTERNAL_CYCLES
         PCUCntConf[3] =  PCU_MSR_PMON_CTL_EVENT(0x04); // Thermal frequency limit cycles: FREQ_MAX_LIMIT_THERMAL_CYCLES
         break;
    case 3:
         PCUCntConf[1] =  PCU_MSR_PMON_CTL_EVENT(0x04); // Thermal frequency limit cycles: FREQ_MAX_LIMIT_THERMAL_CYCLES
         PCUCntConf[2] =  PCU_MSR_PMON_CTL_EVENT(0x05); // Power frequency limit cycles: FREQ_MAX_POWER_CYCLES
         PCUCntConf[3] =  PCU_MSR_PMON_CTL_EVENT(0x07); // Clipped frequency limit cycles: FREQ_MAX_CURRENT_CYCLES (not supported on SKX and ICX and SNOWRIDGE)
         break;
    case 4: // not supported on SKX and ICX and SNOWRIDGE
         PCUCntConf[1] =  PCU_MSR_PMON_CTL_EVENT(0x06); // OS frequency limit cycles: FREQ_MAX_OS_CYCLES
         PCUCntConf[2] =  PCU_MSR_PMON_CTL_EVENT(0x05); // Power frequency limit cycles: FREQ_MAX_POWER_CYCLES
         PCUCntConf[3] =  PCU_MSR_PMON_CTL_EVENT(0x07); // Clipped frequency limit cycles: FREQ_MAX_CURRENT_CYCLES (not supported on SKX and ICX and SNOWRIDGE)
         break;
    case 5:
         // frequency transition statistics: the event encoding differs per
         // CPU generation
         if(JAKETOWN == cpu_model)
         {
             PCUCntConf[1] =  PCU_MSR_PMON_CTL_EVENT(0) + PCU_MSR_PMON_CTL_EXTRA_SEL + PCU_MSR_PMON_CTL_EDGE_DET ; // number of frequency transitions
             PCUCntConf[2] =  PCU_MSR_PMON_CTL_EVENT(0) + PCU_MSR_PMON_CTL_EXTRA_SEL ; // cycles spent changing frequency
         } else if (IVYTOWN == cpu_model )
         {
             PCUCntConf[1] =  PCU_MSR_PMON_CTL_EVENT(0x60) + PCU_MSR_PMON_CTL_EDGE_DET ; // number of frequency transitions
             PCUCntConf[2] =  PCU_MSR_PMON_CTL_EVENT(0x60) ; // cycles spent changing frequency: FREQ_TRANS_CYCLES
         } else if (HASWELLX == cpu_model || BDX_DE == cpu_model || BDX == cpu_model || SKX == cpu_model || ICX == cpu_model || SNOWRIDGE == cpu_model)
         {
             PCUCntConf[1] =  PCU_MSR_PMON_CTL_EVENT(0x74) + PCU_MSR_PMON_CTL_EDGE_DET ; // number of frequency transitions
             PCUCntConf[2] =  PCU_MSR_PMON_CTL_EVENT(0x74) ; // cycles spent changing frequency: FREQ_TRANS_CYCLES
             if(HASWELLX == cpu_model)
             {
                 PCUCntConf[3] =  PCU_MSR_PMON_CTL_EVENT(0x79) + PCU_MSR_PMON_CTL_EDGE_DET ; // number of UFS transitions
                 PCUCntConf[0] =  PCU_MSR_PMON_CTL_EVENT(0x79)                             ; // UFS transition cycles
             }
         } else
         {
             std::cerr << "ERROR: no frequency transition events defined for CPU model " << cpu_model << "\n";
         }
         break;
    case 6:
         // package C-state transition statistics (model-specific encodings)
         if (IVYTOWN == cpu_model )
         {
             PCUCntConf[2] =  PCU_MSR_PMON_CTL_EVENT(0x2B) + PCU_MSR_PMON_CTL_EDGE_DET ; // PC2 transitions
             PCUCntConf[3] =  PCU_MSR_PMON_CTL_EVENT(0x2D) + PCU_MSR_PMON_CTL_EDGE_DET ; // PC6 transitions
         } else if (HASWELLX == cpu_model || BDX_DE == cpu_model || BDX == cpu_model || SKX == cpu_model || ICX == cpu_model || SNOWRIDGE == cpu_model)
         {
             PCUCntConf[0] =  PCU_MSR_PMON_CTL_EVENT(0x4E)                             ; // PC1e residenicies (not supported on SKX and ICX and SNOWRIDGE)
             PCUCntConf[1] =  PCU_MSR_PMON_CTL_EVENT(0x4E) + PCU_MSR_PMON_CTL_EDGE_DET ; // PC1 transitions (not supported on SKX and ICX and SNOWRIDGE)
             PCUCntConf[2] =  PCU_MSR_PMON_CTL_EVENT(0x2B) + PCU_MSR_PMON_CTL_EDGE_DET ; // PC2 transitions
             PCUCntConf[3] =  PCU_MSR_PMON_CTL_EVENT(0x2D) + PCU_MSR_PMON_CTL_EDGE_DET ; // PC6 transitions
         } else
         {
             std::cerr << "ERROR: no package C-state transition events defined for CPU model " << cpu_model << "\n";
         }
         break;
     case 7:
         // UFS (uncore frequency scaling) transition causes
         if (HASWELLX == cpu_model || BDX_DE == cpu_model || BDX == cpu_model)
         {
             PCUCntConf[0] =  PCU_MSR_PMON_CTL_EVENT(0x7E) ; // UFS_TRANSITIONS_PERF_P_LIMIT
             PCUCntConf[1] =  PCU_MSR_PMON_CTL_EVENT(0x7D) ; // UFS_TRANSITIONS_IO_P_LIMIT
             PCUCntConf[2] =  PCU_MSR_PMON_CTL_EVENT(0x7A) ; // UFS_TRANSITIONS_UP_RING_TRAFFIC
             PCUCntConf[3] =  PCU_MSR_PMON_CTL_EVENT(0x7B) ; // UFS_TRANSITIONS_UP_STALL_CYCLES
         } else
         {
             std::cerr << "ERROR: no UFS transition events defined for CPU model " << cpu_model << "\n";
         }
         break;
    case 8:
         if (HASWELLX == cpu_model || BDX_DE == cpu_model || BDX == cpu_model)
         {
             PCUCntConf[0] =  PCU_MSR_PMON_CTL_EVENT(0x7C) ; // UFS_TRANSITIONS_DOWN
         } else
         {
             std::cerr << "ERROR: no UFS transition events defined for CPU model " << cpu_model << "\n";
         }
         break;
    default:
         // unknown profile: only the default clock-ticks event stays programmed
         std::cerr << "ERROR: unsupported PCU profile " << pcu_profile << "\n";
    }

    for (auto u : server_pcicfg_uncore)
    {
        u->program_power_metrics(mc_profile);
    }
    // frequency-band filter: values are in 100 MHz units
    uint64 filter = 0;
    if (freq_bands == NULL)
    {
        filter =
            PCU_MSR_PMON_BOX_FILTER_BAND_0(10) + // 1000 MHz
            PCU_MSR_PMON_BOX_FILTER_BAND_1(20) + // 2000 MHz
            PCU_MSR_PMON_BOX_FILTER_BAND_2(30);  // 3000 MHz
    }
    else
    {
        filter =
            PCU_MSR_PMON_BOX_FILTER_BAND_0(freq_bands[0]) +
            PCU_MSR_PMON_BOX_FILTER_BAND_1(freq_bands[1]) +
            PCU_MSR_PMON_BOX_FILTER_BAND_2(freq_bands[2]);
    }
    programPCU(PCUCntConf, filter);

    return PCM::Success;
}
4256 
programPCU(uint32 * PCUCntConf,const uint64 filter)4257 void PCM::programPCU(uint32* PCUCntConf, const uint64 filter)
4258 {
4259     for (int i = 0; (i < (int)server_pcicfg_uncore.size()) && MSR.size(); ++i)
4260     {
4261         if (i >= (int)pcuPMUs.size())
4262         {
4263             continue;
4264         }
4265 
4266         uint32 refCore = socketRefCore[i];
4267         TemporalThreadAffinity tempThreadAffinity(refCore); // speedup trick for Linux
4268 
4269         pcuPMUs[i].initFreeze(UNC_PMON_UNIT_CTL_FRZ_EN);
4270 
4271         if (pcuPMUs[i].filter[0].get())
4272         {
4273             *pcuPMUs[i].filter[0] = filter;
4274         }
4275 
4276         program(pcuPMUs[i], &PCUCntConf[0], &PCUCntConf[4], UNC_PMON_UNIT_CTL_FRZ_EN);
4277     }
4278 }
4279 
program(const RawPMUConfigs & curPMUConfigs_,const bool silent)4280 PCM::ErrorCode PCM::program(const RawPMUConfigs& curPMUConfigs_, const bool silent)
4281 {
4282     if (MSR.empty())  return PCM::MSRAccessDenied;
4283     RawPMUConfigs curPMUConfigs = curPMUConfigs_;
4284     constexpr auto globalRegPos = 0;
4285     if (curPMUConfigs.count("core"))
4286     {
4287         // need to program core PMU first
4288         EventSelectRegister regs[PERF_MAX_CUSTOM_COUNTERS];
4289         PCM::ExtendedCustomCoreEventDescription conf;
4290         conf.OffcoreResponseMsrValue[0] = 0;
4291         conf.OffcoreResponseMsrValue[1] = 0;
4292         FixedEventControlRegister fixedReg;
4293 
4294         auto corePMUConfig = curPMUConfigs["core"];
4295         if (corePMUConfig.programmable.size() > (size_t)getMaxCustomCoreEvents())
4296         {
4297             std::cerr << "ERROR: trying to program " << corePMUConfig.programmable.size() << " core PMU counters, which exceeds the max num possible ("<< getMaxCustomCoreEvents() << ").";
4298             return PCM::UnknownError;
4299         }
4300         size_t c = 0;
4301         for (; c < corePMUConfig.programmable.size() && c < (size_t)getMaxCustomCoreEvents() && c < PERF_MAX_CUSTOM_COUNTERS; ++c)
4302         {
4303             regs[c].value = corePMUConfig.programmable[c].first[0];
4304         }
4305         if (globalRegPos < corePMUConfig.programmable.size())
4306         {
4307             conf.OffcoreResponseMsrValue[0] = corePMUConfig.programmable[globalRegPos].first[1];
4308             conf.OffcoreResponseMsrValue[1] = corePMUConfig.programmable[globalRegPos].first[2];
4309         }
4310         conf.nGPCounters = (uint32)c;
4311         conf.gpCounterCfg = regs;
4312         if (corePMUConfig.fixed.empty())
4313         {
4314             conf.fixedCfg = NULL; // default
4315         }
4316         else
4317         {
4318             fixedReg.value = 0;
4319             for (auto cfg : corePMUConfig.fixed)
4320             {
4321                 fixedReg.value |= cfg.first[0];
4322             }
4323             conf.fixedCfg = &fixedReg;
4324         }
4325 
4326         const auto status = program(PCM::EXT_CUSTOM_CORE_EVENTS, &conf, silent);
4327         if (status != PCM::Success)
4328         {
4329             return status;
4330         }
4331         curPMUConfigs.erase("core");
4332     }
4333     for (auto pmuConfig : curPMUConfigs)
4334     {
4335         const auto & type = pmuConfig.first;
4336         const auto & events = pmuConfig.second;
4337         if (events.programmable.empty() && events.fixed.empty())
4338         {
4339             continue;
4340         }
4341         if (events.programmable.size() > ServerUncoreCounterState::maxCounters)
4342         {
4343             std::cerr << "ERROR: trying to program " << events.programmable.size() << " core PMU counters, which exceeds the max num possible (" << ServerUncoreCounterState::maxCounters << ").";
4344             return PCM::UnknownError;
4345         }
4346         uint32 events32[ServerUncoreCounterState::maxCounters] = { 0,0,0,0 };
4347         uint64 events64[ServerUncoreCounterState::maxCounters] = { 0,0,0,0 };
4348         for (size_t c = 0; c < events.programmable.size() && c < ServerUncoreCounterState::maxCounters; ++c)
4349         {
4350             events32[c] = (uint32)events.programmable[c].first[0];
4351             events64[c] = events.programmable[c].first[0];
4352         }
4353         if (type == "m3upi")
4354         {
4355             for (auto uncore : server_pcicfg_uncore)
4356             {
4357                 uncore->programM3UPI(events32);
4358             }
4359         }
4360         else if (type == "xpi" || type == "upi" || type == "qpi")
4361         {
4362             for (auto uncore : server_pcicfg_uncore)
4363             {
4364                 uncore->programXPI(events32);
4365             }
4366         }
4367         else if (type == "imc")
4368         {
4369             for (auto uncore : server_pcicfg_uncore)
4370             {
4371                 uncore->programIMC(events32);
4372             }
4373         }
4374         else if (type == "m2m")
4375         {
4376             for (auto uncore : server_pcicfg_uncore)
4377             {
4378                 uncore->programM2M(events64);
4379             }
4380         }
4381         else if (type == "pcu")
4382         {
4383             uint64 filter = 0;
4384             if (globalRegPos < events.programmable.size())
4385             {
4386                 filter = events.programmable[globalRegPos].first[1];
4387             }
4388             programPCU(events32, filter);
4389         }
4390         else if (type == "ubox")
4391         {
4392             programUBOX(events64);
4393         }
4394         else if (type == "cbo" || type == "cha")
4395         {
4396             uint64 filter0 = 0, filter1 = 0;
4397             if (globalRegPos < events.programmable.size())
4398             {
4399                 filter0 = events.programmable[globalRegPos].first[1];
4400                 filter1 = events.programmable[globalRegPos].first[2];
4401             }
4402             programCboRaw(events64, filter0, filter1);
4403         }
4404         else if (type == "iio")
4405         {
4406             programIIOCounters(events64);
4407         }
4408         else
4409         {
4410             std::cerr << "ERROR: unrecognized PMU type \"" << type << "\"\n";
4411             return PCM::UnknownError;
4412         }
4413     }
4414     return PCM::Success;
4415 }
4416 
freezeServerUncoreCounters()4417 void PCM::freezeServerUncoreCounters()
4418 {
4419     for (int i = 0; (i < (int)server_pcicfg_uncore.size()) && MSR.size(); ++i)
4420     {
4421         server_pcicfg_uncore[i]->freezeCounters();
4422         pcuPMUs[i].freeze(UNC_PMON_UNIT_CTL_FRZ_EN);
4423 
4424         if (IIOEventsAvailable())
4425         {
4426             for (auto & pmu : iioPMUs[i])
4427             {
4428                 pmu.second.freeze(UNC_PMON_UNIT_CTL_RSV);
4429             }
4430         }
4431 
4432         const auto refCore = socketRefCore[i];
4433         TemporalThreadAffinity tempThreadAffinity(refCore); // speedup trick for Linux
4434         for (auto & pmu : cboPMUs[i])
4435         {
4436             pmu.freeze(UNC_PMON_UNIT_CTL_FRZ_EN);
4437         }
4438     }
4439 }
unfreezeServerUncoreCounters()4440 void PCM::unfreezeServerUncoreCounters()
4441 {
4442     for (int i = 0; (i < (int)server_pcicfg_uncore.size()) && MSR.size(); ++i)
4443     {
4444         server_pcicfg_uncore[i]->unfreezeCounters();
4445         pcuPMUs[i].unfreeze(UNC_PMON_UNIT_CTL_FRZ_EN);
4446 
4447         if (IIOEventsAvailable())
4448         {
4449             for (auto & pmu : iioPMUs[i])
4450             {
4451                 pmu.second.unfreeze(UNC_PMON_UNIT_CTL_RSV);
4452             }
4453         }
4454 
4455         const auto refCore = socketRefCore[i];
4456         TemporalThreadAffinity tempThreadAffinity(refCore); // speedup trick for Linux
4457         for (auto & pmu : cboPMUs[i])
4458         {
4459             pmu.unfreeze(UNC_PMON_UNIT_CTL_FRZ_EN);
4460         }
4461     }
4462 }
readAndAggregate(std::shared_ptr<SafeMsrHandle> msr)4463 void UncoreCounterState::readAndAggregate(std::shared_ptr<SafeMsrHandle> msr)
4464 {
4465     const auto coreID = msr->getCoreId();
4466     TemporalThreadAffinity tempThreadAffinity(coreID); // speedup trick for Linux
4467 
4468     auto pcm = PCM::getInstance();
4469     pcm->readAndAggregatePackageCStateResidencies(msr, *this);
4470 }
4471 
getSystemCounterState()4472 SystemCounterState PCM::getSystemCounterState()
4473 {
4474     SystemCounterState result;
4475     if (MSR.size())
4476     {
4477         // read core and uncore counter state
4478         for (int32 core = 0; core < num_cores; ++core)
4479             if ( isCoreOnline( core ) )
4480                 result.readAndAggregate(MSR[core]);
4481 
4482         for (uint32 s = 0; s < (uint32)num_sockets; s++)
4483         {
4484             if ( isSocketOnline( s ) ) {
4485                 readAndAggregateUncoreMCCounters(s, result);
4486                 readAndAggregateEnergyCounters(s, result);
4487             }
4488         }
4489 
4490         readQPICounters(result);
4491 
4492         result.ThermalHeadroom = static_cast<int32>(PCM_INVALID_THERMAL_HEADROOM); // not available for system
4493     }
4494     return result;
4495 }
4496 
// Reads the local and total memory bandwidth monitoring counters for the
// given core and accumulates the values (in MBytes) into \p result.
// On Linux with resctrl enabled the values come from the resctrl filesystem
// (reported in bytes); otherwise they come from the QoS monitoring (CMT/MBM)
// MSR counters scaled by L3ScalingFactor. An invalid QoS reading sets the
// corresponding result field to PCM_INVALID_QOS_MONITORING_DATA instead of
// accumulating.
template <class CounterStateType>
void PCM::readAndAggregateMemoryBWCounters(const uint32 core, CounterStateType & result)
{
#ifdef __linux__
    if (useResctrl)
    {
        // resctrl path: values are in bytes, convert to MBytes
        if (CoreLocalMemoryBWMetricAvailable())
        {
            result.MemoryBWLocal += resctrl.getMBL(core) / (1024*1024);
        }
        if (CoreRemoteMemoryBWMetricAvailable())
        {
            result.MemoryBWTotal += resctrl.getMBT(core) / (1024*1024);
        }
        return;
    }
#endif
     uint64 cMemoryBWLocal = 0;
     uint64 cMemoryBWTotal = 0;

     if(core < memory_bw_local.size())
     {
         cMemoryBWLocal = memory_bw_local[core]->read();
         cMemoryBWLocal = extractQOSMonitoring(cMemoryBWLocal);
         //std::cout << "Read MemoryBWLocal " << cMemoryBWLocal << "\n";
         if(cMemoryBWLocal==PCM_INVALID_QOS_MONITORING_DATA)
             result.MemoryBWLocal = PCM_INVALID_QOS_MONITORING_DATA; // do not accumulate invalid reading
         else
             result.MemoryBWLocal += (uint64)((double)(cMemoryBWLocal * L3ScalingFactor) / (1024.0 * 1024.0));
     }
     if(core < memory_bw_total.size())
     {
         cMemoryBWTotal = memory_bw_total[core]->read();
         cMemoryBWTotal = extractQOSMonitoring(cMemoryBWTotal);
         //std::cout << "Read MemoryBWTotal " << cMemoryBWTotal << "\n";
         if(cMemoryBWTotal==PCM_INVALID_QOS_MONITORING_DATA)
             result.MemoryBWTotal = PCM_INVALID_QOS_MONITORING_DATA; // do not accumulate invalid reading
         else
             result.MemoryBWTotal  += (uint64)((double)(cMemoryBWTotal * L3ScalingFactor) / (1024.0 * 1024.0));
     }
     //std::cout << std::flush;
}
4539 
template <class CounterStateType>
void PCM::readAndAggregateUncoreMCCounters(const uint32 socket, CounterStateType & result)
{
    // Accumulates per-socket uncore memory-controller (and related) counters into
    // 'result'. The data source depends on what the platform exposes:
    //  - free-running bandwidth counters (serverBW),
    //  - PCICFG server uncore PMUs (server_pcicfg_uncore),
    //  - client MMIO bandwidth counters (clientBW, socket 0 only),
    //  - legacy MSR-based uncore PMUs (Nehalem/Westmere EP/EX fallback).
    if (LLCReadMissLatencyMetricsAvailable())
    {
        // TOR occupancy/inserts for IA-originated misses plus uncore clocks;
        // gated by LLCReadMissLatencyMetricsAvailable(), i.e. inputs for the
        // LLC read miss latency metric.
        result.TOROccupancyIAMiss += getCBOCounterState(socket, EventPosition::TOR_OCCUPANCY);
        result.TORInsertsIAMiss += getCBOCounterState(socket, EventPosition::TOR_INSERTS);
        result.UncClocks += getUncoreClocks(socket);
    }

    // Prefer the free-running bandwidth counters when present for this socket;
    // the PCICFG path below then skips the overlapping iMC/PMM reads.
    const bool ReadMCStatsFromServerBW = (socket < serverBW.size());
    if (ReadMCStatsFromServerBW)
    {
        result.UncMCNormalReads += serverBW[socket]->getImcReads();
        result.UncMCFullWrites += serverBW[socket]->getImcWrites();
        if (PMMTrafficMetricsAvailable())
        {
            result.UncPMMReads += serverBW[socket]->getPMMReads();
            result.UncPMMWrites += serverBW[socket]->getPMMWrites();
        }
    }

    if (hasPCICFGUncore())
    {
        if (server_pcicfg_uncore.size() && server_pcicfg_uncore[socket].get())
        {
            // Freeze so all counters below are sampled at a consistent point in time.
            server_pcicfg_uncore[socket]->freezeCounters();
            if (ReadMCStatsFromServerBW == false)
            {
                // iMC read/write counts were not already taken from serverBW above.
                result.UncMCNormalReads += server_pcicfg_uncore[socket]->getImcReads();
                result.UncMCFullWrites += server_pcicfg_uncore[socket]->getImcWrites();
            }
            if (localMemoryRequestRatioMetricAvailable())
            {
                // Total vs. local home-agent requests: from CHA events when the
                // part has a CHA, otherwise from the HA PMU in the PCICFG uncore.
                if (hasCHA())
                {
                    result.UncHARequests += getCBOCounterState(socket, EventPosition::REQUESTS_ALL);
                    result.UncHALocalRequests += getCBOCounterState(socket, EventPosition::REQUESTS_LOCAL);
                }
                else
                {
                    result.UncHARequests += server_pcicfg_uncore[socket]->getHARequests();
                    result.UncHALocalRequests += server_pcicfg_uncore[socket]->getHALocalRequests();
                }
            }
            if (PMMTrafficMetricsAvailable() && (ReadMCStatsFromServerBW == false))
            {
                result.UncPMMReads += server_pcicfg_uncore[socket]->getPMMReads();
                result.UncPMMWrites += server_pcicfg_uncore[socket]->getPMMWrites();
            }
            if (MCDRAMmemoryTrafficMetricsAvailable())
            {
                result.UncEDCNormalReads += server_pcicfg_uncore[socket]->getEdcReads();
                result.UncEDCFullWrites += server_pcicfg_uncore[socket]->getEdcWrites();
            }
            server_pcicfg_uncore[socket]->unfreezeCounters();
        }
    }
    else if(clientBW.get() && socket == 0)
    {
        // Client parts expose a single (socket 0) set of MMIO bandwidth counters.
        result.UncMCNormalReads += clientImcReads->read();
        result.UncMCFullWrites += clientImcWrites->read();
        result.UncMCIORequests += clientIoRequests->read();
    }
    else
    {
        // Legacy fallback: read uncore PMU counters via MSRs on the socket's
        // reference core.
        std::shared_ptr<SafeMsrHandle> msr = MSR[socketRefCore[socket]];
        TemporalThreadAffinity tempThreadAffinity(socketRefCore[socket]); // speedup trick for Linux
        switch (cpu_model)
        {
            case PCM::WESTMERE_EP:
            case PCM::NEHALEM_EP:
            {
                uint64 cUncMCFullWrites = 0;
                uint64 cUncMCNormalReads = 0;
                msr->read(MSR_UNCORE_PMC0, &cUncMCFullWrites);
                msr->read(MSR_UNCORE_PMC1, &cUncMCNormalReads);
                result.UncMCFullWrites += extractUncoreGenCounterValue(cUncMCFullWrites);
                result.UncMCNormalReads += extractUncoreGenCounterValue(cUncMCNormalReads);
            }
            break;
            case PCM::NEHALEM_EX:
            case PCM::WESTMERE_EX:
            {
                // EX parts: sum counters from both memory boxes (MB0/MB1) for reads
                // and both B-boxes (BB0/BB1) for writes.
                uint64 cUncMCNormalReads = 0;
                msr->read(MB0_MSR_PMU_CNT_0, &cUncMCNormalReads);
                result.UncMCNormalReads += extractUncoreGenCounterValue(cUncMCNormalReads);
                msr->read(MB1_MSR_PMU_CNT_0, &cUncMCNormalReads);
                result.UncMCNormalReads += extractUncoreGenCounterValue(cUncMCNormalReads);

                uint64 cUncMCFullWrites = 0;                         // really good approximation of
                msr->read(BB0_MSR_PERF_CNT_1, &cUncMCFullWrites);
                result.UncMCFullWrites += extractUncoreGenCounterValue(cUncMCFullWrites);
                msr->read(BB1_MSR_PERF_CNT_1, &cUncMCFullWrites);
                result.UncMCFullWrites += extractUncoreGenCounterValue(cUncMCFullWrites);
            }
            break;

            default:;
        }
    }
}
4642 
4643 template <class CounterStateType>
readAndAggregateEnergyCounters(const uint32 socket,CounterStateType & result)4644 void PCM::readAndAggregateEnergyCounters(const uint32 socket, CounterStateType & result)
4645 {
4646     if(socket < (uint32)energy_status.size())
4647         result.PackageEnergyStatus += energy_status[socket]->read();
4648 
4649     if (socket < (uint32)dram_energy_status.size())
4650         result.DRAMEnergyStatus += dram_energy_status[socket]->read();
4651 }
4652 
4653 template <class CounterStateType>
readAndAggregatePackageCStateResidencies(std::shared_ptr<SafeMsrHandle> msr,CounterStateType & result)4654 void PCM::readAndAggregatePackageCStateResidencies(std::shared_ptr<SafeMsrHandle> msr, CounterStateType & result)
4655 {
4656     // reading package C state counters
4657     uint64 cCStateResidency[PCM::MAX_C_STATE + 1];
4658     memset(cCStateResidency, 0, sizeof(cCStateResidency));
4659 
4660     for(int i=0; i <= int(PCM::MAX_C_STATE) ;++i)
4661         if(pkgCStateMsr && pkgCStateMsr[i])
4662                 msr->read(pkgCStateMsr[i], &(cCStateResidency[i]));
4663 
4664     for (int i = 0; i <= int(PCM::MAX_C_STATE); ++i)
4665     {
4666         atomic_fetch_add((std::atomic<uint64> *)(result.CStateResidency + i), cCStateResidency[i]);
4667     }
4668 }
4669 
void PCM::readQPICounters(SystemCounterState & result)
{
        // Reads the socket-interconnect (QPI/UPI) traffic counters into 'result'.
        // Three strategies, selected by CPU model:
        //  - NEHALEM_EX/WESTMERE_EX: R-box MSR counters, read once per socket;
        //  - NEHALEM_EP/WESTMERE_EP (2 sockets): estimate traffic from uncore
        //    memory read/write counters;
        //  - modern server parts: PCICFG uncore link-layer counters.
        std::vector<bool> SocketProcessed(num_sockets, false);
        if (cpu_model == PCM::NEHALEM_EX || cpu_model == PCM::WESTMERE_EX)
        {
            for (int32 core = 0; core < num_cores; ++core)
            {
                if(isCoreOnline(core) == false) continue;

                // The uncore TSC is read once, on socket 0's reference core.
                if(core == socketRefCore[0]) MSR[core]->read(W_MSR_PMON_FIXED_CTR, &(result.uncoreTSC));

                uint32 s = topology[core].socket;

                // Read each socket's R-box counters only once, from the first
                // online core found on that socket.
                if (!SocketProcessed[s])
                {
                    TemporalThreadAffinity tempThreadAffinity(core); // speedup trick for Linux

                    // incoming data responses from QPI link 0
                    MSR[core]->read(R_MSR_PMON_CTR1, &(result.incomingQPIPackets[s][0]));
                    // incoming data responses from QPI link 1 (yes, from CTR0)
                    MSR[core]->read(R_MSR_PMON_CTR0, &(result.incomingQPIPackets[s][1]));
                    // incoming data responses from QPI link 2
                    MSR[core]->read(R_MSR_PMON_CTR8, &(result.incomingQPIPackets[s][2]));
                    // incoming data responses from QPI link 3
                    MSR[core]->read(R_MSR_PMON_CTR9, &(result.incomingQPIPackets[s][3]));

                    // outgoing idle flits from QPI link 0
                    MSR[core]->read(R_MSR_PMON_CTR3, &(result.outgoingQPIFlits[s][0]));
                    // outgoing idle flits from QPI link 1 (yes, from CTR0)
                    MSR[core]->read(R_MSR_PMON_CTR2, &(result.outgoingQPIFlits[s][1]));
                    // outgoing idle flits from QPI link 2
                    MSR[core]->read(R_MSR_PMON_CTR10, &(result.outgoingQPIFlits[s][2]));
                    // outgoing idle flits from QPI link 3
                    MSR[core]->read(R_MSR_PMON_CTR11, &(result.outgoingQPIFlits[s][3]));

                    SocketProcessed[s] = true;
                }
            }
        }
        else if ((cpu_model == PCM::NEHALEM_EP || cpu_model == PCM::WESTMERE_EP))
        {
            if (num_sockets == 2)
            {
                // EP parts have no direct link counters; reconstruct cross-socket
                // traffic from per-socket memory read/write breakdowns.
                uint32 SCore[2] = { 0, 0 };
                uint64 Total_Reads[2] = { 0, 0 };
                uint64 Total_Writes[2] = { 0, 0 };
                uint64 IOH_Reads[2] = { 0, 0 };
                uint64 IOH_Writes[2] = { 0, 0 };
                uint64 Remote_Reads[2] = { 0, 0 };
                uint64 Remote_Writes[2] = { 0, 0 };
                uint64 Local_Reads[2] = { 0, 0 };
                uint64 Local_Writes[2] = { 0, 0 };

                // Find one core on each of the two sockets.
                while (topology[SCore[0]].socket != 0) ++(SCore[0]);
                while (topology[SCore[1]].socket != 1) ++(SCore[1]);
                for (int s = 0; s < 2; ++s)
                {
                    TemporalThreadAffinity tempThreadAffinity(SCore[s]); // speedup trick for Linux

                    MSR[SCore[s]]->read(MSR_UNCORE_PMC0, &Total_Writes[s]);
                    MSR[SCore[s]]->read(MSR_UNCORE_PMC1, &Total_Reads[s]);
                    MSR[SCore[s]]->read(MSR_UNCORE_PMC2, &IOH_Reads[s]);
                    MSR[SCore[s]]->read(MSR_UNCORE_PMC3, &IOH_Writes[s]);
                    MSR[SCore[s]]->read(MSR_UNCORE_PMC4, &Remote_Reads[s]);
                    MSR[SCore[s]]->read(MSR_UNCORE_PMC5, &Remote_Writes[s]);
                    MSR[SCore[s]]->read(MSR_UNCORE_PMC6, &Local_Reads[s]);
                    MSR[SCore[s]]->read(MSR_UNCORE_PMC7, &Local_Writes[s]);
                }

#if 1
                // compute Remote_Reads differently: derive them as the remainder
                // of total traffic after all other categories (clamped at 0),
                // instead of trusting the PMC4 reading taken above.
                for (int s = 0; s < 2; ++s)
                {
                    uint64 total = Total_Writes[s] + Total_Reads[s];
                    uint64 rem = IOH_Reads[s]
                                 + IOH_Writes[s]
                                 + Local_Reads[s]
                                 + Local_Writes[s]
                                 + Remote_Writes[s];
                    Remote_Reads[s] = (total > rem) ? (total - rem) : 0;
                }
#endif


                // only an estimation (lower bound) - does not count NT stores correctly
                result.incomingQPIPackets[0][0] = Remote_Reads[1] + Remote_Writes[0];
                result.incomingQPIPackets[0][1] = IOH_Reads[0];
                result.incomingQPIPackets[1][0] = Remote_Reads[0] + Remote_Writes[1];
                result.incomingQPIPackets[1][1] = IOH_Reads[1];
            }
            else
            {
                // for a single socket systems no information is available
                result.incomingQPIPackets[0][0] = 0;
            }
        }
        else if (hasPCICFGUncore())
        {
                // Modern server uncore: per-port link-layer counters, frozen while
                // sampling so all ports of a socket are consistent.
                for (int32 s = 0; (s < (int32)server_pcicfg_uncore.size()); ++s)
                {
                    server_pcicfg_uncore[s]->freezeCounters();
                    for (uint32 port = 0; port < (uint32)getQPILinksPerSocket(); ++port)
                    {
                        // Convert data flits to 64-byte-packet units using the
                        // per-flit payload size of this link generation.
                        result.incomingQPIPackets[s][port] = uint64(double(server_pcicfg_uncore[s]->getIncomingDataFlits(port)) / (64./getDataBytesPerFlit()));
                        result.outgoingQPIFlits[s][port] = server_pcicfg_uncore[s]->getOutgoingFlits(port);
                        result.TxL0Cycles[s][port] = server_pcicfg_uncore[s]->getUPIL0TxCycles(port);
                    }
                    server_pcicfg_uncore[s]->unfreezeCounters();
                }
        }
        // end of reading QPI counters
}
4783 
4784 template <class CounterStateType>
readPackageThermalHeadroom(const uint32 socket,CounterStateType & result)4785 void PCM::readPackageThermalHeadroom(const uint32 socket, CounterStateType & result)
4786 {
4787     if(packageThermalMetricsAvailable())
4788     {
4789         uint64 val = 0;
4790         MSR[socketRefCore[socket]]->read(MSR_PACKAGE_THERM_STATUS,&val);
4791         result.ThermalHeadroom = extractThermalHeadroom(val);
4792     }
4793     else
4794         result.ThermalHeadroom = PCM_INVALID_THERMAL_HEADROOM; // not available
4795 }
4796 
// Explicit instantiation needed in topology.cpp: these template member functions
// are defined in this translation unit, so the concrete counter-state
// instantiations used elsewhere must be emitted here.
template void PCM::readAndAggregatePackageCStateResidencies(std::shared_ptr<SafeMsrHandle>, UncoreCounterState &);
template void PCM::readAndAggregateUncoreMCCounters<UncoreCounterState>(const uint32, UncoreCounterState&);
template void PCM::readAndAggregateEnergyCounters<UncoreCounterState>(const uint32, UncoreCounterState&);
template void PCM::readPackageThermalHeadroom<SocketCounterState>(const uint32, SocketCounterState &);
4802 
getSocketCounterState(uint32 socket)4803 SocketCounterState PCM::getSocketCounterState(uint32 socket)
4804 {
4805     SocketCounterState result;
4806     if (MSR.size())
4807     {
4808         // reading core and uncore counter states
4809         for (int32 core = 0; core < num_cores; ++core)
4810             if (isCoreOnline(core) && (topology[core].socket == int32(socket)))
4811                 result.readAndAggregate(MSR[core]);
4812 
4813         readAndAggregateUncoreMCCounters(socket, result);
4814 
4815         readAndAggregateEnergyCounters(socket, result);
4816 
4817         readPackageThermalHeadroom(socket, result);
4818 
4819     }
4820     return result;
4821 }
4822 
void PCM::getAllCounterStates(SystemCounterState & systemState, std::vector<SocketCounterState> & socketStates, std::vector<CoreCounterState> & coreStates)
{
    // Snapshots core, socket (uncore) and system (QPI/UPI) counter state in one
    // pass. Per-core and per-socket reads are dispatched as tasks onto the
    // per-core worker queues (coreTaskQueues) to run concurrently; the QPI
    // counters are read on the calling thread in the meantime, and the results
    // are folded upwards (core -> socket -> system) after all tasks complete.
    // clear and zero-initialize all inputs
    systemState = SystemCounterState();
    socketStates.clear();
    socketStates.resize(num_sockets);
    coreStates.clear();
    coreStates.resize(num_cores);

    std::vector<std::future<void> > asyncCoreResults;

    for (int32 core = 0; core < num_cores; ++core)
    {
        // read core counters
        if (isCoreOnline(core))
        {
            // Each task reads its own core's counters plus the package C-state
            // residencies into the owning socket's state (thread-safe: the
            // C-state aggregation uses atomic adds).
            std::packaged_task<void()> task([this,&coreStates,&socketStates,core]() -> void
                {
                    coreStates[core].readAndAggregate(MSR[core]);
                    socketStates[topology[core].socket].UncoreCounterState::readAndAggregate(MSR[core]); // read package C state counters
                }
            );
            asyncCoreResults.push_back(task.get_future());
            coreTaskQueues[core]->push(task);
        }
        // std::cout << "DEBUG2: " << core << " " << coreStates[core].InstRetiredAny << " \n";
    }
    // std::cout << std::flush;
    for (uint32 s = 0; s < (uint32)num_sockets; ++s)
    {
        // Per-socket uncore reads are queued on the socket's reference core
        // (falling back to core 0 if no reference core is known).
        int32 refCore = socketRefCore[s];
        if (refCore<0) refCore = 0;
        std::packaged_task<void()> task([this, s, &socketStates]() -> void
            {
                readAndAggregateUncoreMCCounters(s, socketStates[s]);
                readAndAggregateEnergyCounters(s, socketStates[s]);
                readPackageThermalHeadroom(s, socketStates[s]);
            } );
        asyncCoreResults.push_back(task.get_future());
        coreTaskQueues[refCore]->push(task);
    }

    // Read the socket-interconnect counters on this thread while the per-core
    // tasks run.
    readQPICounters(systemState);

    // Wait for every queued task before aggregating.
    for (auto & ar : asyncCoreResults)
        ar.wait();

    for (int32 core = 0; core < num_cores; ++core)
    {   // aggregate core counters into sockets
        if(isCoreOnline(core))
          socketStates[topology[core].socket] += coreStates[core];
    }

    for (int32 s = 0; s < num_sockets; ++s)
    {   // aggregate core counters from sockets into system state and
        // aggregate socket uncore iMC, energy and package C state counters into system
        systemState += socketStates[s];
    }
}
4882 
void PCM::getUncoreCounterStates(SystemCounterState & systemState, std::vector<SocketCounterState> & socketStates)
{
    // Cheaper variant of getAllCounterStates(): reads only socket-level uncore
    // counters plus the TSC of each socket's reference core (no per-core PMU
    // reads, no worker-thread dispatch).
    // clear and zero-initialize all inputs
    systemState = SystemCounterState();
    socketStates.clear();
    socketStates.resize(num_sockets);
    std::vector<CoreCounterState> refCoreStates(num_sockets);

    for (uint32 s = 0; s < (uint32)num_sockets; ++s)
    {
        const int32 refCore = socketRefCore[s];
        if(isCoreOnline(refCore))
        {
            // Only the TSC is sampled per socket, on the reference core.
            refCoreStates[s].readAndAggregateTSC(MSR[refCore]);
        }
        readAndAggregateUncoreMCCounters(s, socketStates[s]);
        readAndAggregateEnergyCounters(s, socketStates[s]);
        readPackageThermalHeadroom(s, socketStates[s]);
    }

    readQPICounters(systemState);

    for (int32 s = 0; s < num_sockets; ++s)
    {
        const int32 refCore = socketRefCore[s];
        if(isCoreOnline(refCore))
        {
            // Add the reference core's TSC once per online core of the socket —
            // presumably to mirror getAllCounterStates(), where every core
            // contributes its own TSC to the socket aggregate (TODO confirm).
            for(uint32 core=0; core < getNumCores(); ++core)
            {
                if(topology[core].socket == s && isCoreOnline(core))
                    socketStates[s] += refCoreStates[s];
            }
        }
        // aggregate socket uncore iMC, energy counters into system
        systemState += socketStates[s];
    }
}
4920 
getCoreCounterState(uint32 core)4921 CoreCounterState PCM::getCoreCounterState(uint32 core)
4922 {
4923     CoreCounterState result;
4924     if (MSR.size()) result.readAndAggregate(MSR[core]);
4925     return result;
4926 }
4927 
getNumCores() const4928 uint32 PCM::getNumCores() const
4929 {
4930     return (uint32)num_cores;
4931 }
4932 
getNumOnlineCores() const4933 uint32 PCM::getNumOnlineCores() const
4934 {
4935     return (uint32)num_online_cores;
4936 }
4937 
getNumSockets() const4938 uint32 PCM::getNumSockets() const
4939 {
4940     return (uint32)num_sockets;
4941 }
4942 
getNumOnlineSockets() const4943 uint32 PCM::getNumOnlineSockets() const
4944 {
4945     return (uint32)num_online_sockets;
4946 }
4947 
4948 
getThreadsPerCore() const4949 uint32 PCM::getThreadsPerCore() const
4950 {
4951     return (uint32)threads_per_core;
4952 }
4953 
getSMT() const4954 bool PCM::getSMT() const
4955 {
4956     return threads_per_core > 1;
4957 }
4958 
getNominalFrequency() const4959 uint64 PCM::getNominalFrequency() const
4960 {
4961     return nominal_frequency;
4962 }
4963 
getL3ScalingFactor() const4964 uint32 PCM::getL3ScalingFactor() const
4965 {
4966     PCM_CPUID_INFO cpuinfo;
4967     pcm_cpuid(0xf,0x1,cpuinfo);
4968 
4969     return (uint32)cpuinfo.reg.ebx;
4970 
4971 }
4972 
bool PCM::isSomeCoreOfflined()
{
    // Returns true when fewer logical cores are online than the hardware can
    // expose. CPUID leaf 0xB (extended topology), sub-leaf 1: EBX[15:0] is the
    // number of logical processors per package.
    PCM_CPUID_INFO cpuid_args;
    pcm_cpuid(0xB,1,cpuid_args);
    uint32 max_num_lcores_per_socket = cpuid_args.reg.ebx & 0xFFFF;
    uint32 max_num_lcores = max_num_lcores_per_socket * getNumSockets();
    // Special case: with HT disabled in the BIOS the usable core count is half
    // of what CPUID reports, but nothing is actually "offlined".
    if(threads_per_core == 1 && (getNumOnlineCores() * 2 == max_num_lcores)) // HT is disabled in the BIOS
    {
       return false;
    }
    return !(getNumOnlineCores() == max_num_lcores);
}
4985 
ServerUncoreCounterState PCM::getServerUncoreCounterState(uint32 socket)
{
    // Full snapshot of every server uncore PMU for one socket: free-running
    // bandwidth counters, xPI/M3UPI link counters, per-channel iMC/EDC counters,
    // M2M counters, CBO/IIO/UBOX/PCU counters, thermal headroom, TSC, package
    // C-state residencies and energy counters.
    ServerUncoreCounterState result;
    if (socket < serverBW.size() && serverBW[socket].get())
    {
        // Free-running memory bandwidth counters (no freeze needed).
        result.freeRunningCounter[ServerUncoreCounterState::ImcReads] = serverBW[socket]->getImcReads();
        result.freeRunningCounter[ServerUncoreCounterState::ImcWrites] = serverBW[socket]->getImcWrites();
        result.freeRunningCounter[ServerUncoreCounterState::PMMReads] = serverBW[socket]->getPMMReads();
        result.freeRunningCounter[ServerUncoreCounterState::PMMWrites] = serverBW[socket]->getPMMWrites();
    }
    if(server_pcicfg_uncore.size() && server_pcicfg_uncore[socket].get())
    {
        // Freeze the PCICFG uncore PMUs so all counters below form a consistent
        // snapshot.
        server_pcicfg_uncore[socket]->freezeCounters();
        for(uint32 port=0;port < (uint32)server_pcicfg_uncore[socket]->getNumQPIPorts();++port)
        {
            assert(port < result.xPICounter.size());
            for (uint32 cnt = 0; cnt < ServerUncoreCounterState::maxCounters; ++cnt)
                result.xPICounter[port][cnt] = server_pcicfg_uncore[socket]->getQPILLCounter(port, cnt);
            assert(port < result.M3UPICounter.size());
            for (uint32 cnt = 0; cnt < ServerUncoreCounterState::maxCounters; ++cnt)
                result.M3UPICounter[port][cnt] = server_pcicfg_uncore[socket]->getM3UPICounter(port, cnt);
        }
        for (uint32 channel = 0; channel < (uint32)server_pcicfg_uncore[socket]->getNumMCChannels(); ++channel)
        {
            assert(channel < result.DRAMClocks.size());
            result.DRAMClocks[channel] = server_pcicfg_uncore[socket]->getDRAMClocks(channel);
            assert(channel < result.MCCounter.size());
            for (uint32 cnt = 0; cnt < ServerUncoreCounterState::maxCounters; ++cnt)
                result.MCCounter[channel][cnt] = server_pcicfg_uncore[socket]->getMCCounter(channel, cnt);
        }
        for (uint32 channel = 0; channel < (uint32)server_pcicfg_uncore[socket]->getNumEDCChannels(); ++channel)
        {
            assert(channel < result.MCDRAMClocks.size());
            result.MCDRAMClocks[channel] = server_pcicfg_uncore[socket]->getMCDRAMClocks(channel);
            assert(channel < result.EDCCounter.size());
            for (uint32 cnt = 0; cnt < ServerUncoreCounterState::maxCounters; ++cnt)
                result.EDCCounter[channel][cnt] = server_pcicfg_uncore[socket]->getEDCCounter(channel, cnt);
        }
        // M2M (mesh-to-memory) counters, one block per memory controller.
        for (uint32 controller = 0; controller < (uint32)server_pcicfg_uncore[socket]->getNumMC(); ++controller)
        {
            assert(controller < result.M2MCounter.size());
            for (uint32 cnt = 0; cnt < ServerUncoreCounterState::maxCounters; ++cnt)
                result.M2MCounter[controller][cnt] = server_pcicfg_uncore[socket]->getM2MCounter(controller, cnt);
        }
        server_pcicfg_uncore[socket]->unfreezeCounters();
    }
    if (MSR.size())
    {
        // MSR/MMIO-mapped PMUs are read from the socket's reference core, with
        // thread affinity pinned there as a Linux speedup.
        uint32 refCore = socketRefCore[socket];
        TemporalThreadAffinity tempThreadAffinity(refCore);
        for (uint32 cbo = 0; socket < cboPMUs.size() && cbo < cboPMUs[socket].size() && cbo < ServerUncoreCounterState::maxCBOs; ++cbo)
        {
            for (int i = 0; i < ServerUncoreCounterState::maxCounters; ++i)
            {
                result.CBOCounter[cbo][i] = *(cboPMUs[socket][cbo].counterValue[i]);
            }
        }
        for (uint32 stack = 0; socket < iioPMUs.size() && stack < iioPMUs[socket].size() && stack < ServerUncoreCounterState::maxIIOStacks; ++stack)
        {
            for (int i = 0; i < ServerUncoreCounterState::maxCounters; ++i)
            {
                result.IIOCounter[stack][i] = *(iioPMUs[socket][stack].counterValue[i]);
            }
        }
        for (int i = 0; i < 2 && socket < uboxPMUs.size(); ++i)
        {
            result.UBOXCounter[i] = *(uboxPMUs[socket].counterValue[i]);
            // NOTE: this assignment is repeated on each iteration; it is
            // idempotent (same value written twice).
            result.UncClocks = getUncoreClocks(socket);
        }
        for (int i = 0; i < ServerUncoreCounterState::maxCounters && socket < pcuPMUs.size(); ++i)
            result.PCUCounter[i] = *pcuPMUs[socket].counterValue[i];
        // std::cout << "values read: " << result.PCUCounter[0] << " " << result.PCUCounter[1] << " " << result.PCUCounter[2] << " " << result.PCUCounter[3] << "\n";
        uint64 val=0;
        //MSR[refCore]->read(MSR_PKG_ENERGY_STATUS,&val);
        //std::cout << "Energy status: " << val << "\n";
        MSR[refCore]->read(MSR_PACKAGE_THERM_STATUS,&val);
        result.PackageThermalHeadroom = extractThermalHeadroom(val);
        MSR[refCore]->read(IA32_TIME_STAMP_COUNTER, &result.InvariantTSC);
        readAndAggregatePackageCStateResidencies(MSR[refCore], result);
    }
    // std::cout << std::flush;
    readAndAggregateEnergyCounters(socket, result);

    return result;
}
5071 
5072 #ifndef _MSC_VER
print_mcfg(const char * path)5073 void print_mcfg(const char * path)
5074 {
5075     int mcfg_handle = ::open(path, O_RDONLY);
5076 
5077     if (mcfg_handle < 0)
5078     {
5079         std::cerr << "PCM Error: Cannot open " << path << "\n";
5080         throw std::exception();
5081     }
5082 
5083     MCFGHeader header;
5084 
5085     ssize_t read_bytes = ::read(mcfg_handle, (void *)&header, sizeof(MCFGHeader));
5086 
5087     if(read_bytes == 0)
5088     {
5089         std::cerr << "PCM Error: Cannot read " << path << "\n";
5090         throw std::exception();
5091     }
5092 
5093     const unsigned segments = header.nrecords();
5094     header.print();
5095     std::cout << "Segments: " << segments << "\n";
5096 
5097     for(unsigned int i=0; i<segments;++i)
5098     {
5099         MCFGRecord record;
5100         read_bytes = ::read(mcfg_handle, (void *)&record, sizeof(MCFGRecord));
5101         if(read_bytes == 0)
5102         {
5103               std::cerr << "PCM Error: Cannot read " << path << " (2)\n";
5104               throw std::exception();
5105         }
5106         std::cout << "Segment " << std::dec << i << " ";
5107         record.print();
5108     }
5109 
5110     ::close(mcfg_handle);
5111 }
5112 #endif
5113 
5114 
// PCI device IDs of integrated memory controller (iMC) functions across the
// supported server generations; used by initSocket2Bus() below to discover the
// uncore PCI bus of each socket.
static const uint32 IMC_DEV_IDS[] = {
    0x03cb0,
    0x03cb1,
    0x03cb4,
    0x03cb5,
    0x0EB4,
    0x0EB5,
    0x0EB0,
    0x0EB1,
    0x0EF4,
    0x0EF5,
    0x0EF0,
    0x0EF1,
    0x2fb0,
    0x2fb1,
    0x2fb4,
    0x2fb5,
    0x2fd0,
    0x2fd1,
    0x2fd4,
    0x2fd5,
    0x6fb0,
    0x6fb1,
    0x6fb4,
    0x6fb5,
    0x6fd0,
    0x6fd1,
    0x6fd4,
    0x6fd5,
    0x2042,
    0x2046,
    0x204a,
    0x7840,
    0x7841,
    0x7842,
    0x7843,
    0x7844,
    0x781f
};

// PCI device IDs of UPI link devices (same bus-discovery purpose as above).
static const uint32 UPI_DEV_IDS[] = {
    0x2058,
    0x3441
};

// PCI device IDs of M2M (mesh-to-memory) devices.
static const uint32 M2M_DEV_IDS[] = {
    0x2066,
    0x344A
};

// Guards the lazy, one-time population of the socket-to-bus tables below.
Mutex socket2busMutex;
// Cached socket -> (PCI segment group, bus number) mappings, filled once by
// initSocket2Bus() and shared by all ServerPCICFGUncore instances.
std::vector<std::pair<uint32,uint32> > ServerPCICFGUncore::socket2iMCbus;
std::vector<std::pair<uint32,uint32> > ServerPCICFGUncore::socket2UPIbus;
std::vector<std::pair<uint32,uint32> > ServerPCICFGUncore::socket2M2Mbus;
5169 
void initSocket2Bus(std::vector<std::pair<uint32, uint32> > & socket2bus, uint32 device, uint32 function, const uint32 DEV_IDS[], uint32 devIdsSize)
{
    // Scans every PCI bus of every MCFG segment for an Intel device at
    // (bus, device, function) whose device ID is listed in DEV_IDS, and appends
    // each match as a (segment group, bus) pair to socket2bus. Runs at most once
    // per table: the result is cached and protected by socket2busMutex.
    if (device == PCM_INVALID_DEV_ADDR || function == PCM_INVALID_FUNC_ADDR)
    {
        return;
    }
    Mutex::Scope _(socket2busMutex);
    if(!socket2bus.empty()) return;

    #ifdef __linux__
    const std::vector<MCFGRecord> & mcfg = PciHandleMM::getMCFGRecords();
    #else
    // Non-Linux: no MCFG parsing available; assume a single segment covering
    // all 256 buses.
    std::vector<MCFGRecord> mcfg;
    MCFGRecord segment;
    segment.PCISegmentGroupNumber = 0;
    segment.startBusNumber = 0;
    segment.endBusNumber = 0xff;
    mcfg.push_back(segment);
    #endif

    for(uint32 s = 0; s < (uint32)mcfg.size(); ++s)
    for (uint32 bus = (uint32)mcfg[s].startBusNumber; bus <= (uint32)mcfg[s].endBusNumber; ++bus)
    {
        uint32 value = 0;
        try
        {
            PciHandleType h(mcfg[s].PCISegmentGroupNumber, bus, device, function);
            h.read32(0, &value); // vendor ID (low 16 bits) + device ID (high 16 bits)

        } catch(...)
        {
            // invalid bus:device:function — nothing present there, keep scanning
            continue;
        }
        const uint32 vendor_id = value & 0xffff;
        const uint32 device_id = (value >> 16) & 0xffff;
        if (vendor_id != PCM_INTEL_PCI_VENDOR_ID)
           continue;

        for (uint32 i = 0; i < devIdsSize; ++i)
        {
           // match
           if(DEV_IDS[i] == device_id)
           {
               // std::cout << "DEBUG: found bus " << std::hex << bus << " with device ID " << device_id << std::dec << "\n";
               socket2bus.push_back(std::make_pair(mcfg[s].PCISegmentGroupNumber,bus));
               break;
           }
        }
    }
    //std::cout << std::flush;
}
5222 
int getBusFromSocket(const uint32 socket)
{
    // Resolves the PCI bus number that hosts the uncore devices of 'socket' by
    // walking the per-socket bus chain: each socket's CPUBUSNO register
    // (offset 0x108 of device 5, function 0) names the next bus to inspect.
    // Returns -1 when the socket cannot be resolved.
    int cur_bus = 0;
    uint32 cur_socket = 0;
    // std::cout << "socket: " << socket << "\n";
    while(cur_socket <= socket)
    {
        // std::cout << "reading from bus 0x" << std::hex << cur_bus << std::dec << " ";
        PciHandleType h(0, cur_bus, 5, 0);
        uint32 cpubusno = 0;
        h.read32(0x108, &cpubusno); // CPUBUSNO register
        cur_bus = (cpubusno >> 8)& 0x0ff;
        // std::cout << "socket: " << cur_socket << std::hex << " cpubusno: 0x" << std::hex << cpubusno << " " << cur_bus << std::dec << "\n";
        if(socket == cur_socket)
            return cur_bus;
        ++cur_socket;
        ++cur_bus;
        // Bus numbers are 8-bit; running past 0xff means the socket was not found.
        if(cur_bus > 0x0ff)
           return -1;
    }
    //std::cout << std::flush;

    return -1;
}
5247 
createIntelPerfMonDevice(uint32 groupnr_,int32 bus_,uint32 dev_,uint32 func_,bool checkVendor)5248 PciHandleType * ServerPCICFGUncore::createIntelPerfMonDevice(uint32 groupnr_, int32 bus_, uint32 dev_, uint32 func_, bool checkVendor)
5249 {
5250     if (PciHandleType::exists(groupnr_, (uint32)bus_, dev_, func_))
5251     {
5252         PciHandleType * handle = new PciHandleType(groupnr_, bus_, dev_, func_);
5253 
5254         if(!checkVendor) return handle;
5255 
5256         uint32 vendor_id = 0;
5257         handle->read32(PCM_PCI_VENDOR_ID_OFFSET,&vendor_id);
5258         vendor_id &= 0x0ffff;
5259 
5260         if(vendor_id == PCM_INTEL_PCI_VENDOR_ID) return handle;
5261 
5262         delete handle;
5263     }
5264     return NULL;
5265 }
5266 
isSecureBoot() const5267 bool PCM::isSecureBoot() const
5268 {
5269     static int flag = -1;
5270     if (MSR.size() > 0 && flag == -1)
5271     {
5272         // std::cerr << "DEBUG: checking MSR in isSecureBoot\n";
5273         uint64 val = 0;
5274         if (MSR[0]->read(IA32_PERFEVTSEL0_ADDR, &val) != sizeof(val))
5275         {
5276             flag = 0; // some problem with MSR read, not secure boot
5277         }
5278         // read works
5279         if (MSR[0]->write(IA32_PERFEVTSEL0_ADDR, val) != sizeof(val)/* && errno == 1 */) // errno works only on windows
5280         { // write does not work -> secure boot
5281             flag = 1;
5282         }
5283         else
5284         {
5285             flag = 0; // can write MSR -> no secure boot
5286         }
5287     }
5288     return flag == 1;
5289 }
5290 
bool PCM::useLinuxPerfForUncore() const
{
    // Decides once (cached in 'use') whether uncore PMUs are programmed through
    // the Linux perf interface instead of direct register access. Perf is chosen
    // when the PCM_USE_UNCORE_PERF=1 env variable is set or Secure Boot is
    // active (direct MSR/PCICFG writes are blocked under Secure Boot).
    static int use = -1;
    if (use != -1)
    {
        return 1 == use;
    }
    use = 0;
    bool secureBoot = isSecureBoot();
#ifdef PCM_USE_PERF
    const auto imcIDs = enumeratePerfPMUs("imc", 100);
    std::cout << "INFO: Linux perf interface to program uncore PMUs is " << (imcIDs.empty()?"NOT ":"") << "present\n";
    const char * perf_env = std::getenv("PCM_USE_UNCORE_PERF");
    if (perf_env != NULL && std::string(perf_env) == std::string("1"))
    {
        std::cout << "INFO: using Linux perf interface to program uncore PMUs because env variable PCM_USE_UNCORE_PERF=1\n";
        use = 1;
    }
    if (secureBoot)
    {
        std::cout << "INFO: Secure Boot detected. Using Linux perf for uncore PMU programming.\n";
        use = 1;
    }
    else
#endif
    {
        // With PCM_USE_PERF defined this block is the else-branch of the
        // secureBoot check above (so the inner 'if' is never true); without
        // PCM_USE_PERF it is an unconditional scope that reports the fatal
        // Secure Boot + no-perf configuration.
        if (secureBoot)
        {
            std::cerr << "ERROR: Secure Boot detected. Recompile PCM with -DPCM_USE_PERF or disable Secure Boot.\n";
        }
    }
    return 1 == use;
}
5324 
// Constructs the per-socket uncore PMU accessor. Initialization order is
// significant: register locations must be known before bus discovery, and
// buses must be resolved before PMU handles are created (via Linux perf or
// direct PCICFG/MMIO access, depending on PCM::useLinuxPerfForUncore()).
ServerPCICFGUncore::ServerPCICFGUncore(uint32 socket_, const PCM * pcm) :
     iMCbus(-1)
   , UPIbus(-1)
   , M2Mbus(-1)
   , groupnr(0)
   , cpu_model(pcm->getCPUModel())
   , qpi_speed(0)
{
    initRegisterLocations(pcm);
    initBuses(socket_, pcm);

    if (pcm->useLinuxPerfForUncore())
    {
        initPerf(socket_, pcm);
    }
    else
    {
        initDirect(socket_, pcm);
    }

    // Summary of every uncore block discovered on this socket
    std::cerr << "Socket " << socket_ << ": " <<
        getNumMC() << " memory controllers detected with total number of " << getNumMCChannels() << " channels. " <<
        getNumQPIPorts() << " QPI ports detected." <<
        " " << m2mPMUs.size() << " M2M (mesh to memory) blocks detected."
        " " << haPMUs.size()  << " Home Agents detected."
        " " << m3upiPMUs.size() << " M3UPI blocks detected."
        "\n";
}
5353 
// Fills the per-unit (device, function) PCICFG address tables for the
// detected CPU model. The PCM_PCICFG_*_INIT macros token-paste the unit
// index and architecture prefix into the names of per-arch address
// constants (e.g. SKX_MC0_CH1_REGISTER_DEV_ADDR) and append the resulting
// (dev, func) pair to the corresponding location vector.
// Throws std::exception for unsupported CPU models.
void ServerPCICFGUncore::initRegisterLocations(const PCM * pcm)
{
#define PCM_PCICFG_MC_INIT(controller, channel, arch) \
    MCRegisterLocation.resize(controller + 1); \
    MCRegisterLocation[controller].resize(channel + 1); \
    MCRegisterLocation[controller][channel] =  \
        std::make_pair(arch##_MC##controller##_CH##channel##_REGISTER_DEV_ADDR, arch##_MC##controller##_CH##channel##_REGISTER_FUNC_ADDR);

#define PCM_PCICFG_QPI_INIT(port, arch) \
    XPIRegisterLocation.resize(port + 1); \
    XPIRegisterLocation[port] = std::make_pair(arch##_QPI_PORT##port##_REGISTER_DEV_ADDR, arch##_QPI_PORT##port##_REGISTER_FUNC_ADDR);

#define PCM_PCICFG_M3UPI_INIT(port, arch) \
    M3UPIRegisterLocation.resize(port + 1); \
    M3UPIRegisterLocation[port] = std::make_pair(arch##_M3UPI_PORT##port##_REGISTER_DEV_ADDR, arch##_M3UPI_PORT##port##_REGISTER_FUNC_ADDR);

#define PCM_PCICFG_EDC_INIT(controller, clock, arch) \
    EDCRegisterLocation.resize(controller + 1); \
    EDCRegisterLocation[controller] = std::make_pair(arch##_EDC##controller##_##clock##_REGISTER_DEV_ADDR, arch##_EDC##controller##_##clock##_REGISTER_FUNC_ADDR);

#define PCM_PCICFG_M2M_INIT(x, arch) \
    M2MRegisterLocation.resize(x + 1); \
    M2MRegisterLocation[x] = std::make_pair(arch##_M2M_##x##_REGISTER_DEV_ADDR, arch##_M2M_##x##_REGISTER_FUNC_ADDR);

#define PCM_PCICFG_HA_INIT(x, arch) \
    HARegisterLocation.resize(x + 1); \
    HARegisterLocation[x] = std::make_pair(arch##_HA##x##_REGISTER_DEV_ADDR, arch##_HA##x##_REGISTER_FUNC_ADDR);

    if(cpu_model == PCM::JAKETOWN || cpu_model == PCM::IVYTOWN)
    {
        // Jaketown/Ivytown: 2 iMCs x 4 channels, 3 QPI ports
        PCM_PCICFG_MC_INIT(0, 0, JKTIVT)
        PCM_PCICFG_MC_INIT(0, 1, JKTIVT)
        PCM_PCICFG_MC_INIT(0, 2, JKTIVT)
        PCM_PCICFG_MC_INIT(0, 3, JKTIVT)
        PCM_PCICFG_MC_INIT(1, 0, JKTIVT)
        PCM_PCICFG_MC_INIT(1, 1, JKTIVT)
        PCM_PCICFG_MC_INIT(1, 2, JKTIVT)
        PCM_PCICFG_MC_INIT(1, 3, JKTIVT)

        PCM_PCICFG_QPI_INIT(0, JKTIVT);
        PCM_PCICFG_QPI_INIT(1, JKTIVT);
        PCM_PCICFG_QPI_INIT(2, JKTIVT);
    }
    else if(cpu_model == PCM::HASWELLX || cpu_model == PCM::BDX_DE || cpu_model == PCM::BDX)
    {
        // Haswell-X/Broadwell: 2 iMCs x 4 channels, 3 QPI ports, 2 Home Agents
        PCM_PCICFG_MC_INIT(0, 0, HSX)
        PCM_PCICFG_MC_INIT(0, 1, HSX)
        PCM_PCICFG_MC_INIT(0, 2, HSX)
        PCM_PCICFG_MC_INIT(0, 3, HSX)
        PCM_PCICFG_MC_INIT(1, 0, HSX)
        PCM_PCICFG_MC_INIT(1, 1, HSX)
        PCM_PCICFG_MC_INIT(1, 2, HSX)
        PCM_PCICFG_MC_INIT(1, 3, HSX)

        PCM_PCICFG_QPI_INIT(0, HSX);
        PCM_PCICFG_QPI_INIT(1, HSX);
        PCM_PCICFG_QPI_INIT(2, HSX);

        PCM_PCICFG_HA_INIT(0, HSX);
        PCM_PCICFG_HA_INIT(1, HSX);
    }
    else if(cpu_model == PCM::SKX)
    {
        // Skylake-X family (also covers CLX/CPX steppings)
        PCM_PCICFG_MC_INIT(0, 0, SKX)
        PCM_PCICFG_MC_INIT(0, 1, SKX)
        PCM_PCICFG_MC_INIT(0, 2, SKX)
        PCM_PCICFG_MC_INIT(0, 3, SKX)
        PCM_PCICFG_MC_INIT(1, 0, SKX)
        PCM_PCICFG_MC_INIT(1, 1, SKX)
        PCM_PCICFG_MC_INIT(1, 2, SKX)
        PCM_PCICFG_MC_INIT(1, 3, SKX)

        PCM_PCICFG_QPI_INIT(0, SKX);
        PCM_PCICFG_QPI_INIT(1, SKX);
        PCM_PCICFG_QPI_INIT(2, SKX);

        if (pcm->isCPX())
        {
            // Cooper Lake has three additional UPI links
            PCM_PCICFG_QPI_INIT(3, CPX);
            PCM_PCICFG_QPI_INIT(4, CPX);
            PCM_PCICFG_QPI_INIT(5, CPX);
        }

        PCM_PCICFG_M2M_INIT(0, SKX)
        PCM_PCICFG_M2M_INIT(1, SKX)

        // M3UPI
        if (pcm->isCPX())
        {
            // CPX
            PCM_PCICFG_M3UPI_INIT(0, CPX);
            PCM_PCICFG_M3UPI_INIT(1, CPX);
            PCM_PCICFG_M3UPI_INIT(2, CPX);
            PCM_PCICFG_M3UPI_INIT(3, CPX);
            PCM_PCICFG_M3UPI_INIT(4, CPX);
            PCM_PCICFG_M3UPI_INIT(5, CPX);
        }
        else
        {
            // SKX/CLX
            PCM_PCICFG_M3UPI_INIT(0, SKX);
            PCM_PCICFG_M3UPI_INIT(1, SKX);
            PCM_PCICFG_M3UPI_INIT(2, SKX);
        }
    }
    else if (cpu_model == PCM::ICX)
    {
        // Ice Lake-SP: iMC channels are MMIO-based (see initDirect), so no
        // PCICFG MC entries here; UPI/M3UPI/M2M remain PCICFG devices
        PCM_PCICFG_QPI_INIT(0, ICX);
        PCM_PCICFG_QPI_INIT(1, ICX);
        PCM_PCICFG_QPI_INIT(2, ICX);

        PCM_PCICFG_M3UPI_INIT(0, ICX);
        PCM_PCICFG_M3UPI_INIT(1, ICX);
        PCM_PCICFG_M3UPI_INIT(2, ICX);

        PCM_PCICFG_M2M_INIT(0, SERVER)
        PCM_PCICFG_M2M_INIT(1, SERVER)
        PCM_PCICFG_M2M_INIT(2, SERVER)
        PCM_PCICFG_M2M_INIT(3, SERVER)
    }
    else if(cpu_model == PCM::KNL)
    {
        // 2 DDR4 Memory Controllers with 3 channels each
        PCM_PCICFG_MC_INIT(0, 0, KNL)
        PCM_PCICFG_MC_INIT(0, 1, KNL)
        PCM_PCICFG_MC_INIT(0, 2, KNL)
        PCM_PCICFG_MC_INIT(1, 0, KNL)
        PCM_PCICFG_MC_INIT(1, 1, KNL)
        PCM_PCICFG_MC_INIT(1, 2, KNL)

    // 8 MCDRAM (Multi-Channel [Stacked] DRAM) Memory Controllers
        PCM_PCICFG_EDC_INIT(0, ECLK, KNL)
        PCM_PCICFG_EDC_INIT(1, ECLK, KNL)
        PCM_PCICFG_EDC_INIT(2, ECLK, KNL)
        PCM_PCICFG_EDC_INIT(3, ECLK, KNL)
        PCM_PCICFG_EDC_INIT(4, ECLK, KNL)
        PCM_PCICFG_EDC_INIT(5, ECLK, KNL)
        PCM_PCICFG_EDC_INIT(6, ECLK, KNL)
        PCM_PCICFG_EDC_INIT(7, ECLK, KNL)
    }
    else if (cpu_model == PCM::SNOWRIDGE)
    {
        // Snow Ridge: M2M only via PCICFG; iMC channels are MMIO-based
        PCM_PCICFG_M2M_INIT(0, SERVER)
        PCM_PCICFG_M2M_INIT(1, SERVER)
        PCM_PCICFG_M2M_INIT(2, SERVER)
        PCM_PCICFG_M2M_INIT(3, SERVER)
    }
    else
    {
        std::cerr << "Error: Uncore PMU for processor with model id " << cpu_model << " is not supported.\n";
        throw std::exception();
    }

#undef PCM_PCICFG_MC_INIT
#undef PCM_PCICFG_QPI_INIT
#undef PCM_PCICFG_M3UPI_INIT
#undef PCM_PCICFG_EDC_INIT
#undef PCM_PCICFG_M2M_INIT
#undef PCM_PCICFG_HA_INIT
}
5514 
initBuses(uint32 socket_,const PCM * pcm)5515 void ServerPCICFGUncore::initBuses(uint32 socket_, const PCM * pcm)
5516 {
5517     const uint32 total_sockets_ = pcm->getNumSockets();
5518 
5519     if (M2MRegisterLocation.size())
5520     {
5521         initSocket2Bus(socket2M2Mbus, M2MRegisterLocation[0].first, M2MRegisterLocation[0].second, M2M_DEV_IDS, (uint32)sizeof(M2M_DEV_IDS) / sizeof(M2M_DEV_IDS[0]));
5522         if (socket_ < socket2M2Mbus.size())
5523         {
5524             groupnr = socket2M2Mbus[socket_].first;
5525             M2Mbus = socket2M2Mbus[socket_].second;
5526         }
5527         else
5528         {
5529             std::cerr << "PCM error: socket_ " << socket_ << " >= socket2M2Mbus.size() " << socket2M2Mbus.size() << "\n";
5530         }
5531         if (total_sockets_ != socket2M2Mbus.size())
5532         {
5533             std::cerr << "PCM warning: total_sockets_ " << total_sockets_ << " does not match socket2M2Mbus.size() " << socket2M2Mbus.size() << "\n";
5534         }
5535     }
5536 
5537     if (MCRegisterLocation.size() > 0 && MCRegisterLocation[0].size() > 0)
5538     {
5539         initSocket2Bus(socket2iMCbus, MCRegisterLocation[0][0].first, MCRegisterLocation[0][0].second, IMC_DEV_IDS, (uint32)sizeof(IMC_DEV_IDS) / sizeof(IMC_DEV_IDS[0]));
5540 
5541         if (total_sockets_ == socket2iMCbus.size())
5542         {
5543             if (total_sockets_ == socket2M2Mbus.size() && socket2iMCbus[socket_].first != socket2M2Mbus[socket_].first)
5544             {
5545                 std::cerr << "PCM error: mismatching PCICFG group number for M2M and IMC perfmon devices.\n";
5546                 M2Mbus = -1;
5547             }
5548             groupnr = socket2iMCbus[socket_].first;
5549             iMCbus = socket2iMCbus[socket_].second;
5550         }
5551         else if (total_sockets_ <= 4)
5552         {
5553             iMCbus = getBusFromSocket(socket_);
5554             if (iMCbus < 0)
5555             {
5556                 std::cerr << "Cannot find bus for socket " << socket_ << " on system with " << total_sockets_ << " sockets.\n";
5557                 throw std::exception();
5558             }
5559             else
5560             {
5561                 std::cerr << "PCM Warning: the bus for socket " << socket_ << " on system with " << total_sockets_ << " sockets could not find via PCI bus scan. Using cpubusno register. Bus = " << iMCbus << "\n";
5562             }
5563         }
5564         else
5565         {
5566             std::cerr << "Cannot find bus for socket " << socket_ << " on system with " << total_sockets_ << " sockets.\n";
5567             throw std::exception();
5568         }
5569     }
5570 
5571 #if 1
5572     if (total_sockets_ == 1) {
5573         /*
5574          * For single socket systems, do not worry at all about QPI ports.  This
5575          *  eliminates QPI LL programming error messages on single socket systems
5576          *  with BIOS that hides QPI performance counting PCI functions.  It also
5577          *  eliminates register programming that is not needed since no QPI traffic
5578          *  is possible with single socket systems.
5579          */
5580         return;
5581     }
5582 #endif
5583 
5584 #ifdef PCM_NOQPI
5585     return;
5586 #endif
5587 
5588     if (PCM::hasUPI(cpu_model))
5589     {
5590         initSocket2Bus(socket2UPIbus, XPIRegisterLocation[0].first, XPIRegisterLocation[0].second, UPI_DEV_IDS, (uint32)sizeof(UPI_DEV_IDS) / sizeof(UPI_DEV_IDS[0]));
5591         if(total_sockets_ == socket2UPIbus.size())
5592         {
5593             UPIbus = socket2UPIbus[socket_].second;
5594             if(groupnr != socket2UPIbus[socket_].first)
5595             {
5596                 UPIbus = -1;
5597                 std::cerr << "PCM error: mismatching PCICFG group number for UPI and IMC perfmon devices.\n";
5598             }
5599         }
5600         else
5601         {
5602             std::cerr << "PCM error: Did not find UPI perfmon device on every socket in a multisocket system.\n";
5603         }
5604     }
5605     else
5606     {
5607         UPIbus = iMCbus;
5608     }
5609 }
5610 
// Creates all uncore PMU register accessors for this socket using direct
// hardware access (PCICFG and, on ICX/SNOWRIDGE, MMIO) rather than the
// Linux perf interface: iMC channels, M2M blocks, (KNL) EDC controllers,
// M3UPI, Home Agents and QPI/UPI link-layer PMUs.
// Throws std::exception if no memory controller PMU can be opened or the
// QPI/UPI handles cannot be created.
void ServerPCICFGUncore::initDirect(uint32 socket_, const PCM * pcm)
{
    {
        // --- iMC channel PMUs via PCICFG ---
        std::vector<std::shared_ptr<PciHandleType> > imcHandles;

        auto lastWorkingChannels = imcHandles.size();
        for (auto & ctrl: MCRegisterLocation)
        {
            for (auto & channel : ctrl)
            {
                PciHandleType * handle = createIntelPerfMonDevice(groupnr, iMCbus, channel.first, channel.second, true);
                if (handle) imcHandles.push_back(std::shared_ptr<PciHandleType>(handle));
            }
            // record how many channels of this controller actually responded
            if (imcHandles.size() > lastWorkingChannels)
            {
                num_imc_channels.push_back((uint32)(imcHandles.size() - lastWorkingChannels));
            }
            lastWorkingChannels = imcHandles.size();
        }

        for (auto & handle : imcHandles)
        {
            if (cpu_model == PCM::KNL) {
                imcPMUs.push_back(
                    UncorePMU(
                        std::make_shared<PCICFGRegister32>(handle, KNX_MC_CH_PCI_PMON_BOX_CTL_ADDR),
                        std::make_shared<PCICFGRegister32>(handle, KNX_MC_CH_PCI_PMON_CTL0_ADDR),
                        std::make_shared<PCICFGRegister32>(handle, KNX_MC_CH_PCI_PMON_CTL1_ADDR),
                        std::make_shared<PCICFGRegister32>(handle, KNX_MC_CH_PCI_PMON_CTL2_ADDR),
                        std::make_shared<PCICFGRegister32>(handle, KNX_MC_CH_PCI_PMON_CTL3_ADDR),
                        std::make_shared<PCICFGRegister64>(handle, KNX_MC_CH_PCI_PMON_CTR0_ADDR),
                        std::make_shared<PCICFGRegister64>(handle, KNX_MC_CH_PCI_PMON_CTR1_ADDR),
                        std::make_shared<PCICFGRegister64>(handle, KNX_MC_CH_PCI_PMON_CTR2_ADDR),
                        std::make_shared<PCICFGRegister64>(handle, KNX_MC_CH_PCI_PMON_CTR3_ADDR),
                        std::make_shared<PCICFGRegister32>(handle, KNX_MC_CH_PCI_PMON_FIXED_CTL_ADDR),
                        std::make_shared<PCICFGRegister64>(handle, KNX_MC_CH_PCI_PMON_FIXED_CTR_ADDR))
                );
            }
            else {
                imcPMUs.push_back(
                    UncorePMU(
                        std::make_shared<PCICFGRegister32>(handle, XPF_MC_CH_PCI_PMON_BOX_CTL_ADDR),
                        std::make_shared<PCICFGRegister32>(handle, XPF_MC_CH_PCI_PMON_CTL0_ADDR),
                        std::make_shared<PCICFGRegister32>(handle, XPF_MC_CH_PCI_PMON_CTL1_ADDR),
                        std::make_shared<PCICFGRegister32>(handle, XPF_MC_CH_PCI_PMON_CTL2_ADDR),
                        std::make_shared<PCICFGRegister32>(handle, XPF_MC_CH_PCI_PMON_CTL3_ADDR),
                        std::make_shared<PCICFGRegister64>(handle, XPF_MC_CH_PCI_PMON_CTR0_ADDR),
                        std::make_shared<PCICFGRegister64>(handle, XPF_MC_CH_PCI_PMON_CTR1_ADDR),
                        std::make_shared<PCICFGRegister64>(handle, XPF_MC_CH_PCI_PMON_CTR2_ADDR),
                        std::make_shared<PCICFGRegister64>(handle, XPF_MC_CH_PCI_PMON_CTR3_ADDR),
                        std::make_shared<PCICFGRegister32>(handle, XPF_MC_CH_PCI_PMON_FIXED_CTL_ADDR),
                        std::make_shared<PCICFGRegister64>(handle, XPF_MC_CH_PCI_PMON_FIXED_CTR_ADDR))
                );
            }
        }
    }

    {
        // --- M2M (mesh-to-memory) PMUs via PCICFG ---
        std::vector<std::shared_ptr<PciHandleType> > m2mHandles;

        if (M2Mbus >= 0)
        {
            for (auto & reg : M2MRegisterLocation)
            {
                PciHandleType * handle = createIntelPerfMonDevice(groupnr, M2Mbus, reg.first, reg.second, true);
                if (handle) m2mHandles.push_back(std::shared_ptr<PciHandleType>(handle));
            }
        }

        for (auto & handle : m2mHandles)
        {
            // ICX/SNOWRIDGE use the SERVER_* register layout (64-bit control registers)
            if (cpu_model == PCM::ICX || cpu_model == PCM::SNOWRIDGE)
            {
                m2mPMUs.push_back(
                    UncorePMU(
                        std::make_shared<PCICFGRegister32>(handle, SERVER_M2M_PCI_PMON_BOX_CTL_ADDR),
                        std::make_shared<PCICFGRegister64>(handle, SERVER_M2M_PCI_PMON_CTL0_ADDR),
                        std::make_shared<PCICFGRegister64>(handle, SERVER_M2M_PCI_PMON_CTL1_ADDR),
                        std::make_shared<PCICFGRegister64>(handle, SERVER_M2M_PCI_PMON_CTL2_ADDR),
                        std::make_shared<PCICFGRegister64>(handle, SERVER_M2M_PCI_PMON_CTL3_ADDR),
                        std::make_shared<PCICFGRegister64>(handle, SERVER_M2M_PCI_PMON_CTR0_ADDR),
                        std::make_shared<PCICFGRegister64>(handle, SERVER_M2M_PCI_PMON_CTR1_ADDR),
                        std::make_shared<PCICFGRegister64>(handle, SERVER_M2M_PCI_PMON_CTR2_ADDR),
                        std::make_shared<PCICFGRegister64>(handle, SERVER_M2M_PCI_PMON_CTR3_ADDR)
                    )
                );
            }
            else
            {
                m2mPMUs.push_back(
                    UncorePMU(
                        std::make_shared<PCICFGRegister32>(handle, SKX_M2M_PCI_PMON_BOX_CTL_ADDR),
                        std::make_shared<PCICFGRegister64>(handle, SKX_M2M_PCI_PMON_CTL0_ADDR),
                        std::make_shared<PCICFGRegister64>(handle, SKX_M2M_PCI_PMON_CTL1_ADDR),
                        std::make_shared<PCICFGRegister64>(handle, SKX_M2M_PCI_PMON_CTL2_ADDR),
                        std::make_shared<PCICFGRegister64>(handle, SKX_M2M_PCI_PMON_CTL3_ADDR),
                        std::make_shared<PCICFGRegister64>(handle, SKX_M2M_PCI_PMON_CTR0_ADDR),
                        std::make_shared<PCICFGRegister64>(handle, SKX_M2M_PCI_PMON_CTR1_ADDR),
                        std::make_shared<PCICFGRegister64>(handle, SKX_M2M_PCI_PMON_CTR2_ADDR),
                        std::make_shared<PCICFGRegister64>(handle, SKX_M2M_PCI_PMON_CTR3_ADDR)
                    )
                );
            }
        }
    }

    // --- MMIO-based iMC channel PMUs (ICX/SNOWRIDGE have no PCICFG iMC devices) ---
    int numChannels = 0;

    if (cpu_model == PCM::SNOWRIDGE || cpu_model == PCM::ICX)
    {
        numChannels = 2;
    }

    if (numChannels > 0)
    {
        initSocket2Ubox0Bus();
        if (socket_ < socket2UBOX0bus.size())
        {
            // one memory BAR per M2M block; each maps numChannels channel PMON ranges
            auto memBars = getServerMemBars((uint32)m2mPMUs.size(), socket2UBOX0bus[socket_].first, socket2UBOX0bus[socket_].second);
            for (auto & memBar : memBars)
            {
                for (int channel = 0; channel < numChannels; ++channel)
                {
                    auto handle = std::make_shared<MMIORange>(memBar + SERVER_MC_CH_PMON_BASE_ADDR + channel * SERVER_MC_CH_PMON_STEP, SERVER_MC_CH_PMON_SIZE, false);
                    imcPMUs.push_back(
                        UncorePMU(
                            std::make_shared<MMIORegister32>(handle, SERVER_MC_CH_PMON_BOX_CTL_OFFSET),
                            std::make_shared<MMIORegister32>(handle, SERVER_MC_CH_PMON_CTL0_OFFSET),
                            std::make_shared<MMIORegister32>(handle, SERVER_MC_CH_PMON_CTL1_OFFSET),
                            std::make_shared<MMIORegister32>(handle, SERVER_MC_CH_PMON_CTL2_OFFSET),
                            std::make_shared<MMIORegister32>(handle, SERVER_MC_CH_PMON_CTL3_OFFSET),
                            std::make_shared<MMIORegister64>(handle, SERVER_MC_CH_PMON_CTR0_OFFSET),
                            std::make_shared<MMIORegister64>(handle, SERVER_MC_CH_PMON_CTR1_OFFSET),
                            std::make_shared<MMIORegister64>(handle, SERVER_MC_CH_PMON_CTR2_OFFSET),
                            std::make_shared<MMIORegister64>(handle, SERVER_MC_CH_PMON_CTR3_OFFSET),
                            std::make_shared<MMIORegister32>(handle, SERVER_MC_CH_PMON_FIXED_CTL_OFFSET),
                            std::make_shared<MMIORegister64>(handle, SERVER_MC_CH_PMON_FIXED_CTR_OFFSET)
                        )
                    );
                }
                num_imc_channels.push_back(numChannels);
            }
        }
        else
        {
            std::cerr << "ERROR: socket " << socket_ << " is not found in socket2UBOX0bus. socket2UBOX0bus.size =" << socket2UBOX0bus.size() << std::endl;
        }
    }

    // at least one memory controller channel PMU is required to proceed
    if (imcPMUs.empty())
    {
        std::cerr << "PCM error: no memory controllers found.\n";
        throw std::exception();
    }

    if (cpu_model == PCM::KNL)
    {
        // --- KNL MCDRAM EDC controller PMUs ---
        std::vector<std::shared_ptr<PciHandleType> > edcHandles;

        for (auto & reg : EDCRegisterLocation)
        {
            PciHandleType * handle = createIntelPerfMonDevice(groupnr, iMCbus, reg.first, reg.second, true);
            if (handle) edcHandles.push_back(std::shared_ptr<PciHandleType>(handle));
        }

        for (auto & handle : edcHandles)
        {
            edcPMUs.push_back(
                UncorePMU(
                    std::make_shared<PCICFGRegister32>(handle, KNX_EDC_CH_PCI_PMON_BOX_CTL_ADDR),
                    std::make_shared<PCICFGRegister32>(handle, KNX_EDC_CH_PCI_PMON_CTL0_ADDR),
                    std::make_shared<PCICFGRegister32>(handle, KNX_EDC_CH_PCI_PMON_CTL1_ADDR),
                    std::make_shared<PCICFGRegister32>(handle, KNX_EDC_CH_PCI_PMON_CTL2_ADDR),
                    std::make_shared<PCICFGRegister32>(handle, KNX_EDC_CH_PCI_PMON_CTL3_ADDR),
                    std::make_shared<PCICFGRegister64>(handle, KNX_EDC_CH_PCI_PMON_CTR0_ADDR),
                    std::make_shared<PCICFGRegister64>(handle, KNX_EDC_CH_PCI_PMON_CTR1_ADDR),
                    std::make_shared<PCICFGRegister64>(handle, KNX_EDC_CH_PCI_PMON_CTR2_ADDR),
                    std::make_shared<PCICFGRegister64>(handle, KNX_EDC_CH_PCI_PMON_CTR3_ADDR),
                    std::make_shared<PCICFGRegister32>(handle, KNX_EDC_CH_PCI_PMON_FIXED_CTL_ADDR),
                    std::make_shared<PCICFGRegister64>(handle, KNX_EDC_CH_PCI_PMON_FIXED_CTR_ADDR))
            );
        }
    }

    // --- M3UPI PMUs ---
    std::vector<std::shared_ptr<PciHandleType> > m3upiHandles;
    if (UPIbus >= 0)
    {
        for (auto& reg : M3UPIRegisterLocation)
        {
            PciHandleType* handle = createIntelPerfMonDevice(groupnr, UPIbus, reg.first, reg.second, true);
            if (handle) m3upiHandles.push_back(std::shared_ptr<PciHandleType>(handle));
        }
    }
    for (auto& handle : m3upiHandles)
    {
        if (cpu_model == PCM::ICX)
        {
            m3upiPMUs.push_back(
                UncorePMU(
                    std::make_shared<PCICFGRegister32>(handle, ICX_M3UPI_PCI_PMON_BOX_CTL_ADDR),
                    std::make_shared<PCICFGRegister32>(handle, ICX_M3UPI_PCI_PMON_CTL0_ADDR),
                    std::make_shared<PCICFGRegister32>(handle, ICX_M3UPI_PCI_PMON_CTL1_ADDR),
                    std::make_shared<PCICFGRegister32>(handle, ICX_M3UPI_PCI_PMON_CTL2_ADDR),
                    std::make_shared<PCICFGRegister32>(handle, ICX_M3UPI_PCI_PMON_CTL3_ADDR),
                    std::make_shared<PCICFGRegister64>(handle, ICX_M3UPI_PCI_PMON_CTR0_ADDR),
                    std::make_shared<PCICFGRegister64>(handle, ICX_M3UPI_PCI_PMON_CTR1_ADDR),
                    std::make_shared<PCICFGRegister64>(handle, ICX_M3UPI_PCI_PMON_CTR2_ADDR),
                    std::make_shared<PCICFGRegister64>(handle, ICX_M3UPI_PCI_PMON_CTR3_ADDR)
                )
            );
        }
        else
        {
            // pre-ICX M3UPI exposes only 3 counters: CTL3/CTR3 slots are left empty
            m3upiPMUs.push_back(
                UncorePMU(
                    std::make_shared<PCICFGRegister32>(handle, M3UPI_PCI_PMON_BOX_CTL_ADDR),
                    std::make_shared<PCICFGRegister32>(handle, M3UPI_PCI_PMON_CTL0_ADDR),
                    std::make_shared<PCICFGRegister32>(handle, M3UPI_PCI_PMON_CTL1_ADDR),
                    std::make_shared<PCICFGRegister32>(handle, M3UPI_PCI_PMON_CTL2_ADDR),
                    std::shared_ptr<PCICFGRegister32>(),
                    std::make_shared<PCICFGRegister64>(handle, M3UPI_PCI_PMON_CTR0_ADDR),
                    std::make_shared<PCICFGRegister64>(handle, M3UPI_PCI_PMON_CTR1_ADDR),
                    std::make_shared<PCICFGRegister64>(handle, M3UPI_PCI_PMON_CTR2_ADDR),
                    std::shared_ptr<PCICFGRegister64>()
                )
            );
        }
    }

    {
        // --- Home Agent PMUs (HSX/BDX only have HARegisterLocation entries) ---
        std::vector<std::shared_ptr<PciHandleType> > haHandles;
        for (auto & reg : HARegisterLocation)
        {
            auto handle = createIntelPerfMonDevice(groupnr, iMCbus, reg.first, reg.second, true);
            if (handle) haHandles.push_back(std::shared_ptr<PciHandleType>(handle));
        }

        for (auto & handle : haHandles)
        {
            haPMUs.push_back(
                UncorePMU(
                    std::make_shared<PCICFGRegister32>(handle, XPF_HA_PCI_PMON_BOX_CTL_ADDR),
                    std::make_shared<PCICFGRegister32>(handle, XPF_HA_PCI_PMON_CTL0_ADDR),
                    std::make_shared<PCICFGRegister32>(handle, XPF_HA_PCI_PMON_CTL1_ADDR),
                    std::make_shared<PCICFGRegister32>(handle, XPF_HA_PCI_PMON_CTL2_ADDR),
                    std::make_shared<PCICFGRegister32>(handle, XPF_HA_PCI_PMON_CTL3_ADDR),
                    std::make_shared<PCICFGRegister64>(handle, XPF_HA_PCI_PMON_CTR0_ADDR),
                    std::make_shared<PCICFGRegister64>(handle, XPF_HA_PCI_PMON_CTR1_ADDR),
                    std::make_shared<PCICFGRegister64>(handle, XPF_HA_PCI_PMON_CTR2_ADDR),
                    std::make_shared<PCICFGRegister64>(handle, XPF_HA_PCI_PMON_CTR3_ADDR)
                )
            );
        }
    }

    if (pcm->getNumSockets() == 1) {
        /*
         * For single socket systems, do not worry at all about QPI ports.  This
         *  eliminates QPI LL programming error messages on single socket systems
         *  with BIOS that hides QPI performance counting PCI functions.  It also
         *  eliminates register programming that is not needed since no QPI traffic
         *  is possible with single socket systems.
         */
        xpiPMUs.clear();
        return;
    }

#ifdef PCM_NOQPI
    xpiPMUs.clear();
    std::cerr << getNumMC() << " memory controllers detected with total number of " << imcPMUs.size() << " channels. " <<
        m2mPMUs.size() << " M2M (mesh to memory) blocks detected. "
        << haPMUs.size() << " Home Agents detected. "
        << m3upiPMUs.size() << " M3UPI blocks detected. "
        "\n";
    return;
#endif

    // --- QPI/UPI link-layer PMUs ---
    std::vector<std::shared_ptr<PciHandleType> > qpiLLHandles;
    auto xPI = pcm->xPI();
    try
    {
        for (size_t i = 0; i < XPIRegisterLocation.size(); ++i)
        {
            PciHandleType * handle = createIntelPerfMonDevice(groupnr, UPIbus, XPIRegisterLocation[i].first, XPIRegisterLocation[i].second, true);
            if (handle)
                qpiLLHandles.push_back(std::shared_ptr<PciHandleType>(handle));
            else
            {
                // links 0 and 1 are expected on every multi-socket system
                if (i == 0 || i == 1)
                {
                    std::cerr << "ERROR: " << xPI << " LL monitoring device (" << std::hex << groupnr << ":" << UPIbus << ":" << XPIRegisterLocation[i].first << ":" <<
                        XPIRegisterLocation[i].second << ") is missing. The " << xPI << " statistics will be incomplete or missing." << std::dec << "\n";
                }
                else if (pcm->getCPUBrandString().find("E7") != std::string::npos) // Xeon E7
                {
                    std::cerr << "ERROR: " << xPI << " LL performance monitoring device for the third " << xPI << " link was not found on " << pcm->getCPUBrandString() <<
                        " processor in socket " << socket_ << ". Possibly BIOS hides the device. The " << xPI << " statistics will be incomplete or missing.\n";
                }
            }
        }
    }
    catch (...)
    {
        std::cerr << "PCM Error: can not create " << xPI << " LL handles.\n";
        throw std::exception();
    }

    for (auto & handle : qpiLLHandles)
    {
        if (cpu_model == PCM::SKX)
        {
            xpiPMUs.push_back(
                UncorePMU(
                    std::make_shared<PCICFGRegister32>(handle, U_L_PCI_PMON_BOX_CTL_ADDR),
                    std::make_shared<PCICFGRegister32>(handle, U_L_PCI_PMON_CTL0_ADDR),
                    std::make_shared<PCICFGRegister32>(handle, U_L_PCI_PMON_CTL1_ADDR),
                    std::make_shared<PCICFGRegister32>(handle, U_L_PCI_PMON_CTL2_ADDR),
                    std::make_shared<PCICFGRegister32>(handle, U_L_PCI_PMON_CTL3_ADDR),
                    std::make_shared<PCICFGRegister64>(handle, U_L_PCI_PMON_CTR0_ADDR),
                    std::make_shared<PCICFGRegister64>(handle, U_L_PCI_PMON_CTR1_ADDR),
                    std::make_shared<PCICFGRegister64>(handle, U_L_PCI_PMON_CTR2_ADDR),
                    std::make_shared<PCICFGRegister64>(handle, U_L_PCI_PMON_CTR3_ADDR)
                    )
            );
        }
        else if (cpu_model == PCM::ICX)
        {
            xpiPMUs.push_back(
                UncorePMU(
                    std::make_shared<PCICFGRegister32>(handle, ICX_UPI_PCI_PMON_BOX_CTL_ADDR),
                    std::make_shared<PCICFGRegister32>(handle, ICX_UPI_PCI_PMON_CTL0_ADDR),
                    std::make_shared<PCICFGRegister32>(handle, ICX_UPI_PCI_PMON_CTL1_ADDR),
                    std::make_shared<PCICFGRegister32>(handle, ICX_UPI_PCI_PMON_CTL2_ADDR),
                    std::make_shared<PCICFGRegister32>(handle, ICX_UPI_PCI_PMON_CTL3_ADDR),
                    std::make_shared<PCICFGRegister64>(handle, ICX_UPI_PCI_PMON_CTR0_ADDR),
                    std::make_shared<PCICFGRegister64>(handle, ICX_UPI_PCI_PMON_CTR1_ADDR),
                    std::make_shared<PCICFGRegister64>(handle, ICX_UPI_PCI_PMON_CTR2_ADDR),
                    std::make_shared<PCICFGRegister64>(handle, ICX_UPI_PCI_PMON_CTR3_ADDR)
                )
            );
        }
        else
        {
            xpiPMUs.push_back(
                UncorePMU(
                    std::make_shared<PCICFGRegister32>(handle, Q_P_PCI_PMON_BOX_CTL_ADDR),
                    std::make_shared<PCICFGRegister32>(handle, Q_P_PCI_PMON_CTL0_ADDR),
                    std::make_shared<PCICFGRegister32>(handle, Q_P_PCI_PMON_CTL1_ADDR),
                    std::make_shared<PCICFGRegister32>(handle, Q_P_PCI_PMON_CTL2_ADDR),
                    std::make_shared<PCICFGRegister32>(handle, Q_P_PCI_PMON_CTL3_ADDR),
                    std::make_shared<PCICFGRegister64>(handle, Q_P_PCI_PMON_CTR0_ADDR),
                    std::make_shared<PCICFGRegister64>(handle, Q_P_PCI_PMON_CTR1_ADDR),
                    std::make_shared<PCICFGRegister64>(handle, Q_P_PCI_PMON_CTR2_ADDR),
                    std::make_shared<PCICFGRegister64>(handle, Q_P_PCI_PMON_CTR3_ADDR)
                    )
            );
        }
    }
}
5970 
5971 
5972 #ifdef PCM_USE_PERF
5973 class PerfVirtualDummyUnitControlRegister : public HWRegister
5974 {
5975     uint64 lastValue;
5976 public:
PerfVirtualDummyUnitControlRegister()5977     PerfVirtualDummyUnitControlRegister() : lastValue(0) {}
operator =(uint64 val)5978     void operator = (uint64 val) override
5979     {
5980         lastValue = val;
5981     }
operator uint64()5982     operator uint64 () override
5983     {
5984         return lastValue;
5985     }
5986 };
5987 
5988 class PerfVirtualFilterRegister;
5989 
// Control register backed by a Linux perf event. Writing an event config
// (re)opens a perf event on a representative core of the target socket;
// reading returns the last programmed config. The file descriptor doubles
// as the handle counter registers read from.
class PerfVirtualControlRegister : public HWRegister
{
    friend class PerfVirtualCounterRegister;
    friend class PerfVirtualFilterRegister;
    int fd;                // perf event fd, -1 when not programmed
    int socket;            // PCM socket index this PMU belongs to
    int pmuID;             // perf PMU type id from sysfs
    perf_event_attr event; // attr used for perf_event_open; config* mutated by filters
    bool fixed;            // true for a fixed-function counter (config forced to 0xff)
    // Close the perf event fd if open and mark the register unprogrammed.
    void close()
    {
        if (fd >= 0)
        {
            ::close(fd);
            fd = -1;
        }
    }
public:
    PerfVirtualControlRegister(int socket_, int pmuID_, bool fixed_ = false) :
        fd(-1),
        socket(socket_),
        pmuID(pmuID_),
        fixed(fixed_)
    {
        event = PCM_init_perf_event_attr(false);
        event.type = pmuID;
    }
    // Program the event: close any previous perf event, then open a new one
    // pinned to the socket's reference core (cpu-wide, pid == -1).
    void operator = (uint64 val) override
    {
        close();
        // fixed counters use the conventional 0xff config encoding
        event.config = fixed ? 0xff : val;
        const auto core = PCM::getInstance()->socketRefCore[socket];
        // NOTE(review): '<= 0' also treats fd 0 as failure; harmless in
        // practice since fd 0 is stdin, but strictly only < 0 is an error.
        if ((fd = syscall(SYS_perf_event_open, &event, -1, core, -1, 0)) <= 0)
        {
            std::cerr << "Linux Perf: Error on programming PMU " << pmuID << ":  " << strerror(errno) << "\n";
            std::cerr << "config: 0x" << std::hex << event.config << " config1: 0x" << event.config1 << " config2: 0x" << event.config2 << std::dec << "\n";
            // errno 24 == EMFILE (per-process fd limit reached)
            if (errno == 24) std::cerr << "try executing 'ulimit -n 10000' to increase the limit on the number of open files.\n";
            return;
        }
    }
    // Read back the last programmed raw event config (not the counter value).
    operator uint64 () override
    {
        return event.config;
    }
    ~PerfVirtualControlRegister()
    {
        close();
    }
    int getFD() const { return fd; }
    int getPMUID() const { return pmuID; }
};
6041 
6042 class PerfVirtualCounterRegister : public HWRegister
6043 {
6044     std::shared_ptr<PerfVirtualControlRegister> controlReg;
6045 public:
PerfVirtualCounterRegister(const std::shared_ptr<PerfVirtualControlRegister> & controlReg_)6046     PerfVirtualCounterRegister(const std::shared_ptr<PerfVirtualControlRegister> & controlReg_) : controlReg(controlReg_)
6047     {
6048     }
operator =(uint64)6049     void operator = (uint64 /* val */) override
6050     {
6051         // no-op
6052     }
operator uint64()6053     operator uint64 () override
6054     {
6055         uint64 result = 0;
6056         if (controlReg.get() && (controlReg->getFD() >= 0))
6057         {
6058             int status = ::read(controlReg->getFD(), &result, sizeof(result));
6059             if (status != sizeof(result))
6060             {
6061                 std::cerr << "PCM Error: failed to read from Linux perf handle " << controlReg->getFD() << " PMU " << controlReg->getPMUID() << "\n";
6062             }
6063         }
6064         return result;
6065     }
6066 };
6067 
6068 class PerfVirtualFilterRegister : public HWRegister
6069 {
6070     uint64 lastValue;
6071     std::array<std::shared_ptr<PerfVirtualControlRegister>, 4> controlRegs;
6072     int filterNr;
6073 public:
PerfVirtualFilterRegister(std::array<std::shared_ptr<PerfVirtualControlRegister>,4> & controlRegs_,int filterNr_)6074     PerfVirtualFilterRegister(std::array<std::shared_ptr<PerfVirtualControlRegister>, 4> & controlRegs_, int filterNr_) :
6075             lastValue(0),
6076             controlRegs(controlRegs_),
6077             filterNr(filterNr_)
6078     {
6079     }
operator =(uint64 val)6080     void operator = (uint64 val) override
6081     {
6082         lastValue = val;
6083         for (auto & ctl: controlRegs)
6084         {
6085             union {
6086                 uint64 config1;
6087                 uint32 config1HL[2];
6088             } cvt;
6089             cvt.config1 = ctl->event.config1;
6090 	    cvt.config1HL[filterNr] = val;
6091 	    ctl->event.config1 = cvt.config1;
6092         }
6093     }
operator uint64()6094     operator uint64 () override
6095     {
6096         return lastValue;
6097     }
6098 };
6099 
enumeratePerfPMUs(const std::string & type,int max_id)6100 std::vector<int> enumeratePerfPMUs(const std::string & type, int max_id)
6101 {
6102     auto getPerfPMUID = [](const std::string & type, int num)
6103     {
6104         int id = -1;
6105         std::ostringstream pmuIDPath(std::ostringstream::out);
6106         pmuIDPath << std::string("/sys/bus/event_source/devices/uncore_") << type;
6107         if (num != -1)
6108         {
6109             pmuIDPath << "_" << num;
6110         }
6111         pmuIDPath << "/type";
6112         const std::string pmuIDStr = readSysFS(pmuIDPath.str().c_str(), true);
6113         if (pmuIDStr.size())
6114         {
6115             id = std::atoi(pmuIDStr.c_str());
6116         }
6117         return id;
6118     };
6119     std::vector<int> ids;
6120     for (int i = -1; i < max_id; ++i)
6121     {
6122         int pmuID = getPerfPMUID(type, i);
6123         if (pmuID > 0)
6124         {
6125             // std::cout << "DEBUG: " << type << " pmu id " << pmuID << " found\n";
6126             ids.push_back(pmuID);
6127         }
6128     }
6129     return ids;
6130 }
6131 
populatePerfPMUs(unsigned socket_,const std::vector<int> & ids,std::vector<UncorePMU> & pmus,bool fixed,bool filter0,bool filter1)6132 void populatePerfPMUs(unsigned socket_, const std::vector<int> & ids, std::vector<UncorePMU> & pmus, bool fixed, bool filter0, bool filter1)
6133 {
6134     for (const auto & id : ids)
6135     {
6136         std::array<std::shared_ptr<PerfVirtualControlRegister>, 4> controlRegs = {
6137             std::make_shared<PerfVirtualControlRegister>(socket_, id),
6138                     std::make_shared<PerfVirtualControlRegister>(socket_, id),
6139                     std::make_shared<PerfVirtualControlRegister>(socket_, id),
6140                     std::make_shared<PerfVirtualControlRegister>(socket_, id)
6141         };
6142         std::shared_ptr<PerfVirtualCounterRegister> counterReg0 = std::make_shared<PerfVirtualCounterRegister>(controlRegs[0]);
6143         std::shared_ptr<PerfVirtualCounterRegister> counterReg1 = std::make_shared<PerfVirtualCounterRegister>(controlRegs[1]);
6144         std::shared_ptr<PerfVirtualCounterRegister> counterReg2 = std::make_shared<PerfVirtualCounterRegister>(controlRegs[2]);
6145         std::shared_ptr<PerfVirtualCounterRegister> counterReg3 = std::make_shared<PerfVirtualCounterRegister>(controlRegs[3]);
6146         std::shared_ptr<PerfVirtualControlRegister> fixedControlReg = std::make_shared<PerfVirtualControlRegister>(socket_, id, true);
6147         std::shared_ptr<PerfVirtualCounterRegister> fixedCounterReg = std::make_shared<PerfVirtualCounterRegister>(fixedControlReg);
6148         std::shared_ptr<PerfVirtualFilterRegister> filterReg0 = std::make_shared<PerfVirtualFilterRegister>(controlRegs, 0);
6149         std::shared_ptr<PerfVirtualFilterRegister> filterReg1 = std::make_shared<PerfVirtualFilterRegister>(controlRegs, 1);
6150         pmus.push_back(
6151             UncorePMU(
6152                 std::make_shared<PerfVirtualDummyUnitControlRegister>(),
6153                 controlRegs[0],
6154                 controlRegs[1],
6155                 controlRegs[2],
6156                 controlRegs[3],
6157                 counterReg0,
6158                 counterReg1,
6159                 counterReg2,
6160                 counterReg3,
6161                 fixed ? fixedControlReg : std::shared_ptr<HWRegister>(),
6162                 fixed ? fixedCounterReg : std::shared_ptr<HWRegister>(),
6163                 filter0 ? filterReg0 : std::shared_ptr<HWRegister>(),
6164                 filter1 ? filterReg1 : std::shared_ptr<HWRegister>()
6165             )
6166         );
6167     }
6168 }
6169 #endif
6170 
initPerf(uint32 socket_,const PCM *)6171 void ServerPCICFGUncore::initPerf(uint32 socket_, const PCM * /*pcm*/)
6172 {
6173 #ifdef PCM_USE_PERF
6174     auto imcIDs = enumeratePerfPMUs("imc", 100);
6175     auto m2mIDs = enumeratePerfPMUs("m2m", 100);
6176     auto haIDs = enumeratePerfPMUs("ha", 100);
6177     auto numMemControllers = std::max(m2mIDs.size(), haIDs.size());
6178     for (size_t i = 0; i < numMemControllers; ++i)
6179     {
6180         const int channelsPerController = imcIDs.size() / numMemControllers;
6181         num_imc_channels.push_back(channelsPerController);
6182     }
6183     populatePerfPMUs(socket_, imcIDs, imcPMUs, true);
6184     populatePerfPMUs(socket_, m2mIDs, m2mPMUs, false);
6185     populatePerfPMUs(socket_, enumeratePerfPMUs("qpi", 100), xpiPMUs, false);
6186     populatePerfPMUs(socket_, enumeratePerfPMUs("upi", 100), xpiPMUs, false);
6187     populatePerfPMUs(socket_, enumeratePerfPMUs("m3upi", 100), m3upiPMUs, false);
6188     populatePerfPMUs(socket_, haIDs, haPMUs, false);
6189 #endif
6190 }
6191 
getNumMCChannels(const uint32 controller) const6192 size_t ServerPCICFGUncore::getNumMCChannels(const uint32 controller) const
6193 {
6194     if (controller < num_imc_channels.size())
6195     {
6196         return num_imc_channels[controller];
6197     }
6198     return 0;
6199 }
6200 
~ServerPCICFGUncore()6201 ServerPCICFGUncore::~ServerPCICFGUncore()
6202 {
6203 }
6204 
6205 
// Program the IMC (and on KNL also EDC) PMUs for memory traffic metrics.
// With rankA/rankB < 0, counters 0/1 count reads/writes (or NM tag hits in
// Pmem memory mode) and counters 2/3 are set per 'metrics' (PMM traffic,
// mode-miss tag checks, or partial writes). With valid ranks, per-rank
// RD/WR CAS events are programmed instead. Also programs the M2M boxes and
// releases the QPI/UPI PMUs, which are unused in this mode.
void ServerPCICFGUncore::programServerUncoreMemoryMetrics(const ServerUncoreMemoryMetrics & metrics, const int rankA, const int rankB)
{
    PCM * pcm = PCM::getInstance();
    uint32 MCCntConfig[4] = {0,0,0,0};
    uint32 EDCCntConfig[4] = {0,0,0,0};
    if(rankA < 0 && rankB < 0)
    {
        // fills counters 2 and 3 according to the requested metric;
        // returns false if the metric cannot be programmed on this platform
        auto setEvents2_3 = [&](const uint32 partial_write_event) {
            // report (and signal) lack of PMM/Pmem support
            auto noPmem = [&pcm]() -> bool
            {
                if (pcm->PMMTrafficMetricsAvailable() == false)
                {
                    std::cerr << "PCM Error: PMM/Pmem metrics are not available on your platform\n";
                    return true;
                }
                return false;
            };
            switch (metrics)
            {
                case PmemMemoryMode:
                case PmemMixedMode:
                    if (noPmem()) return false;
                    MCCntConfig[EventPosition::PMM_MM_MISS_CLEAN] = MC_CH_PCI_PMON_CTL_EVENT(0xd3) + MC_CH_PCI_PMON_CTL_UMASK(2); // monitor TAGCHK.MISS_CLEAN on counter 2
                    MCCntConfig[EventPosition::PMM_MM_MISS_DIRTY] = MC_CH_PCI_PMON_CTL_EVENT(0xd3) + MC_CH_PCI_PMON_CTL_UMASK(4); // monitor TAGCHK.MISS_DIRTY on counter 3
                    break;
                case Pmem:
                    if (noPmem()) return false;
                    MCCntConfig[EventPosition::PMM_READ] = MC_CH_PCI_PMON_CTL_EVENT(0xe3);  // monitor PMM_RDQ_REQUESTS on counter 2
                    MCCntConfig[EventPosition::PMM_WRITE] = MC_CH_PCI_PMON_CTL_EVENT(0xe7); // monitor PMM_WPQ_REQUESTS on counter 3
                    break;
                case PartialWrites:
                    MCCntConfig[EventPosition::PARTIAL] = partial_write_event;
                    break;
                default:
                    std::cerr << "PCM Error: unknown metrics: " << metrics << "\n";
                    return false;
            }
            return true;
        };
        // counters 0/1 (reads/writes) are model-specific
        switch(cpu_model)
        {
        case PCM::KNL:
            MCCntConfig[EventPosition::READ] = MC_CH_PCI_PMON_CTL_EVENT(0x03) + MC_CH_PCI_PMON_CTL_UMASK(1);  // monitor reads on counter 0: CAS.RD
            MCCntConfig[EventPosition::WRITE] = MC_CH_PCI_PMON_CTL_EVENT(0x03) + MC_CH_PCI_PMON_CTL_UMASK(2);  // monitor reads on counter 1: CAS.WR
            EDCCntConfig[EventPosition::READ] = MC_CH_PCI_PMON_CTL_EVENT(0x01) + MC_CH_PCI_PMON_CTL_UMASK(1);  // monitor reads on counter 0: RPQ
            EDCCntConfig[EventPosition::WRITE] = MC_CH_PCI_PMON_CTL_EVENT(0x02) + MC_CH_PCI_PMON_CTL_UMASK(1);  // monitor reads on counter 1: WPQ
            break;
        case PCM::SNOWRIDGE:
        case PCM::ICX:
            if (metrics == PmemMemoryMode)
            {
                MCCntConfig[EventPosition::NM_HIT] = MC_CH_PCI_PMON_CTL_EVENT(0xd3) + MC_CH_PCI_PMON_CTL_UMASK(1);  // monitor reads on counter 0: UNC_M_TAGCHK.HIT
            }
            else
            {
                MCCntConfig[EventPosition::READ] = MC_CH_PCI_PMON_CTL_EVENT(0x04) + MC_CH_PCI_PMON_CTL_UMASK(0x0f);  // monitor reads on counter 0: CAS_COUNT.RD
                MCCntConfig[EventPosition::WRITE] = MC_CH_PCI_PMON_CTL_EVENT(0x04) + MC_CH_PCI_PMON_CTL_UMASK(0x30); // monitor writes on counter 1: CAS_COUNT.WR
            }
            if (setEvents2_3(MC_CH_PCI_PMON_CTL_EVENT(0x04) + MC_CH_PCI_PMON_CTL_UMASK(0x0c)) == false) // monitor partial writes on counter 2: CAS_COUNT.RD_UNDERFILL
            {
                return;
            }
            break;
        default:
            MCCntConfig[EventPosition::READ] = MC_CH_PCI_PMON_CTL_EVENT(0x04) + MC_CH_PCI_PMON_CTL_UMASK(3);  // monitor reads on counter 0: CAS_COUNT.RD
            MCCntConfig[EventPosition::WRITE] = MC_CH_PCI_PMON_CTL_EVENT(0x04) + MC_CH_PCI_PMON_CTL_UMASK(12); // monitor writes on counter 1: CAS_COUNT.WR
            if (setEvents2_3(MC_CH_PCI_PMON_CTL_EVENT(0x04) + MC_CH_PCI_PMON_CTL_UMASK(2)) == false) // monitor partial writes on counter 2: CAS_COUNT.RD_UNDERFILL
            {
                return;
            }
        }
    } else {
        // per-rank mode: program RD/WR CAS events for ranks rankA and rankB;
        // the umask (bank selection) encoding differs per microarchitecture
        switch(cpu_model)
        {
        case PCM::IVYTOWN:
            MCCntConfig[EventPosition::READ_RANK_A] = MC_CH_PCI_PMON_CTL_EVENT((0xb0 + rankA)) + MC_CH_PCI_PMON_CTL_UMASK(0xff); // RD_CAS_RANK(rankA) all banks
            MCCntConfig[EventPosition::WRITE_RANK_A] = MC_CH_PCI_PMON_CTL_EVENT((0xb8 + rankA)) + MC_CH_PCI_PMON_CTL_UMASK(0xff); // WR_CAS_RANK(rankA) all banks
            MCCntConfig[EventPosition::READ_RANK_B] = MC_CH_PCI_PMON_CTL_EVENT((0xb0 + rankB)) + MC_CH_PCI_PMON_CTL_UMASK(0xff); // RD_CAS_RANK(rankB) all banks
            MCCntConfig[EventPosition::WRITE_RANK_B] = MC_CH_PCI_PMON_CTL_EVENT((0xb8 + rankB)) + MC_CH_PCI_PMON_CTL_UMASK(0xff); // WR_CAS_RANK(rankB) all banks
            break;
        case PCM::HASWELLX:
        case PCM::BDX_DE:
        case PCM::BDX:
        case PCM::SKX:
            MCCntConfig[EventPosition::READ_RANK_A] = MC_CH_PCI_PMON_CTL_EVENT((0xb0 + rankA)) + MC_CH_PCI_PMON_CTL_UMASK(16); // RD_CAS_RANK(rankA) all banks
            MCCntConfig[EventPosition::WRITE_RANK_A] = MC_CH_PCI_PMON_CTL_EVENT((0xb8 + rankA)) + MC_CH_PCI_PMON_CTL_UMASK(16); // WR_CAS_RANK(rankA) all banks
            MCCntConfig[EventPosition::READ_RANK_B] = MC_CH_PCI_PMON_CTL_EVENT((0xb0 + rankB)) + MC_CH_PCI_PMON_CTL_UMASK(16); // RD_CAS_RANK(rankB) all banks
            MCCntConfig[EventPosition::WRITE_RANK_B] = MC_CH_PCI_PMON_CTL_EVENT((0xb8 + rankB)) + MC_CH_PCI_PMON_CTL_UMASK(16); // WR_CAS_RANK(rankB) all banks
            break;
        case PCM::ICX:
        case PCM::SNOWRIDGE:
            MCCntConfig[EventPosition::READ_RANK_A] = MC_CH_PCI_PMON_CTL_EVENT((0xb0 + rankA)) + MC_CH_PCI_PMON_CTL_UMASK(0x28); // RD_CAS_RANK(rankA) all banks
            MCCntConfig[EventPosition::WRITE_RANK_A] = MC_CH_PCI_PMON_CTL_EVENT((0xb8 + rankA)) + MC_CH_PCI_PMON_CTL_UMASK(0x28); // WR_CAS_RANK(rankA) all banks
            MCCntConfig[EventPosition::READ_RANK_B] = MC_CH_PCI_PMON_CTL_EVENT((0xb0 + rankB)) + MC_CH_PCI_PMON_CTL_UMASK(0x28); // RD_CAS_RANK(rankB) all banks
            MCCntConfig[EventPosition::WRITE_RANK_B] = MC_CH_PCI_PMON_CTL_EVENT((0xb8 + rankB)) + MC_CH_PCI_PMON_CTL_UMASK(0x28); // WR_CAS_RANK(rankB) all banks
            break;
        case PCM::KNL:
            // KNL has no per-rank events; fall back to total CAS + EDC queues
            MCCntConfig[EventPosition::READ] = MC_CH_PCI_PMON_CTL_EVENT(0x03) + MC_CH_PCI_PMON_CTL_UMASK(1);  // monitor reads on counter 0: CAS.RD
            MCCntConfig[EventPosition::WRITE] = MC_CH_PCI_PMON_CTL_EVENT(0x03) + MC_CH_PCI_PMON_CTL_UMASK(2);  // monitor reads on counter 1: CAS.WR
            EDCCntConfig[EventPosition::READ] = MC_CH_PCI_PMON_CTL_EVENT(0x01) + MC_CH_PCI_PMON_CTL_UMASK(1);  // monitor reads on counter 0: RPQ
            EDCCntConfig[EventPosition::WRITE] = MC_CH_PCI_PMON_CTL_EVENT(0x02) + MC_CH_PCI_PMON_CTL_UMASK(1);  // monitor reads on counter 1: WPQ
            break;
        default:
            std::cerr << "PCM Error: your processor " << pcm->getCPUBrandString() << " model " << cpu_model << " does not support the required performance events \n";
            return;
        }
    }
    programIMC(MCCntConfig);
    if(cpu_model == PCM::KNL) programEDC(EDCCntConfig);

    programM2M();

    xpiPMUs.clear(); // no QPI events used
    return;
}
6321 
program()6322 void ServerPCICFGUncore::program()
6323 {
6324     PCM * pcm = PCM::getInstance();
6325     uint32 MCCntConfig[4] = {0, 0, 0, 0};
6326     uint32 EDCCntConfig[4] = {0, 0, 0, 0};
6327     switch(cpu_model)
6328     {
6329     case PCM::KNL:
6330         MCCntConfig[EventPosition::READ] = MC_CH_PCI_PMON_CTL_EVENT(0x03) + MC_CH_PCI_PMON_CTL_UMASK(1);  // monitor reads on counter 0: CAS_COUNT.RD
6331         MCCntConfig[EventPosition::WRITE] = MC_CH_PCI_PMON_CTL_EVENT(0x03) + MC_CH_PCI_PMON_CTL_UMASK(2); // monitor writes on counter 1: CAS_COUNT.WR
6332         EDCCntConfig[EventPosition::READ] = MC_CH_PCI_PMON_CTL_EVENT(0x01) + MC_CH_PCI_PMON_CTL_UMASK(1);  // monitor reads on counter 0: RPQ
6333         EDCCntConfig[EventPosition::WRITE] = MC_CH_PCI_PMON_CTL_EVENT(0x02) + MC_CH_PCI_PMON_CTL_UMASK(1);  // monitor reads on counter 1: WPQ
6334         break;
6335     case PCM::SNOWRIDGE:
6336     case PCM::ICX:
6337         MCCntConfig[EventPosition::READ] = MC_CH_PCI_PMON_CTL_EVENT(0x04) + MC_CH_PCI_PMON_CTL_UMASK(0x0f);  // monitor reads on counter 0: CAS_COUNT.RD
6338         MCCntConfig[EventPosition::WRITE] = MC_CH_PCI_PMON_CTL_EVENT(0x04) + MC_CH_PCI_PMON_CTL_UMASK(0x30); // monitor writes on counter 1: CAS_COUNT.WR
6339         break;
6340     default:
6341         MCCntConfig[EventPosition::READ] = MC_CH_PCI_PMON_CTL_EVENT(0x04) + MC_CH_PCI_PMON_CTL_UMASK(3);  // monitor reads on counter 0: CAS_COUNT.RD
6342         MCCntConfig[EventPosition::WRITE] = MC_CH_PCI_PMON_CTL_EVENT(0x04) + MC_CH_PCI_PMON_CTL_UMASK(12); // monitor writes on counter 1: CAS_COUNT.WR
6343     }
6344 
6345     if (pcm->PMMTrafficMetricsAvailable())
6346     {
6347         MCCntConfig[EventPosition::PMM_READ] = MC_CH_PCI_PMON_CTL_EVENT(0xe3); // monitor PMM_RDQ_REQUESTS on counter 2
6348         MCCntConfig[EventPosition::PMM_WRITE] = MC_CH_PCI_PMON_CTL_EVENT(0xe7); // monitor PMM_WPQ_REQUESTS on counter 3
6349     }
6350 
6351     programIMC(MCCntConfig);
6352     if(cpu_model == PCM::KNL) programEDC(EDCCntConfig);
6353 
6354     programM2M();
6355 
6356     uint32 event[4];
6357     if (PCM::hasUPI(cpu_model))
6358     {
6359         // monitor TxL0_POWER_CYCLES
6360         event[0] = Q_P_PCI_PMON_CTL_EVENT(0x26);
6361         // monitor RxL_FLITS.ALL_DATA on counter 1
6362         event[1] = Q_P_PCI_PMON_CTL_EVENT(0x03) + Q_P_PCI_PMON_CTL_UMASK(0xF);
6363         // monitor TxL_FLITS.NON_DATA+ALL_DATA on counter 2
6364         event[2] = Q_P_PCI_PMON_CTL_EVENT(0x02) + Q_P_PCI_PMON_CTL_UMASK((0x97|0x0F));
6365         // monitor UPI CLOCKTICKS
6366         event[ServerUncoreCounterState::EventPosition::xPI_CLOCKTICKS] = Q_P_PCI_PMON_CTL_EVENT(0x01);
6367     }
6368     else
6369     {
6370         // monitor DRS data received on counter 0: RxL_FLITS_G1.DRS_DATA
6371         event[0] = Q_P_PCI_PMON_CTL_EVENT(0x02) + Q_P_PCI_PMON_CTL_EVENT_EXT + Q_P_PCI_PMON_CTL_UMASK(8);
6372         // monitor NCB data received on counter 1: RxL_FLITS_G2.NCB_DATA
6373         event[1] = Q_P_PCI_PMON_CTL_EVENT(0x03) + Q_P_PCI_PMON_CTL_EVENT_EXT + Q_P_PCI_PMON_CTL_UMASK(4);
6374         // monitor outgoing data+nondata flits on counter 2: TxL_FLITS_G0.DATA + TxL_FLITS_G0.NON_DATA
6375         event[2] = Q_P_PCI_PMON_CTL_EVENT(0x00) + Q_P_PCI_PMON_CTL_UMASK(6);
6376         // monitor QPI clocks
6377         event[ServerUncoreCounterState::EventPosition::xPI_CLOCKTICKS] = Q_P_PCI_PMON_CTL_EVENT(0x14); // QPI clocks (CLOCKTICKS)
6378     }
6379     programXPI(event);
6380     programHA();
6381 }
6382 
programXPI(const uint32 * event)6383 void ServerPCICFGUncore::programXPI(const uint32 * event)
6384 {
6385     const uint32 extra = PCM::hasUPI(cpu_model) ? UNC_PMON_UNIT_CTL_RSV : UNC_PMON_UNIT_CTL_FRZ_EN;
6386     for (uint32 i = 0; i < (uint32)xpiPMUs.size(); ++i)
6387     {
6388         // QPI LL PMU
6389 
6390         if (xpiPMUs[i].initFreeze(extra,
6391             "       Please see BIOS options to enable the export of QPI/UPI performance monitoring devices (devices 8 and 9: function 2).\n")
6392             == false)
6393         {
6394             std::cout << "Link " << (i + 1) << " is disabled\n";
6395             continue;
6396         }
6397 
6398         PCM::program(xpiPMUs[i], event, event + 4, extra);
6399     }
6400     cleanupQPIHandles();
6401 }
6402 
cleanupQPIHandles()6403 void ServerPCICFGUncore::cleanupQPIHandles()
6404 {
6405     for(auto i = xpiPMUs.begin(); i != xpiPMUs.end(); ++i)
6406     {
6407         if (!i->valid())
6408         {
6409             xpiPMUs.erase(i);
6410             cleanupQPIHandles();
6411             return;
6412         }
6413     }
6414 }
6415 
cleanupPMUs()6416 void ServerPCICFGUncore::cleanupPMUs()
6417 {
6418     for (auto & pmu : xpiPMUs)
6419     {
6420         pmu.cleanup();
6421     }
6422     for (auto & pmu : imcPMUs)
6423     {
6424         pmu.cleanup();
6425     }
6426     for (auto & pmu : edcPMUs)
6427     {
6428         pmu.cleanup();
6429     }
6430     for (auto & pmu : m2mPMUs)
6431     {
6432         pmu.cleanup();
6433     }
6434     for (auto & pmu : haPMUs)
6435     {
6436         pmu.cleanup();
6437     }
6438 }
6439 
getImcReads()6440 uint64 ServerPCICFGUncore::getImcReads()
6441 {
6442     return getImcReadsForChannels((uint32)0, (uint32)imcPMUs.size());
6443 }
6444 
getImcReadsForController(uint32 controller)6445 uint64 ServerPCICFGUncore::getImcReadsForController(uint32 controller)
6446 {
6447     assert(controller < num_imc_channels.size());
6448     uint32 beginChannel = 0;
6449     for (uint32 i = 0; i < controller; ++i)
6450     {
6451         beginChannel += num_imc_channels[i];
6452     }
6453     const uint32 endChannel = beginChannel + num_imc_channels[controller];
6454     return getImcReadsForChannels(beginChannel, endChannel);
6455 }
6456 
getImcReadsForChannels(uint32 beginChannel,uint32 endChannel)6457 uint64 ServerPCICFGUncore::getImcReadsForChannels(uint32 beginChannel, uint32 endChannel)
6458 {
6459     uint64 result = 0;
6460     for (uint32 i = beginChannel; i < endChannel && i < imcPMUs.size(); ++i)
6461     {
6462         result += getMCCounter(i, EventPosition::READ);
6463     }
6464     return result;
6465 }
6466 
getImcWrites()6467 uint64 ServerPCICFGUncore::getImcWrites()
6468 {
6469     uint64 result = 0;
6470     for (uint32 i = 0; i < (uint32)imcPMUs.size(); ++i)
6471     {
6472         result += getMCCounter(i, EventPosition::WRITE);
6473     }
6474 
6475     return result;
6476 }
6477 
getPMMReads()6478 uint64 ServerPCICFGUncore::getPMMReads()
6479 {
6480     uint64 result = 0;
6481     for (uint32 i = 0; i < (uint32)m2mPMUs.size(); ++i)
6482     {
6483         result += getM2MCounter(i, EventPosition::PMM_READ);
6484     }
6485     return result;
6486 }
6487 
getPMMWrites()6488 uint64 ServerPCICFGUncore::getPMMWrites()
6489 {
6490     uint64 result = 0;
6491     for (uint32 i = 0; i < (uint32)m2mPMUs.size(); ++i)
6492     {
6493         result += getM2MCounter(i, EventPosition::PMM_WRITE);
6494     }
6495     return result;
6496 }
6497 
getEdcReads()6498 uint64 ServerPCICFGUncore::getEdcReads()
6499 {
6500     uint64 result = 0;
6501 
6502     for (auto & pmu: edcPMUs)
6503     {
6504         result += *pmu.counterValue[EventPosition::READ];
6505     }
6506 
6507     return result;
6508 }
6509 
getEdcWrites()6510 uint64 ServerPCICFGUncore::getEdcWrites()
6511 {
6512     uint64 result = 0;
6513 
6514     for (auto & pmu : edcPMUs)
6515     {
6516         result += *pmu.counterValue[EventPosition::WRITE];
6517     }
6518 
6519     return result;
6520 }
6521 
getIncomingDataFlits(uint32 port)6522 uint64 ServerPCICFGUncore::getIncomingDataFlits(uint32 port)
6523 {
6524     uint64 drs = 0, ncb = 0;
6525 
6526     if (port >= (uint32)xpiPMUs.size())
6527         return 0;
6528 
6529     if (PCM::hasUPI(cpu_model) == false)
6530     {
6531         drs = *xpiPMUs[port].counterValue[0];
6532     }
6533     ncb = *xpiPMUs[port].counterValue[1];
6534 
6535     return drs + ncb;
6536 }
6537 
getOutgoingFlits(uint32 port)6538 uint64 ServerPCICFGUncore::getOutgoingFlits(uint32 port)
6539 {
6540     return getQPILLCounter(port,2);
6541 }
6542 
getUPIL0TxCycles(uint32 port)6543 uint64 ServerPCICFGUncore::getUPIL0TxCycles(uint32 port)
6544 {
6545     if (PCM::hasUPI(cpu_model))
6546         return getQPILLCounter(port,0);
6547     return 0;
6548 }
6549 
// Program the uncore for power metrics: QPI/UPI L0p/L1 power-state cycles
// and clockticks, plus IMC CKE (rank power-down) or self-refresh cycles.
// mc_profile selects which pair of DRAM ranks (0-3) or self-refresh (4)
// is monitored; each rank uses an inverted-threshold counter (cycles in
// state) and an edge-detect counter (number of transitions).
void ServerPCICFGUncore::program_power_metrics(int mc_profile)
{
    uint32 xPIEvents[4] = { 0,0,0,0 };
    // event codes differ between UPI (SKX+) and QPI link layers
    xPIEvents[ServerUncoreCounterState::EventPosition::xPI_TxL0P_POWER_CYCLES] = (uint32)Q_P_PCI_PMON_CTL_EVENT((PCM::hasUPI(cpu_model) ? 0x27 : 0x0D)); // L0p Tx Cycles (TxL0P_POWER_CYCLES)
    xPIEvents[ServerUncoreCounterState::EventPosition::xPI_L1_POWER_CYCLES] = (uint32)Q_P_PCI_PMON_CTL_EVENT((PCM::hasUPI(cpu_model) ? 0x21 : 0x12)); // L1 Cycles (L1_POWER_CYCLES)
    xPIEvents[ServerUncoreCounterState::EventPosition::xPI_CLOCKTICKS] = (uint32)Q_P_PCI_PMON_CTL_EVENT((PCM::hasUPI(cpu_model) ? 0x01 : 0x14)); // QPI/UPI clocks (CLOCKTICKS)

    programXPI(xPIEvents);

    uint32 MCCntConfig[4] = {0,0,0,0};
    // the CKE_CYCLES event code moved on ICX/SNOWRIDGE
    unsigned int UNC_M_POWER_CKE_CYCLES = 0x83;
    if (cpu_model == PCM::ICX || cpu_model == PCM::SNOWRIDGE)
    {
        UNC_M_POWER_CKE_CYCLES = 0x47;
    }
    // per profile: counters 0/2 count cycles a rank is in CKE power-down
    // (invert + threshold 1), counters 1/3 count power-down entries (edge detect)
    switch(mc_profile)
    {
        case 0: // POWER_CKE_CYCLES.RANK0 and POWER_CKE_CYCLES.RANK1
            MCCntConfig[0] = MC_CH_PCI_PMON_CTL_EVENT(UNC_M_POWER_CKE_CYCLES) + MC_CH_PCI_PMON_CTL_UMASK(1) + MC_CH_PCI_PMON_CTL_INVERT + MC_CH_PCI_PMON_CTL_THRESH(1);
            MCCntConfig[1] = MC_CH_PCI_PMON_CTL_EVENT(UNC_M_POWER_CKE_CYCLES) + MC_CH_PCI_PMON_CTL_UMASK(1) + MC_CH_PCI_PMON_CTL_THRESH(1) + MC_CH_PCI_PMON_CTL_EDGE_DET;
            MCCntConfig[2] = MC_CH_PCI_PMON_CTL_EVENT(UNC_M_POWER_CKE_CYCLES) + MC_CH_PCI_PMON_CTL_UMASK(2) + MC_CH_PCI_PMON_CTL_INVERT + MC_CH_PCI_PMON_CTL_THRESH(1);
            MCCntConfig[3] = MC_CH_PCI_PMON_CTL_EVENT(UNC_M_POWER_CKE_CYCLES) + MC_CH_PCI_PMON_CTL_UMASK(2) + MC_CH_PCI_PMON_CTL_THRESH(1) + MC_CH_PCI_PMON_CTL_EDGE_DET;
            break;
        case  1: // POWER_CKE_CYCLES.RANK2 and POWER_CKE_CYCLES.RANK3
            MCCntConfig[0] = MC_CH_PCI_PMON_CTL_EVENT(UNC_M_POWER_CKE_CYCLES) + MC_CH_PCI_PMON_CTL_UMASK(4) + MC_CH_PCI_PMON_CTL_INVERT + MC_CH_PCI_PMON_CTL_THRESH(1);
            MCCntConfig[1] = MC_CH_PCI_PMON_CTL_EVENT(UNC_M_POWER_CKE_CYCLES) + MC_CH_PCI_PMON_CTL_UMASK(4) + MC_CH_PCI_PMON_CTL_THRESH(1) + MC_CH_PCI_PMON_CTL_EDGE_DET;
            MCCntConfig[2] = MC_CH_PCI_PMON_CTL_EVENT(UNC_M_POWER_CKE_CYCLES) + MC_CH_PCI_PMON_CTL_UMASK(8) + MC_CH_PCI_PMON_CTL_INVERT + MC_CH_PCI_PMON_CTL_THRESH(1);
            MCCntConfig[3] = MC_CH_PCI_PMON_CTL_EVENT(UNC_M_POWER_CKE_CYCLES) + MC_CH_PCI_PMON_CTL_UMASK(8) + MC_CH_PCI_PMON_CTL_THRESH(1) + MC_CH_PCI_PMON_CTL_EDGE_DET;
            break;
        case 2: // POWER_CKE_CYCLES.RANK4 and POWER_CKE_CYCLES.RANK5
            MCCntConfig[0] = MC_CH_PCI_PMON_CTL_EVENT(UNC_M_POWER_CKE_CYCLES) + MC_CH_PCI_PMON_CTL_UMASK(0x10) + MC_CH_PCI_PMON_CTL_INVERT + MC_CH_PCI_PMON_CTL_THRESH(1);
            MCCntConfig[1] = MC_CH_PCI_PMON_CTL_EVENT(UNC_M_POWER_CKE_CYCLES) + MC_CH_PCI_PMON_CTL_UMASK(0x10) + MC_CH_PCI_PMON_CTL_THRESH(1) + MC_CH_PCI_PMON_CTL_EDGE_DET;
            MCCntConfig[2] = MC_CH_PCI_PMON_CTL_EVENT(UNC_M_POWER_CKE_CYCLES) + MC_CH_PCI_PMON_CTL_UMASK(0x20) + MC_CH_PCI_PMON_CTL_INVERT + MC_CH_PCI_PMON_CTL_THRESH(1);
            MCCntConfig[3] = MC_CH_PCI_PMON_CTL_EVENT(UNC_M_POWER_CKE_CYCLES) + MC_CH_PCI_PMON_CTL_UMASK(0x20) + MC_CH_PCI_PMON_CTL_THRESH(1) + MC_CH_PCI_PMON_CTL_EDGE_DET;
            break;
        case 3: // POWER_CKE_CYCLES.RANK6 and POWER_CKE_CYCLES.RANK7
            MCCntConfig[0] = MC_CH_PCI_PMON_CTL_EVENT(UNC_M_POWER_CKE_CYCLES) + MC_CH_PCI_PMON_CTL_UMASK(0x40) + MC_CH_PCI_PMON_CTL_INVERT + MC_CH_PCI_PMON_CTL_THRESH(1);
            MCCntConfig[1] = MC_CH_PCI_PMON_CTL_EVENT(UNC_M_POWER_CKE_CYCLES) + MC_CH_PCI_PMON_CTL_UMASK(0x40) + MC_CH_PCI_PMON_CTL_THRESH(1) + MC_CH_PCI_PMON_CTL_EDGE_DET;
            MCCntConfig[2] = MC_CH_PCI_PMON_CTL_EVENT(UNC_M_POWER_CKE_CYCLES) + MC_CH_PCI_PMON_CTL_UMASK(0x80) + MC_CH_PCI_PMON_CTL_INVERT + MC_CH_PCI_PMON_CTL_THRESH(1);
            MCCntConfig[3] = MC_CH_PCI_PMON_CTL_EVENT(UNC_M_POWER_CKE_CYCLES) + MC_CH_PCI_PMON_CTL_UMASK(0x80) + MC_CH_PCI_PMON_CTL_THRESH(1) + MC_CH_PCI_PMON_CTL_EDGE_DET;
           break;
        case 4: // POWER_SELF_REFRESH
            MCCntConfig[0] = MC_CH_PCI_PMON_CTL_EVENT(0x43);
            MCCntConfig[1] = MC_CH_PCI_PMON_CTL_EVENT(0x43) + MC_CH_PCI_PMON_CTL_THRESH(1) + MC_CH_PCI_PMON_CTL_EDGE_DET;
            MCCntConfig[2] = MC_CH_PCI_PMON_CTL_EVENT(0x85);
            break;
    }

    programIMC(MCCntConfig);
}
6600 
// Program every IMC channel PMU with the four given counter configs and
// enable+reset the fixed counter (DRAM clockticks). The register write
// order (freeze, fixed-counter enable, then reset) follows the uncore
// programming sequence.
void ServerPCICFGUncore::programIMC(const uint32 * MCCntConfig)
{
    // SKX requires the reserved control bit, other models the freeze-enable bit
    const uint32 extraIMC = (cpu_model == PCM::SKX)?UNC_PMON_UNIT_CTL_RSV:UNC_PMON_UNIT_CTL_FRZ_EN;

    for (uint32 i = 0; i < (uint32)imcPMUs.size(); ++i)
    {
        // imc PMU
        imcPMUs[i].initFreeze(extraIMC);

        // enable fixed counter (DRAM clocks)
        *imcPMUs[i].fixedCounterControl = MC_CH_PCI_PMON_FIXED_CTL_EN;

        // reset it
        *imcPMUs[i].fixedCounterControl = MC_CH_PCI_PMON_FIXED_CTL_EN + MC_CH_PCI_PMON_FIXED_CTL_RST;

        PCM::program(imcPMUs[i], MCCntConfig, MCCntConfig + 4, extraIMC);
    }
}
6619 
programEDC(const uint32 * EDCCntConfig)6620 void ServerPCICFGUncore::programEDC(const uint32 * EDCCntConfig)
6621 {
6622     for (uint32 i = 0; i < (uint32)edcPMUs.size(); ++i)
6623     {
6624         edcPMUs[i].initFreeze(UNC_PMON_UNIT_CTL_FRZ_EN);
6625 
6626         // MCDRAM clocks enabled by default
6627         *edcPMUs[i].fixedCounterControl = EDC_CH_PCI_PMON_FIXED_CTL_EN;
6628 
6629         PCM::program(edcPMUs[i], EDCCntConfig, EDCCntConfig + 4, UNC_PMON_UNIT_CTL_FRZ_EN);
6630     }
6631 }
6632 
programM2M()6633 void ServerPCICFGUncore::programM2M()
6634 {
6635     uint64 cfg[4] = {0, 0, 0, 0};
6636     switch (cpu_model)
6637     {
6638     case PCM::ICX:
6639         cfg[EventPosition::NM_HIT] = M2M_PCI_PMON_CTL_EVENT(0x2c) + M2M_PCI_PMON_CTL_UMASK(3);    // UNC_M2M_TAG_HIT.NM_DRD_HIT_* events (CLEAN | DIRTY)
6640         cfg[EventPosition::M2M_CLOCKTICKS] = 0;                                                      // CLOCKTICKS
6641         cfg[EventPosition::PMM_READ] = M2M_PCI_PMON_CTL_EVENT(0x37) + M2M_PCI_PMON_CTL_UMASK(0x20) + UNC_PMON_CTL_UMASK_EXT(0x07);  // UNC_M2M_IMC_READS.TO_PMM
6642         cfg[EventPosition::PMM_WRITE] = M2M_PCI_PMON_CTL_EVENT(0x38) + M2M_PCI_PMON_CTL_UMASK(0x80) + UNC_PMON_CTL_UMASK_EXT(0x1C); // UNC_M2M_IMC_WRITES.TO_PMM
6643         break;
6644     default:
6645         cfg[EventPosition::NM_HIT] = M2M_PCI_PMON_CTL_EVENT(0x2c) + M2M_PCI_PMON_CTL_UMASK(3);    // UNC_M2M_TAG_HIT.NM_DRD_HIT_* events (CLEAN | DIRTY)
6646         cfg[EventPosition::M2M_CLOCKTICKS] = 0;                                                      // CLOCKTICKS
6647         cfg[EventPosition::PMM_READ] = M2M_PCI_PMON_CTL_EVENT(0x37) + M2M_PCI_PMON_CTL_UMASK(0x8);  // UNC_M2M_IMC_READS.TO_PMM
6648         cfg[EventPosition::PMM_WRITE] = M2M_PCI_PMON_CTL_EVENT(0x38) + M2M_PCI_PMON_CTL_UMASK(0x20); // UNC_M2M_IMC_WRITES.TO_PMM
6649     }
6650     programM2M(cfg);
6651 }
6652 
programM2M(const uint64 * M2MCntConfig)6653 void ServerPCICFGUncore::programM2M(const uint64* M2MCntConfig)
6654 {
6655     {
6656         for (auto & pmu : m2mPMUs)
6657         {
6658             pmu.initFreeze(UNC_PMON_UNIT_CTL_RSV);
6659             PCM::program(pmu, M2MCntConfig, M2MCntConfig + 4, UNC_PMON_UNIT_CTL_RSV);
6660         }
6661     }
6662 }
6663 
programM3UPI(const uint32 * M3UPICntConfig)6664 void ServerPCICFGUncore::programM3UPI(const uint32* M3UPICntConfig)
6665 {
6666     {
6667         for (auto& pmu : m3upiPMUs)
6668         {
6669             pmu.initFreeze(UNC_PMON_UNIT_CTL_RSV);
6670             PCM::program(pmu, M3UPICntConfig, M3UPICntConfig + 4, UNC_PMON_UNIT_CTL_RSV);
6671         }
6672     }
6673 }
6674 
programHA(const uint32 * config)6675 void ServerPCICFGUncore::programHA(const uint32 * config)
6676 {
6677     for (auto & pmu : haPMUs)
6678     {
6679         pmu.initFreeze(UNC_PMON_UNIT_CTL_RSV);
6680         PCM::program(pmu, config, config + 4, UNC_PMON_UNIT_CTL_RSV);
6681     }
6682 }
6683 
getHARequests()6684 uint64 ServerPCICFGUncore::getHARequests()
6685 {
6686     uint64 result = 0;
6687     for (auto & pmu: haPMUs)
6688     {
6689         result += *pmu.counterValue[PCM::EventPosition::REQUESTS_ALL];
6690     }
6691     return result;
6692 }
6693 
getHALocalRequests()6694 uint64 ServerPCICFGUncore::getHALocalRequests()
6695 {
6696     uint64 result = 0;
6697     for (auto & pmu: haPMUs)
6698     {
6699         result += *pmu.counterValue[PCM::EventPosition::REQUESTS_LOCAL];
6700     }
6701     return result;
6702 }
6703 
programHA()6704 void ServerPCICFGUncore::programHA()
6705 {
6706 	uint32 config[4];
6707 	config[0] = 0;
6708 	config[1] = 0;
6709 #ifdef PCM_HA_REQUESTS_READS_ONLY
6710 	// HA REQUESTS READ: LOCAL + REMOTE
6711 	config[PCM::EventPosition::REQUESTS_ALL] = HA_PCI_PMON_CTL_EVENT(0x01) + HA_PCI_PMON_CTL_UMASK((1 + 2));
6712 	// HA REQUESTS READ: LOCAL ONLY
6713 	config[PCM::EventPosition::REQUESTS_LOCAL] = HA_PCI_PMON_CTL_EVENT(0x01) + HA_PCI_PMON_CTL_UMASK((1));
6714 #else
6715 	// HA REQUESTS READ+WRITE+REMOTE+LOCAL
6716 	config[PCM::EventPosition::REQUESTS_ALL] = HA_PCI_PMON_CTL_EVENT(0x01) + HA_PCI_PMON_CTL_UMASK((1 + 2 + 4 + 8));
6717 	// HA REQUESTS READ+WRITE (LOCAL only)
6718 	config[PCM::EventPosition::REQUESTS_LOCAL] = HA_PCI_PMON_CTL_EVENT(0x01) + HA_PCI_PMON_CTL_UMASK((1 + 4));
6719 #endif
6720 	programHA(config);
6721 }
6722 
freezeCounters()6723 void ServerPCICFGUncore::freezeCounters()
6724 {
6725     writeAllUnitControl(UNC_PMON_UNIT_CTL_FRZ + ((cpu_model == PCM::SKX) ? UNC_PMON_UNIT_CTL_RSV : UNC_PMON_UNIT_CTL_FRZ_EN));
6726 }
6727 
writeAllUnitControl(const uint32 value)6728 void ServerPCICFGUncore::writeAllUnitControl(const uint32 value)
6729 {
6730     for (auto& pmuVector : allPMUs)
6731     {
6732         for (auto& pmu : *pmuVector)
6733         {
6734             pmu.writeUnitControl(value);
6735         }
6736     }
6737 }
6738 
unfreezeCounters()6739 void ServerPCICFGUncore::unfreezeCounters()
6740 {
6741     writeAllUnitControl((cpu_model == PCM::SKX) ? UNC_PMON_UNIT_CTL_RSV : UNC_PMON_UNIT_CTL_FRZ_EN);
6742 }
6743 
getQPIClocks(uint32 port)6744 uint64 ServerPCICFGUncore::getQPIClocks(uint32 port)
6745 {
6746     return getQPILLCounter(port, ServerUncoreCounterState::EventPosition::xPI_CLOCKTICKS);
6747 }
6748 
getQPIL0pTxCycles(uint32 port)6749 uint64 ServerPCICFGUncore::getQPIL0pTxCycles(uint32 port)
6750 {
6751     return getQPILLCounter(port, ServerUncoreCounterState::EventPosition::xPI_TxL0P_POWER_CYCLES);
6752 }
6753 
getQPIL1Cycles(uint32 port)6754 uint64 ServerPCICFGUncore::getQPIL1Cycles(uint32 port)
6755 {
6756     return getQPILLCounter(port, ServerUncoreCounterState::EventPosition::xPI_L1_POWER_CYCLES);
6757 }
6758 
getDRAMClocks(uint32 channel)6759 uint64 ServerPCICFGUncore::getDRAMClocks(uint32 channel)
6760 {
6761     uint64 result = 0;
6762 
6763     if (channel < (uint32)imcPMUs.size())
6764         result = *(imcPMUs[channel].fixedCounterValue);
6765 
6766     // std::cout << "DEBUG: DRAMClocks on channel " << channel << " = " << result << "\n";
6767     return result;
6768 }
6769 
getMCDRAMClocks(uint32 channel)6770 uint64 ServerPCICFGUncore::getMCDRAMClocks(uint32 channel)
6771 {
6772     uint64 result = 0;
6773 
6774     if (channel < (uint32)edcPMUs.size())
6775         result = *edcPMUs[channel].fixedCounterValue;
6776 
6777     // std::cout << "DEBUG: MCDRAMClocks on EDC" << channel << " = " << result << "\n";
6778     return result;
6779 }
6780 
getPMUCounter(std::vector<UncorePMU> & pmu,const uint32 id,const uint32 counter)6781 uint64 ServerPCICFGUncore::getPMUCounter(std::vector<UncorePMU> & pmu, const uint32 id, const uint32 counter)
6782 {
6783     uint64 result = 0;
6784 
6785     if (id < (uint32)pmu.size() && counter < 4 && pmu[id].counterValue[counter].get() != nullptr)
6786     {
6787         result = *(pmu[id].counterValue[counter]);
6788     }
6789     else
6790     {
6791         //std::cout << "DEBUG: Invalid ServerPCICFGUncore::getPMUCounter(" << id << ", " << counter << ") \n";
6792     }
6793     // std::cout << "DEBUG: ServerPCICFGUncore::getPMUCounter(" << id << ", " << counter << ") = " << result << "\n";
6794     return result;
6795 }
6796 
getMCCounter(uint32 channel,uint32 counter)6797 uint64 ServerPCICFGUncore::getMCCounter(uint32 channel, uint32 counter)
6798 {
6799     return getPMUCounter(imcPMUs, channel, counter);
6800 }
6801 
getEDCCounter(uint32 channel,uint32 counter)6802 uint64 ServerPCICFGUncore::getEDCCounter(uint32 channel, uint32 counter)
6803 {
6804     return getPMUCounter(edcPMUs, channel, counter);
6805 }
6806 
getM2MCounter(uint32 box,uint32 counter)6807 uint64 ServerPCICFGUncore::getM2MCounter(uint32 box, uint32 counter)
6808 {
6809     return getPMUCounter(m2mPMUs, box, counter);
6810 }
6811 
getQPILLCounter(uint32 port,uint32 counter)6812 uint64 ServerPCICFGUncore::getQPILLCounter(uint32 port, uint32 counter)
6813 {
6814     return getPMUCounter(xpiPMUs, port, counter);
6815 }
6816 
getM3UPICounter(uint32 port,uint32 counter)6817 uint64 ServerPCICFGUncore::getM3UPICounter(uint32 port, uint32 counter)
6818 {
6819     // std::cout << "DEBUG: ServerPCICFGUncore::getM3UPICounter(" << port << ", " << counter << ") = " << getPMUCounter(m3upiPMUs, port, counter) << "\n";
6820     return getPMUCounter(m3upiPMUs, port, counter);
6821 }
6822 
enableJKTWorkaround(bool enable)6823 void ServerPCICFGUncore::enableJKTWorkaround(bool enable)
6824 {
6825     {
6826         PciHandleType reg(groupnr,iMCbus,14,0);
6827         uint32 value = 0;
6828         reg.read32(0x84, &value);
6829         if(enable)
6830             value |= 2;
6831         else
6832             value &= (~2);
6833         reg.write32(0x84, value);
6834     }
6835     {
6836         PciHandleType reg(groupnr,iMCbus,8,0);
6837         uint32 value = 0;
6838         reg.read32(0x80, &value);
6839         if(enable)
6840             value |= 2;
6841         else
6842             value &= (~2);
6843         reg.write32(0x80, value);
6844     }
6845     {
6846         PciHandleType reg(groupnr,iMCbus,9,0);
6847         uint32 value = 0;
6848         reg.read32(0x80, &value);
6849         if(enable)
6850             value |= 2;
6851         else
6852             value &= (~2);
6853         reg.write32(0x80, value);
6854     }
6855 }
6856 
6857 #define PCM_MEM_CAPACITY (1024ULL*1024ULL*64ULL) // 64 MByte
6858 
initMemTest(ServerPCICFGUncore::MemTestParam & param)6859 void ServerPCICFGUncore::initMemTest(ServerPCICFGUncore::MemTestParam & param)
6860 {
6861     auto & memBufferBlockSize = param.first;
6862     auto & memBuffers = param.second;
6863 #ifdef __linux__
6864     size_t capacity = PCM_MEM_CAPACITY;
6865     char * buffer = (char *)mmap(NULL, capacity, PROT_READ | PROT_WRITE,
6866         MAP_PRIVATE | MAP_ANONYMOUS, 0, 0);
6867     if (buffer == MAP_FAILED) {
6868         std::cerr << "ERROR: mmap failed\n";
6869         return;
6870     }
6871     unsigned long long maxNode = (unsigned long long)(readMaxFromSysFS("/sys/devices/system/node/online") + 1);
6872     if (maxNode == 0)
6873     {
6874         std::cerr << "ERROR: max node is 0 \n";
6875         return;
6876     }
6877     if (maxNode >= 63) maxNode = 63;
6878     const unsigned long long nodeMask = (1ULL << maxNode) - 1ULL;
6879     if (0 != syscall(SYS_mbind, buffer, capacity, 3 /* MPOL_INTERLEAVE */,
6880         &nodeMask, maxNode, 0))
6881     {
6882         std::cerr << "ERROR: mbind failed. nodeMask: " << nodeMask << " maxNode: " << maxNode << "\n";
6883         return;
6884     }
6885     memBuffers.push_back((uint64 *)buffer);
6886     memBufferBlockSize = capacity;
6887 #elif defined(_MSC_VER)
6888     ULONG HighestNodeNumber;
6889     if (!GetNumaHighestNodeNumber(&HighestNodeNumber))
6890     {
6891         std::cerr << "ERROR: GetNumaHighestNodeNumber call failed.\n";
6892         return;
6893     }
6894     memBufferBlockSize = 4096;
6895     for (int i = 0; i < PCM_MEM_CAPACITY / memBufferBlockSize; ++i)
6896     {
6897         LPVOID result = VirtualAllocExNuma(
6898             GetCurrentProcess(),
6899             NULL,
6900             memBufferBlockSize,
6901             MEM_RESERVE | MEM_COMMIT,
6902             PAGE_READWRITE,
6903             i % (HighestNodeNumber + 1)
6904         );
6905 
6906         if (result == NULL)
6907         {
6908             std::cerr << "ERROR: " << i << " VirtualAllocExNuma failed.\n";
6909             for (auto b : memBuffers)
6910             {
6911                 VirtualFree(b, memBufferBlockSize, MEM_RELEASE);
6912             }
6913             memBuffers.clear();
6914             break;
6915         }
6916         else
6917         {
6918             memBuffers.push_back((uint64 *)result);
6919         }
6920     }
6921     #else
6922     std::cerr << "ERROR: memory test is not implemented. QPI/UPI speed and utilization metrics may not be reliable.\n";
6923     #endif
6924     for (auto b : memBuffers)
6925         std::fill(b, b + (memBufferBlockSize / sizeof(uint64)), 0ULL);
6926 }
6927 
doMemTest(const ServerPCICFGUncore::MemTestParam & param)6928 void ServerPCICFGUncore::doMemTest(const ServerPCICFGUncore::MemTestParam & param)
6929 {
6930     const auto & memBufferBlockSize = param.first;
6931     const auto & memBuffers = param.second;
6932     // read and write each cache line once
6933     for (auto b : memBuffers)
6934         for (unsigned int i = 0; i < memBufferBlockSize / sizeof(uint64); i += 64 / sizeof(uint64))
6935         {
6936             (b[i])++;
6937         }
6938 }
6939 
cleanupMemTest(const ServerPCICFGUncore::MemTestParam & param)6940 void ServerPCICFGUncore::cleanupMemTest(const ServerPCICFGUncore::MemTestParam & param)
6941 {
6942     const auto & memBufferBlockSize = param.first;
6943     const auto & memBuffers = param.second;
6944     for (auto b : memBuffers)
6945     {
6946 #if defined(__linux__)
6947         munmap(b, memBufferBlockSize);
6948 #elif defined(_MSC_VER)
6949         VirtualFree(b, memBufferBlockSize, MEM_RELEASE);
6950 #elif defined(__FreeBSD__)
6951         (void) b;                  // avoid the unused variable warning
6952         (void) memBufferBlockSize; // avoid the unused variable warning
6953 #else
6954 #endif
6955     }
6956 }
6957 
// Computes (and caches in qpi_speed) the speed of every QPI/UPI port in
// bytes/second. Tries the QPI_RATE_STATUS PCI register first; if that is not
// available (or on UPI systems) it measures the speed with a memory-traffic
// loop. Returns the maximum speed over all ports, or 0 if none are known.
uint64 ServerPCICFGUncore::computeQPISpeed(const uint32 core_nr, const int cpumodel)
{
    if(qpi_speed.empty())
    {
        PCM * pcm = PCM::getInstance();
        // pin to a core of this socket while programming/reading the registers
        TemporalThreadAffinity aff(core_nr);
        qpi_speed.resize(getNumQPIPorts());

        auto getSpeed = [&] (size_t i) {
           if (i == 1) return 0ULL; // link 1 should have the same speed as link 0, skip it
           uint64 result = 0;
           // fast path: read the rate from the QPI_RATE_STATUS register (pre-UPI parts only)
           if (PCM::hasUPI(cpumodel) == false && i < XPIRegisterLocation.size())
           {
               PciHandleType reg(groupnr,UPIbus, XPIRegisterLocation[i].first, QPI_PORT0_MISC_REGISTER_FUNC_ADDR);
               uint32 value = 0;
               reg.read32(QPI_RATE_STATUS_ADDR, &value);
               value &= 7; // extract lower 3 bits
               // encoded rate: 4.0 GT/s base + 0.8 GT/s per step, times 2 bytes per transfer
               if(value) result = static_cast<uint64>((4000000000ULL + ((uint64)value)*800000000ULL)*2ULL);
           }
           if(result == 0ULL)
           {
               if (PCM::hasUPI(cpumodel) == false)
                   std::cerr << "Warning: QPI_RATE_STATUS register is not available on port " << i << ". Computing QPI speed using a measurement loop.\n";

               // compute qpi speed by generating memory traffic and counting link clocks
               const uint64 timerGranularity = 1000000ULL; // mks

               MemTestParam param;
               initMemTest(param);
               uint64 startClocks = getQPIClocks((uint32)i);
               uint64 startTSC = pcm->getTickCount(timerGranularity, core_nr);
               uint64 endTSC;
               do
               {
                    doMemTest(param);
                    endTSC = pcm->getTickCount(timerGranularity, core_nr);
               } while (endTSC - startTSC < 200000ULL); // spin for 200 ms

               uint64 endClocks = getQPIClocks((uint32)i);
               cleanupMemTest(param);

               // link clocks per second times bytes per link clock = bytes per second
               result = (uint64(double(endClocks - startClocks) * PCM::getBytesPerLinkCycle(cpumodel) * double(timerGranularity) / double(endTSC - startTSC)));
               if(cpumodel == PCM::HASWELLX || cpumodel == PCM::BDX) /* BDX_DE does not have QPI. */{
                  result /=2; // HSX runs QPI clocks with doubled speed
               }
           }
           return result;
         };
         // measure all ports concurrently, then collect the results in order
         std::vector<std::future<uint64> > getSpeedsAsync;
         for (size_t i = 0; i < getNumQPIPorts(); ++i) {
             getSpeedsAsync.push_back(std::async(std::launch::async, getSpeed, i));
         }
         for (size_t i = 0; i < getNumQPIPorts(); ++i) {
             qpi_speed[i] = (i==1)? qpi_speed[0] : getSpeedsAsync[i].get(); // link 1 does not have own speed register, it runs with the speed of link 0
         }
         if (PCM::hasUPI(cpumodel))
         {
             // check the speed of link 3
             if(qpi_speed.size() == 3 && qpi_speed[2] == 0)
             {
                std::cerr << "UPI link 3 is disabled\n";
                qpi_speed.resize(2);
                xpiPMUs.resize(2);
             }
         }
    }
    if(!qpi_speed.empty())
    {
        return *std::max_element(qpi_speed.begin(),qpi_speed.end());
    }
    else
    {
        return 0;
    }
}
7033 
reportQPISpeed() const7034 void ServerPCICFGUncore::reportQPISpeed() const
7035 {
7036     PCM * m = PCM::getInstance();
7037     std::cerr.precision(1);
7038     std::cerr << std::fixed;
7039     for (uint32 i = 0; i < (uint32)qpi_speed.size(); ++i)
7040         std::cerr << "Max QPI link " << i << " speed: " << qpi_speed[i] / (1e9) << " GBytes/second (" << qpi_speed[i] / (1e9 * m->getBytesPerLinkTransfer()) << " GT/second)\n";
7041 }
7042 
CX_MSR_PMON_CTRY(uint32 Cbo,uint32 Ctr) const7043 uint64 PCM::CX_MSR_PMON_CTRY(uint32 Cbo, uint32 Ctr) const
7044 {
7045     if(JAKETOWN == cpu_model || IVYTOWN == cpu_model)
7046     {
7047         return JKT_C0_MSR_PMON_CTR0 + ((JKTIVT_CBO_MSR_STEP)*Cbo) + Ctr;
7048 
7049     } else if(HASWELLX == cpu_model || BDX_DE == cpu_model || BDX == cpu_model || SKX == cpu_model)
7050     {
7051         return HSX_C0_MSR_PMON_CTR0 + ((HSX_CBO_MSR_STEP)*Cbo) + Ctr;
7052     }
7053     else if (ICX == cpu_model || SNOWRIDGE == cpu_model)
7054     {
7055         return CX_MSR_PMON_BOX_CTL(Cbo) + SERVER_CHA_MSR_PMON_CTR0_OFFSET + Ctr;
7056     }
7057     return 0;
7058 }
7059 
CX_MSR_PMON_BOX_FILTER(uint32 Cbo) const7060 uint64 PCM::CX_MSR_PMON_BOX_FILTER(uint32 Cbo) const
7061 {
7062     if(JAKETOWN == cpu_model || IVYTOWN == cpu_model)
7063     {
7064         return JKT_C0_MSR_PMON_BOX_FILTER + ((JKTIVT_CBO_MSR_STEP)*Cbo);
7065 
7066     } else if (HASWELLX == cpu_model || BDX_DE == cpu_model || BDX == cpu_model || SKX == cpu_model)
7067     {
7068         return HSX_C0_MSR_PMON_BOX_FILTER + ((HSX_CBO_MSR_STEP)*Cbo);
7069     } else if (KNL == cpu_model)
7070     {
7071         return KNL_CHA0_MSR_PMON_BOX_CTL + ((KNL_CHA_MSR_STEP)*Cbo);
7072     }
7073     else if (ICX == cpu_model)
7074     {
7075         return CX_MSR_PMON_BOX_CTL(Cbo) + SERVER_CHA_MSR_PMON_BOX_FILTER_OFFSET;
7076     }
7077 
7078     return 0;
7079 }
7080 
CX_MSR_PMON_BOX_FILTER1(uint32 Cbo) const7081 uint64 PCM::CX_MSR_PMON_BOX_FILTER1(uint32 Cbo) const
7082 {
7083     if(IVYTOWN == cpu_model)
7084     {
7085         return IVT_C0_MSR_PMON_BOX_FILTER1 + ((JKTIVT_CBO_MSR_STEP)*Cbo);
7086 
7087     } else if (HASWELLX == cpu_model || BDX_DE == cpu_model || BDX == cpu_model || SKX == cpu_model)
7088     {
7089         return HSX_C0_MSR_PMON_BOX_FILTER1 + ((HSX_CBO_MSR_STEP)*Cbo);
7090     }
7091     return 0;
7092 }
7093 
CX_MSR_PMON_CTLY(uint32 Cbo,uint32 Ctl) const7094 uint64 PCM::CX_MSR_PMON_CTLY(uint32 Cbo, uint32 Ctl) const
7095 {
7096     if(JAKETOWN == cpu_model || IVYTOWN == cpu_model)
7097     {
7098         return JKT_C0_MSR_PMON_CTL0 + ((JKTIVT_CBO_MSR_STEP)*Cbo) + Ctl;
7099 
7100     } else if (HASWELLX == cpu_model || BDX_DE == cpu_model || BDX == cpu_model || SKX == cpu_model)
7101     {
7102         return HSX_C0_MSR_PMON_CTL0 + ((HSX_CBO_MSR_STEP)*Cbo) + Ctl;
7103     }
7104     else if (ICX == cpu_model || SNOWRIDGE == cpu_model)
7105     {
7106         return CX_MSR_PMON_BOX_CTL(Cbo) + SERVER_CHA_MSR_PMON_CTL0_OFFSET + Ctl;
7107     }
7108     return 0;
7109 }
7110 
CX_MSR_PMON_BOX_CTL(uint32 Cbo) const7111 uint64 PCM::CX_MSR_PMON_BOX_CTL(uint32 Cbo) const
7112 {
7113     if(JAKETOWN == cpu_model || IVYTOWN == cpu_model)
7114     {
7115         return JKT_C0_MSR_PMON_BOX_CTL + ((JKTIVT_CBO_MSR_STEP)*Cbo);
7116 
7117     } else if (HASWELLX == cpu_model || BDX_DE == cpu_model || BDX == cpu_model || SKX == cpu_model)
7118     {
7119         return HSX_C0_MSR_PMON_BOX_CTL + ((HSX_CBO_MSR_STEP)*Cbo);
7120     } else if (KNL == cpu_model)
7121     {
7122         return KNL_CHA0_MSR_PMON_BOX_CTRL + ((KNL_CHA_MSR_STEP)*Cbo);
7123     }
7124     else if (ICX == cpu_model)
7125     {
7126         return ICX_CHA_MSR_PMON_BOX_CTL[Cbo];
7127     }
7128     else if (SNOWRIDGE == cpu_model)
7129     {
7130         return SNR_CHA_MSR_PMON_BOX_CTL[Cbo];
7131     }
7132     return 0;
7133 }
7134 
getMaxNumOfCBoxes() const7135 uint32 PCM::getMaxNumOfCBoxes() const
7136 {
7137     static int num = -1;
7138     if (num >= 0)
7139     {
7140         return (uint32)num;
7141     }
7142     if (KNL == cpu_model || SKX == cpu_model || ICX == cpu_model)
7143     {
7144         /*
7145          *  on KNL two physical cores share CHA.
7146          *  The number of CHAs in the processor is stored in bits 5:0
7147          *  of NCUPMONConfig [0x702] MSR.
7148          */
7149         uint64 val;
7150         uint32 refCore = socketRefCore[0];
7151         uint32 NCUPMONConfig = 0x702;
7152         MSR[refCore]->read(NCUPMONConfig, &val);
7153         num = (uint32)(val & 63);
7154     }
7155     else if (SNOWRIDGE == cpu_model)
7156     {
7157         num = (uint32)num_phys_cores_per_socket / 4;
7158     }
7159     else
7160     {
7161         /*
7162          *  on other supported CPUs there is one CBox per physical core.  This calculation will get us
7163          *  the number of physical cores per socket which is the expected
7164          *  value to be returned.
7165          */
7166         num = (uint32)num_phys_cores_per_socket;
7167     }
7168     return num;
7169 }
7170 
getMaxNumOfIIOStacks() const7171 uint32 PCM::getMaxNumOfIIOStacks() const
7172 {
7173     if (iioPMUs.size() > 0)
7174     {
7175         return (uint32)iioPMUs[0].size();
7176     }
7177     return 0;
7178 }
7179 
programCboOpcodeFilter(const uint32 opc0,UncorePMU & pmu,const uint32 nc_,const uint32 opc1,const uint32 loc,const uint32 rem)7180 void PCM::programCboOpcodeFilter(const uint32 opc0, UncorePMU & pmu, const uint32 nc_, const uint32 opc1, const uint32 loc, const uint32 rem)
7181 {
7182     if(JAKETOWN == cpu_model)
7183     {
7184         *pmu.filter[0] = JKT_CBO_MSR_PMON_BOX_FILTER_OPC(opc0);
7185 
7186     } else if(IVYTOWN == cpu_model || HASWELLX == cpu_model || BDX_DE == cpu_model || BDX == cpu_model)
7187     {
7188         *pmu.filter[1] = IVTHSX_CBO_MSR_PMON_BOX_FILTER1_OPC(opc0);
7189     } else if(SKX == cpu_model)
7190     {
7191         *pmu.filter[1] = SKX_CHA_MSR_PMON_BOX_FILTER1_OPC0(opc0) +
7192                 SKX_CHA_MSR_PMON_BOX_FILTER1_OPC1(opc1) +
7193                 (rem?SKX_CHA_MSR_PMON_BOX_FILTER1_REM(1):0ULL) +
7194                 (loc?SKX_CHA_MSR_PMON_BOX_FILTER1_LOC(1):0ULL) +
7195                 SKX_CHA_MSR_PMON_BOX_FILTER1_NM(1) +
7196                 SKX_CHA_MSR_PMON_BOX_FILTER1_NOT_NM(1) +
7197                 (nc_?SKX_CHA_MSR_PMON_BOX_FILTER1_NC(1):0ULL);
7198     }
7199     else
7200     {
7201         std::cerr << "ERROR: programCboOpcodeFilter function is not implemented for cpu model " << cpu_model << std::endl;
7202         throw std::exception();
7203     }
7204 }
7205 
programIIOCounters(uint64 rawEvents[4],int IIOStack)7206 void PCM::programIIOCounters(uint64 rawEvents[4], int IIOStack)
7207 {
7208     std::vector<int32> IIO_units;
7209     if (IIOStack == -1)
7210     {
7211         int stacks_count;
7212         switch (getCPUModel())
7213         {
7214         case PCM::ICX:
7215             stacks_count = ICX_IIO_STACK_COUNT;
7216             break;
7217         case PCM::SNOWRIDGE:
7218             stacks_count = SNR_IIO_STACK_COUNT;
7219             break;
7220         case PCM::SKX:
7221         default:
7222             stacks_count = SKX_IIO_STACK_COUNT;
7223             break;
7224         }
7225         IIO_units.reserve(stacks_count);
7226         for (int stack = 0; stack < stacks_count; ++stack) {
7227             IIO_units.push_back(stack);
7228         }
7229     }
7230     else
7231         IIO_units.push_back(IIOStack);
7232 
7233     for (int32 i = 0; (i < num_sockets) && MSR.size() && iioPMUs.size(); ++i)
7234     {
7235         uint32 refCore = socketRefCore[i];
7236         TemporalThreadAffinity tempThreadAffinity(refCore); // speedup trick for Linux
7237 
7238         for (const auto & unit: IIO_units)
7239         {
7240             if (iioPMUs[i].count(unit) == 0)
7241             {
7242                 std::cerr << "IIO PMU unit (stack) " << unit << " is not found \n";
7243                 continue;
7244             }
7245             auto & pmu = iioPMUs[i][unit];
7246             pmu.initFreeze(UNC_PMON_UNIT_CTL_RSV);
7247 
7248             program(pmu, &rawEvents[0], &rawEvents[4], UNC_PMON_UNIT_CTL_RSV);
7249         }
7250     }
7251 }
7252 
// Translates a generic PCIe event group into model-specific CBo/CHA event and
// filter programming, then programs the CBos via programCbo().
void PCM::programPCIeEventGroup(eventGroup_t &eventGroup)
{
    assert(eventGroup.size() > 0);
    uint64 events[4] = {0};
    uint64 umask[4] = {0};

    switch (cpu_model)
    {
        case PCM::ICX:
        case PCM::SNOWRIDGE:
            // these models take the raw event values directly
            for (uint32 idx = 0; idx < eventGroup.size(); ++idx)
                events[idx] = eventGroup[idx];
            programCbo(events);
            break;
        case PCM::SKX:
        //JKT through CLX generations allow programming only one required event at a time.
            // map the NC filter bit to the TOR_INSERTS IRQ/PRQ umask ...
            if (eventGroup[0] & SKX_CHA_MSR_PMON_BOX_FILTER1_NC(1))
                umask[0] |= (uint64)(SKX_CHA_TOR_INSERTS_UMASK_IRQ(1));
                else
                umask[0] |= (uint64)(SKX_CHA_TOR_INSERTS_UMASK_PRQ(1));

            // ... and the RSV filter bit to the HIT/MISS umask
            if (eventGroup[0] & SKX_CHA_MSR_PMON_BOX_FILTER1_RSV(1))
                umask[0] |= (uint64)(SKX_CHA_TOR_INSERTS_UMASK_HIT(1));
                else
                umask[0] |= (uint64)(SKX_CHA_TOR_INSERTS_UMASK_MISS(1));

            // event 0x35 = TOR_INSERTS
            events[0] += CBO_MSR_PMON_CTL_EVENT(0x35) + CBO_MSR_PMON_CTL_UMASK(umask[0]);
            programCbo(events, SKX_CHA_MSR_PMON_BOX_GET_OPC0(eventGroup[0]),
                                    SKX_CHA_MSR_PMON_BOX_GET_NC(eventGroup[0]));
            break;
        case PCM::BDX_DE:
        case PCM::BDX:
        case PCM::KNL:
        case PCM::HASWELLX:
        case PCM::IVYTOWN:
        case PCM::JAKETOWN:
            // event 0x35 = TOR_INSERTS; FLT selects umask 0x3 vs 0x1, TID enables tid filtering
            events[0] = CBO_MSR_PMON_CTL_EVENT(0x35);
            events[0] += BDX_CBO_MSR_PMON_BOX_GET_FLT(eventGroup[0]) ? CBO_MSR_PMON_CTL_UMASK(0x3) : CBO_MSR_PMON_CTL_UMASK(1);
            events[0] += BDX_CBO_MSR_PMON_BOX_GET_TID(eventGroup[0]) ? CBO_MSR_PMON_CTL_TID_EN : 0ULL;

            programCbo(events, BDX_CBO_MSR_PMON_BOX_GET_OPC0(eventGroup[0]),
                    0, BDX_CBO_MSR_PMON_BOX_GET_TID(eventGroup[0]) ? 0x3e : 0ULL);
            break;
    }
}
7298 
programCbo(const uint64 * events,const uint32 opCode,const uint32 nc_,const uint32 llc_lookup_tid_filter,const uint32 loc,const uint32 rem)7299 void PCM::programCbo(const uint64 * events, const uint32 opCode, const uint32 nc_, const uint32 llc_lookup_tid_filter, const uint32 loc, const uint32 rem)
7300 {
7301     for (size_t i = 0; (i < cboPMUs.size()) && MSR.size(); ++i)
7302     {
7303         uint32 refCore = socketRefCore[i];
7304         TemporalThreadAffinity tempThreadAffinity(refCore); // speedup trick for Linux
7305 
7306         for(uint32 cbo = 0; cbo < getMaxNumOfCBoxes(); ++cbo)
7307         {
7308             cboPMUs[i][cbo].initFreeze(UNC_PMON_UNIT_CTL_FRZ_EN);
7309 
7310             if (ICX != cpu_model && SNOWRIDGE != cpu_model)
7311                 programCboOpcodeFilter(opCode, cboPMUs[i][cbo], nc_, 0, loc, rem);
7312 
7313             if((HASWELLX == cpu_model || BDX_DE == cpu_model || BDX == cpu_model || SKX == cpu_model) && llc_lookup_tid_filter != 0)
7314                 *cboPMUs[i][cbo].filter[0] = llc_lookup_tid_filter;
7315 
7316             PCM::program(cboPMUs[i][cbo], events, events + ServerUncoreCounterState::maxCounters, UNC_PMON_UNIT_CTL_FRZ_EN);
7317 
7318             for (int c = 0; c < ServerUncoreCounterState::maxCounters; ++c)
7319             {
7320                 *cboPMUs[i][cbo].counterValue[c] = 0;
7321             }
7322         }
7323     }
7324 }
7325 
programCboRaw(const uint64 * events,const uint64 filter0,const uint64 filter1)7326 void PCM::programCboRaw(const uint64* events, const uint64 filter0, const uint64 filter1)
7327 {
7328     for (size_t i = 0; (i < cboPMUs.size()) && MSR.size(); ++i)
7329     {
7330         uint32 refCore = socketRefCore[i];
7331         TemporalThreadAffinity tempThreadAffinity(refCore); // speedup trick for Linux
7332 
7333         for (uint32 cbo = 0; cbo < getMaxNumOfCBoxes(); ++cbo)
7334         {
7335             cboPMUs[i][cbo].initFreeze(UNC_PMON_UNIT_CTL_FRZ_EN);
7336 
7337             if (cboPMUs[i][cbo].filter[0].get())
7338             {
7339                 *cboPMUs[i][cbo].filter[0] = filter0;
7340             }
7341 
7342             if (cboPMUs[i][cbo].filter[1].get())
7343             {
7344                 *cboPMUs[i][cbo].filter[1] = filter1;
7345             }
7346 
7347             PCM::program(cboPMUs[i][cbo], events, events + 4, UNC_PMON_UNIT_CTL_FRZ_EN);
7348 
7349             for (int c = 0; c < 4; ++c)
7350             {
7351                 *cboPMUs[i][cbo].counterValue[c] = 0;
7352             }
7353         }
7354     }
7355 }
7356 
programUBOX(const uint64 * events)7357 void PCM::programUBOX(const uint64* events)
7358 {
7359     for (size_t s = 0; (s < uboxPMUs.size()) && MSR.size(); ++s)
7360     {
7361         uint32 refCore = socketRefCore[s];
7362         TemporalThreadAffinity tempThreadAffinity(refCore); // speedup trick for Linux
7363 
7364         *uboxPMUs[s].fixedCounterControl = UCLK_FIXED_CTL_EN;
7365 
7366         PCM::program(uboxPMUs[s], events, events + 2, 0);
7367     }
7368 }
7369 
getCBOCounterState(const uint32 socket_,const uint32 ctr_)7370 uint64 PCM::getCBOCounterState(const uint32 socket_, const uint32 ctr_)
7371 {
7372     uint64 result = 0;
7373 
7374     const uint32 refCore = socketRefCore[socket_];
7375     TemporalThreadAffinity tempThreadAffinity(refCore); // speedup trick for Linux
7376 
7377     for(auto & pmu: cboPMUs[socket_])
7378     {
7379         result += *pmu.counterValue[ctr_];
7380     }
7381     return result;
7382 }
7383 
getUncoreClocks(const uint32 socket_)7384 uint64 PCM::getUncoreClocks(const uint32 socket_)
7385 {
7386     uint64 result = 0;
7387     if (socket_ < uboxPMUs.size())
7388     {
7389         result = *uboxPMUs[socket_].fixedCounterValue;
7390     }
7391     return result;
7392 }
7393 
getPCIeCounterState(const uint32 socket_,const uint32 ctr_)7394 PCIeCounterState PCM::getPCIeCounterState(const uint32 socket_, const uint32 ctr_)
7395 {
7396     PCIeCounterState result;
7397     result.data = getCBOCounterState(socket_, ctr_);
7398     return result;
7399 }
7400 
getPCIeCounterData(const uint32 socket_,const uint32 ctr_)7401 uint64 PCM::getPCIeCounterData(const uint32 socket_, const uint32 ctr_)
7402 {
7403     return getCBOCounterState(socket_, ctr_);
7404 }
7405 
initLLCReadMissLatencyEvents(uint64 * events,uint32 & opCode)7406 void PCM::initLLCReadMissLatencyEvents(uint64 * events, uint32 & opCode)
7407 {
7408     if (LLCReadMissLatencyMetricsAvailable() == false)
7409     {
7410         return;
7411     }
7412     uint64 umask = 3ULL; // MISS_OPCODE
7413     switch (cpu_model)
7414     {
7415         case ICX:
7416         case SNOWRIDGE:
7417             umask = 1ULL;
7418             break;
7419         case SKX:
7420             umask = (uint64)(SKX_CHA_TOR_INSERTS_UMASK_IRQ(1)) + (uint64)(SKX_CHA_TOR_INSERTS_UMASK_MISS(1));
7421             break;
7422     }
7423 
7424     uint64 umask_ext = 0;
7425     switch (cpu_model)
7426     {
7427         case ICX:
7428             umask_ext = 0xC817FE;
7429             break;
7430         case SNOWRIDGE:
7431             umask_ext = 0xC827FE;
7432             break;
7433     }
7434 
7435     const uint64 all_umasks = CBO_MSR_PMON_CTL_UMASK(umask) + UNC_PMON_CTL_UMASK_EXT(umask_ext);
7436     events[EventPosition::TOR_OCCUPANCY] = CBO_MSR_PMON_CTL_EVENT(0x36) + all_umasks; // TOR_OCCUPANCY (must be on counter 0)
7437     events[EventPosition::TOR_INSERTS] = CBO_MSR_PMON_CTL_EVENT(0x35) + all_umasks; // TOR_INSERTS
7438 
7439     opCode = (SKX == cpu_model) ? 0x202 : 0x182;
7440 }
7441 
programCbo()7442 void PCM::programCbo()
7443 {
7444     uint64 events[ServerUncoreCounterState::maxCounters];
7445     std::fill(events, events + ServerUncoreCounterState::maxCounters, 0);
7446     uint32 opCode = 0;
7447 
7448     initLLCReadMissLatencyEvents(events, opCode);
7449     initCHARequestEvents(events);
7450 
7451     programCbo(events, opCode);
7452 
7453     programUBOX(nullptr);
7454 }
7455 
initCHARequestEvents(uint64 * config)7456 void PCM::initCHARequestEvents(uint64 * config)
7457 {
7458     if (localMemoryRequestRatioMetricAvailable() && hasCHA())
7459     {
7460 #ifdef PCM_HA_REQUESTS_READS_ONLY
7461         // HA REQUESTS READ: LOCAL + REMOTE
7462         config[EventPosition::REQUESTS_ALL] = CBO_MSR_PMON_CTL_EVENT(0x50) + CBO_MSR_PMON_CTL_UMASK((1 + 2));
7463         // HA REQUESTS READ: LOCAL ONLY
7464         config[EventPosition::REQUESTS_LOCAL] = CBO_MSR_PMON_CTL_EVENT(0x50) + CBO_MSR_PMON_CTL_UMASK((1));
7465 #else
7466         // HA REQUESTS READ+WRITE+REMOTE+LOCAL
7467         config[EventPosition::REQUESTS_ALL] = CBO_MSR_PMON_CTL_EVENT(0x50) + CBO_MSR_PMON_CTL_UMASK((1 + 2 + 4 + 8));
7468         // HA REQUESTS READ+WRITE (LOCAL only)
7469         config[EventPosition::REQUESTS_LOCAL] = CBO_MSR_PMON_CTL_EVENT(0x50) + CBO_MSR_PMON_CTL_UMASK((1 + 4));
7470 #endif
7471     }
7472 }
7473 
// Wraps a raw (narrow) hardware counter and extends it to 64 bits in
// software: a background watchdog thread periodically reads the counter so
// overflows are observed and folded into extended_value.
CounterWidthExtender::CounterWidthExtender(AbstractRawCounter * raw_counter_, uint64 counter_width_, uint32 watchdog_delay_ms_) : raw_counter(raw_counter_), counter_width(counter_width_), watchdog_delay_ms(watchdog_delay_ms_)
{
    // Snapshot the current raw value as the starting point.
    last_raw_value = (*raw_counter)();
    extended_value = last_raw_value;
    //std::cout << "Initial Value " << extended_value << "\n";
    // Watchdog thread: wakes every watchdog_delay_ms and calls read(),
    // which presumably accumulates any wrap-around (read() is not visible
    // here -- confirm in the class declaration).
    // NOTE(review): the loop has no exit condition, so this thread can never
    // be joined; the destructor deletes the still-joinable std::thread, which
    // calls std::terminate(). Only safe if these objects live for the whole
    // process lifetime -- confirm intended usage.
    UpdateThread = new std::thread(
        [&]() {
        while (1)
        {
            MySleepMs(static_cast<int>(this->watchdog_delay_ms));
            /* uint64 dummy = */ this->read();
        }
    }
    );
}
CounterWidthExtender::~CounterWidthExtender()
{
    // NOTE(review): UpdateThread runs an infinite loop and is never joined or
    // detached, so this delete runs std::thread's destructor on a joinable
    // thread, which calls std::terminate(). This is only safe if
    // CounterWidthExtender instances are never destroyed before process exit
    // -- confirm with callers before relying on this destructor.
    delete UpdateThread;
    // raw_counter ownership was taken in the constructor.
    if (raw_counter) delete raw_counter;
}
7494 
cleanup()7495 void UncorePMU::cleanup()
7496 {
7497     for (int i = 0; i < 4; ++i)
7498     {
7499         if (counterControl[i].get()) *counterControl[i] = 0;
7500     }
7501     if (unitControl.get()) *unitControl = 0;
7502     if (fixedCounterControl.get()) *fixedCounterControl = 0;
7503 }
7504 
freeze(const uint32 extra)7505 void UncorePMU::freeze(const uint32 extra)
7506 {
7507     *unitControl = extra + UNC_PMON_UNIT_CTL_FRZ;
7508 }
7509 
unfreeze(const uint32 extra)7510 void UncorePMU::unfreeze(const uint32 extra)
7511 {
7512     *unitControl = extra;
7513 }
7514 
// Enable freezing on the PMON box and freeze its counters.
// 'extra' carries the generation-specific control bits (e.g. freeze-enable).
// When xPICheckMsg is non-null, a read-back check validates that the box is
// actually accessible; on mismatch the unit handle is invalidated and false
// is returned (presumably for xPI/UPI boxes that BIOS may disable -- the
// parameter name suggests this; confirm with callers).
bool UncorePMU::initFreeze(const uint32 extra, const char* xPICheckMsg)
{
    // freeze enable
    *unitControl = extra;
    if (xPICheckMsg)
    {
        // Read back and compare only the architecturally valid bits.
        if ((extra & UNC_PMON_UNIT_CTL_VALID_BITS_MASK) != ((*unitControl) & UNC_PMON_UNIT_CTL_VALID_BITS_MASK))
        {
            // Box not responding: drop the handle so later accesses are no-ops.
            unitControl = nullptr;
            return false;
        }
    }
    // freeze
    *unitControl = extra + UNC_PMON_UNIT_CTL_FRZ;

#ifdef PCM_UNCORE_PMON_BOX_CHECK_STATUS
    // Optional sanity check that the freeze write took effect.
    const uint64 val = *unitControl;
    if ((val & UNC_PMON_UNIT_CTL_VALID_BITS_MASK) != (extra + UNC_PMON_UNIT_CTL_FRZ))
    {
        std::cerr << "ERROR: PMU counter programming seems not to work. PMON_BOX_CTL=0x" << std::hex << val << " needs to be =0x" << (UNC_PMON_UNIT_CTL_FRZ_EN + UNC_PMON_UNIT_CTL_FRZ) << "\n";
        if (xPICheckMsg)
        {
            std::cerr << xPICheckMsg;
        }
    }
#endif
    return true;
}
7543 
// Reset the box's counter values and then unfreeze it.
// The two writes must stay in this order: the first keeps the freeze bit set
// while asserting the counter-reset bit; the second clears both, letting the
// counters run from zero.
void UncorePMU::resetUnfreeze(const uint32 extra)
{
    // reset counter values (freeze bit still set during the reset)
    *unitControl = extra + UNC_PMON_UNIT_CTL_FRZ + UNC_PMON_UNIT_CTL_RST_COUNTERS;

    // unfreeze counters
    *unitControl = extra;
}
7552 
getIIOCounterState(int socket,int IIOStack,int counter)7553 IIOCounterState PCM::getIIOCounterState(int socket, int IIOStack, int counter)
7554 {
7555     IIOCounterState result;
7556     result.data = 0;
7557     if (socket < (int)iioPMUs.size() && iioPMUs[socket].count(IIOStack) > 0)
7558     {
7559         result.data = *iioPMUs[socket][IIOStack].counterValue[counter];
7560     }
7561     return result;
7562 }
7563 
getIIOCounterStates(int socket,int IIOStack,IIOCounterState * result)7564 void PCM::getIIOCounterStates(int socket, int IIOStack, IIOCounterState * result)
7565 {
7566     uint32 refCore = socketRefCore[socket];
7567     TemporalThreadAffinity tempThreadAffinity(refCore); // speedup trick for Linux
7568 
7569     for (int c = 0; c < 4; ++c) {
7570         result[c] = getIIOCounterState(socket, IIOStack, c);
7571     }
7572 }
7573 
// Fill conf.OffcoreResponseMsrValue[0] / [1] with the CPU-model-specific
// OFFCORE_RESPONSE MSR encodings that count accesses satisfied by local DRAM
// (index 0) vs remote DRAM (index 1), for NUMA traffic attribution.
// The magic values are per-model MSR bit patterns; see Intel SDM / uncore
// event references for the encodings.
// @throws UnsupportedProcessorException for models without known encodings.
void PCM::setupCustomCoreEventsForNuma(PCM::ExtendedCustomCoreEventDescription& conf) const
{
    switch (this->getCPUModel())
    {
    case PCM::WESTMERE_EX:
        // OFFCORE_RESPONSE.ANY_REQUEST.LOCAL_DRAM:  Offcore requests satisfied by the local DRAM
        conf.OffcoreResponseMsrValue[0] = 0x40FF;
        // OFFCORE_RESPONSE.ANY_REQUEST.REMOTE_DRAM: Offcore requests satisfied by a remote DRAM
        conf.OffcoreResponseMsrValue[1] = 0x20FF;
        break;
    case PCM::JAKETOWN:
    case PCM::IVYTOWN:
        // OFFCORE_RESPONSE.*.LOCAL_DRAM
        conf.OffcoreResponseMsrValue[0] = 0x780400000 | 0x08FFF;
        // OFFCORE_RESPONSE.*.REMOTE_DRAM
        conf.OffcoreResponseMsrValue[1] = 0x7ff800000 | 0x08FFF;
        break;
    case PCM::HASWELLX:
        // OFFCORE_RESPONSE.*.LOCAL_DRAM
        conf.OffcoreResponseMsrValue[0] = 0x600400000 | 0x08FFF;
        // OFFCORE_RESPONSE.*.REMOTE_DRAM
        conf.OffcoreResponseMsrValue[1] = 0x63f800000 | 0x08FFF;
        break;
    case PCM::BDX:
        // OFFCORE_RESPONSE.ALL_REQUESTS.L3_MISS.LOCAL_DRAM
        conf.OffcoreResponseMsrValue[0] = 0x0604008FFF;
        // OFFCORE_RESPONSE.ALL_REQUESTS.L3_MISS.REMOTE_DRAM
        conf.OffcoreResponseMsrValue[1] = 0x067BC08FFF;
        break;
    case PCM::SKX:
        // OFFCORE_RESPONSE.ALL_REQUESTS.L3_MISS_LOCAL_DRAM.ANY_SNOOP
        conf.OffcoreResponseMsrValue[0] = 0x3FC0008FFF | (1 << 26);
        // OFFCORE_RESPONSE.ALL_REQUESTS.L3_MISS_REMOTE_(HOP0,HOP1,HOP2P)_DRAM.ANY_SNOOP
        conf.OffcoreResponseMsrValue[1] = 0x3FC0008FFF | (1 << 27) | (1 << 28) | (1 << 29);
        break;
    case PCM::ICX:
        std::cout << "INFO: Monitored accesses include demand + L2 cache prefetcher, code read and RFO.\n";
        // OCR.READS_TO_CORE.LOCAL_DRAM
        conf.OffcoreResponseMsrValue[0] = 0x0104000477;
        // OCR.READS_TO_CORE.REMOTE_DRAM
        conf.OffcoreResponseMsrValue[1] = 0x0730000477;
        break;
    default:
        // Unknown model: no safe encoding to program.
        throw UnsupportedProcessorException();
    }
}
7620 
7621 } // namespace pcm
7622