1 #include <stdio.h>
2 #include <dlfcn.h>
3 #include <stdint.h>
4 #include <stdlib.h>
5
/*
 * Return the number of the logical CPU the calling thread is running on.
 *
 * RDTSCP leaves the processor ID in ECX
 * (https://www.felixcloutier.com/x86/rdtscp).  Linux encodes the NUMA node
 * starting at bit 12 of that value, so only the low 12 bits (mask 0xFFF)
 * are returned as the CPU number.
 *
 * x86 only; relies on GCC-style extended inline assembly.
 *
 * Returns: CPU number in the range 0..0xFFF.
 */
int pcm_getcpu(void)
{
    unsigned int aux;

    /*
     * __asm__/__volatile__ are used instead of asm/volatile because the
     * plain `asm` keyword is not available in strict ISO modes
     * (-std=c99, -std=c11).
     *
     * "=c" binds the output straight to ECX, so no extra mov is needed.
     * RDTSCP also writes the timestamp into EDX:EAX, hence the clobbers.
     */
    __asm__ __volatile__ ("rdtscp" : "=c" (aux) : : "%rax", "%rdx");

    return (int)(aux & 0xFFFu);
}
18
/*
 * Table of function pointers to the PCM C API.
 *
 * Every call into the library made by this example goes through this
 * table, so the same code works whether the entry points were resolved
 * at run time or bound at link time.
 */
struct {
    /* Takes an event slot id and an event description string. */
    int      (*pcm_c_build_core_event)(uint8_t id, const char *argv);

    /* Library initialisation; called once before start. */
    int      (*pcm_c_init)();

    /* Bracket the measured region. */
    void     (*pcm_c_start)();
    void     (*pcm_c_stop)();

    /* Per-core counters, queried after stop. */
    uint64_t (*pcm_c_get_cycles)(uint32_t core_id);
    uint64_t (*pcm_c_get_instr)(uint32_t core_id);

    /* Per-core value of the event programmed in slot event_id. */
    uint64_t (*pcm_c_get_core_event)(uint32_t core_id, uint32_t event_id);
} PCM;
28
#ifndef PCM_DYNAMIC_LIB
/*
 * Link-time binding: the library entry points are declared by hand here
 * rather than pulled in from a header file.
 */
int      pcm_c_build_core_event(uint8_t id, const char *argv);
int      pcm_c_init();
void     pcm_c_start();
void     pcm_c_stop();
uint64_t pcm_c_get_cycles(uint32_t core_id);
uint64_t pcm_c_get_instr(uint32_t core_id);
uint64_t pcm_c_get_core_event(uint32_t core_id, uint32_t event_id);
#endif
39
40
main(int argc,const char * argv[])41 int main(int argc, const char *argv[])
42 {
43 int i,a[100],b[100],c[100];
44 uint32_t total = 0;
45 int lcore_id;
46
47 /* Seed for predictable rand() results */
48 srand(0);
49 for (i=0; i < 100; ++i) {
50 a[i] = rand();
51 b[i] = rand();
52 c[i] = rand();
53 }
54
55 #ifdef PCM_DYNAMIC_LIB
56 void * handle = dlopen("libpcm.so", RTLD_LAZY);
57 if(!handle) {
58 printf("Abort: could not (dynamically) load shared library \n");
59 return -1;
60 }
61
62 PCM.pcm_c_build_core_event = (int (*)(uint8_t, const char *)) dlsym(handle, "pcm_c_build_core_event");
63 PCM.pcm_c_init = (int (*)()) dlsym(handle, "pcm_c_init");
64 PCM.pcm_c_start = (void (*)()) dlsym(handle, "pcm_c_start");
65 PCM.pcm_c_stop = (void (*)()) dlsym(handle, "pcm_c_stop");
66 PCM.pcm_c_get_cycles = (uint64_t (*)(uint32_t)) dlsym(handle, "pcm_c_get_cycles");
67 PCM.pcm_c_get_instr = (uint64_t (*)(uint32_t)) dlsym(handle, "pcm_c_get_instr");
68 PCM.pcm_c_get_core_event = (uint64_t (*)(uint32_t,uint32_t)) dlsym(handle, "pcm_c_get_core_event");
69 #else
70 PCM.pcm_c_build_core_event = pcm_c_build_core_event;
71 PCM.pcm_c_init = pcm_c_init;
72 PCM.pcm_c_start = pcm_c_start;
73 PCM.pcm_c_stop = pcm_c_stop;
74 PCM.pcm_c_get_cycles = pcm_c_get_cycles;
75 PCM.pcm_c_get_instr = pcm_c_get_instr;
76 PCM.pcm_c_get_core_event = pcm_c_get_core_event;
77 #endif
78
79 if(PCM.pcm_c_init == NULL || PCM.pcm_c_start == NULL || PCM.pcm_c_stop == NULL ||
80 PCM.pcm_c_get_cycles == NULL || PCM.pcm_c_get_instr == NULL ||
81 PCM.pcm_c_build_core_event == NULL)
82 return -1;
83 switch(argc-1)
84 {
85 case 4:
86 PCM.pcm_c_build_core_event(3,argv[3]);
87 case 3:
88 PCM.pcm_c_build_core_event(2,argv[2]);
89 case 2:
90 PCM.pcm_c_build_core_event(1,argv[2]);
91 case 1:
92 PCM.pcm_c_build_core_event(0,argv[1]);
93 case 0:
94 break;
95 default:
96 printf("Number of arguments are too many! exit...\n");
97 return -2;
98 }
99
100 printf("[c_example] Initializing PCM measurements:\n");
101 PCM.pcm_c_init();
102
103 printf("[c_example] Calling PCM start()\n");
104 PCM.pcm_c_start();
105 for(i=0;i<10000;i++)
106 c[i%100] = 4 * a[i%100] + b[i%100];
107 for(i=0;i<100;i++)
108 total += c[i];
109 PCM.pcm_c_stop();
110
111 printf("[c_example] PCM measurment stopped, compute result %u\n", total);
112
113 lcore_id = pcm_getcpu();
114 printf("C:%lu I:%lu, IPC:%3.2f\n",
115 PCM.pcm_c_get_cycles(lcore_id),
116 PCM.pcm_c_get_instr(lcore_id),
117 (double)PCM.pcm_c_get_instr(lcore_id)/PCM.pcm_c_get_cycles(lcore_id));
118 printf("CPU%d E0: %lu, E1: %lu, E2: %lu, E3: %lu\n",
119 lcore_id,
120 PCM.pcm_c_get_core_event(lcore_id,0),
121 PCM.pcm_c_get_core_event(lcore_id,1),
122 PCM.pcm_c_get_core_event(lcore_id,2),
123 PCM.pcm_c_get_core_event(lcore_id,3));
124
125 return 0;
126 }
127