1 #include <stdio.h>
2 #include <stdlib.h>
3 #include <stdlib.h>
4 #include <string.h>
5 #include <unistd.h>
6 #include <assert.h>
7 
8 #include "ppc.h"
9 #include "uarch.h"
10 #include "udev.h"
11 #include "../common/udev.h"
12 #include "../common/global.h"
13 
get_cache_info(struct cpuInfo * cpu)14 struct cache* get_cache_info(struct cpuInfo* cpu) {
15   struct cache* cach = emalloc(sizeof(struct cache));
16   init_cache_struct(cach);
17 
18   cach->L1i->size = get_l1i_cache_size(0);
19   cach->L1d->size = get_l1d_cache_size(0);
20   cach->L2->size = get_l2_cache_size(0);
21   cach->L3->size = get_l3_cache_size(0);
22 
23   if(cach->L1i->size > 0) {
24     cach->L1i->exists = true;
25     cach->L1i->num_caches = get_num_caches_by_level(cpu, 0);
26     cach->max_cache_level = 1;
27   }
28   if(cach->L1d->size > 0) {
29     cach->L1d->exists = true;
30     cach->L1d->num_caches = get_num_caches_by_level(cpu, 1);
31     cach->max_cache_level = 2;
32   }
33   if(cach->L2->size > 0) {
34     cach->L2->exists = true;
35     cach->L2->num_caches = get_num_caches_by_level(cpu, 2);
36     cach->max_cache_level = 3;
37   }
38   if(cach->L3->size > 0) {
39     cach->L3->exists = true;
40     cach->L3->num_caches = get_num_caches_by_level(cpu, 3);
41     cach->max_cache_level = 4;
42   }
43 
44   return cach;
45 }
46 
get_topology_info(struct cache * cach)47 struct topology* get_topology_info(struct cache* cach) {
48   struct topology* topo = emalloc(sizeof(struct topology));
49   init_topology_struct(topo, cach);
50 
51   // 1. Total cores detection
52   if((topo->total_cores = sysconf(_SC_NPROCESSORS_ONLN)) == -1) {
53     printWarn("sysconf(_SC_NPROCESSORS_ONLN): %s", strerror(errno));
54     topo->total_cores = 1; // fallback
55   }
56 
57   // To find physical cores, we use topo->total_cores and core_ids
58   // To find number of sockets, we use package_ids
59   int* core_ids = emalloc(sizeof(int) * topo->total_cores);
60   int* package_ids = emalloc(sizeof(int) * topo->total_cores);
61 
62   if(!fill_core_ids_from_sys(core_ids, topo->total_cores)) {
63     printWarn("fill_core_ids_from_sys failed, output may be incomplete/invalid");
64     for(int i=0; i < topo->total_cores; i++) core_ids[i] = 0;
65   }
66   if(!fill_package_ids_from_sys(package_ids, topo->total_cores)) {
67     printWarn("fill_package_ids_from_sys failed, output may be incomplete/invalid");
68     for(int i=0; i < topo->total_cores; i++) package_ids[i] = 0;
69   }
70 
71   // 2. Socket detection
72   int *package_ids_count = emalloc(sizeof(int) * topo->total_cores);
73   for(int i=0; i < topo->total_cores; i++) {
74     package_ids_count[i] = 0;
75   }
76   for(int i=0; i < topo->total_cores; i++) {
77     package_ids_count[package_ids[i]]++;
78   }
79   for(int i=0; i < topo->total_cores; i++) {
80     if(package_ids_count[i] != 0) {
81       topo->sockets++;
82     }
83   }
84 
85   // 3. Physical cores detection
86   int *core_ids_unified = emalloc(sizeof(int) * topo->total_cores);
87   for(int i=0; i < topo->total_cores; i++) {
88     core_ids_unified[i] = -1;
89   }
90   bool found = false;
91   for(int i=0; i < topo->total_cores; i++) {
92     for(int j=0; j < topo->total_cores && !found; j++) {
93       if(core_ids_unified[j] == core_ids[i]) found = true;
94     }
95     if(!found) {
96       core_ids_unified[topo->physical_cores] = core_ids[i];
97       topo->physical_cores++;
98     }
99     found = false;
100   }
101 
102   topo->physical_cores = topo->physical_cores / topo->sockets; // only count cores on one socket
103   topo->logical_cores = topo->total_cores / topo->sockets;     // only count threads on one socket
104   topo->smt_supported = topo->logical_cores / topo->physical_cores;
105 
106   free(core_ids);
107   free(package_ids);
108   free(package_ids_count);
109   free(core_ids_unified);
110 
111   return topo;
112 }
113 
mfpvr()114 static inline uint32_t mfpvr() {
115     uint32_t pvr;
116 
117     asm ("mfpvr %0"
118          : "=r"(pvr));
119     return pvr;
120 }
121 
get_cpu_uarch(struct cpuInfo * cpu)122 struct uarch* get_cpu_uarch(struct cpuInfo* cpu) {
123   return get_uarch_from_pvr(cpu->pvr);
124 }
125 
get_frequency_info()126 struct frequency* get_frequency_info() {
127   struct frequency* freq = emalloc(sizeof(struct frequency));
128 
129   freq->max = get_max_freq_from_file(0);
130   freq->base = get_min_freq_from_file(0);
131 
132   return freq;
133 }
134 
get_peak_performance(struct cpuInfo * cpu,struct topology * topo,int64_t freq)135 int64_t get_peak_performance(struct cpuInfo* cpu, struct topology* topo, int64_t freq) {
136   /*
137    * Not sure about this
138    * PP(SP) = N_CORES * FREQUENCY * 4(If altivec)
139    */
140 
141   //First check we have consistent data
142   if(freq == UNKNOWN_FREQ) {
143     return -1;
144   }
145 
146   struct features* feat = cpu->feat;
147   int64_t flops = topo->physical_cores * topo->sockets * (freq * 1000000);
148   if(feat->altivec) flops = flops * 4;
149 
150   // POWER9 has the concept called "slices". Each SMT4 core has two super-slices,
151   // and each super-slice is capable of doing two FLOPS per cycle. In the case of
152   // SMT8, it has 4 super-slices, thus four FLOPS per cycle.
153   if(is_power9(cpu->arch)) {
154     int threads_per_core = topo->logical_cores / topo->physical_cores;
155     flops = flops * (threads_per_core / 2);
156   }
157 
158   return flops;
159 }
160 
get_cpu_info()161 struct cpuInfo* get_cpu_info() {
162   struct cpuInfo* cpu = emalloc(sizeof(struct cpuInfo));
163   struct features* feat = emalloc(sizeof(struct features));
164   cpu->feat = feat;
165 
166   bool *ptr = &(feat->AES);
167   for(uint32_t i = 0; i < sizeof(struct features)/sizeof(bool); i++, ptr++) {
168     *ptr = false;
169   }
170 
171   int len;
172   char* path = emalloc(sizeof(char) * (strlen(_PATH_DT) + strlen(_PATH_DT_PART) + 1));
173   sprintf(path, "%s%s", _PATH_DT, _PATH_DT_PART);
174 
175   cpu->cpu_name = read_file(path, &len);
176   cpu->pvr = mfpvr();
177   cpu->arch = get_cpu_uarch(cpu);
178   cpu->freq = get_frequency_info();
179   cpu->topo = get_topology_info(cpu->cach);
180   cpu->cach = get_cache_info(cpu);
181   feat->altivec = has_altivec(cpu->arch);
182   cpu->peak_performance = get_peak_performance(cpu, cpu->topo, get_freq(cpu->freq));
183 
184   if(cpu->cach == NULL || cpu->topo == NULL) {
185     return NULL;
186   }
187   return cpu;
188 }
189 
get_str_altivec(struct cpuInfo * cpu)190 char* get_str_altivec(struct cpuInfo* cpu) {
191   char* string = ecalloc(4, sizeof(char));
192 
193   if(cpu->feat->altivec) strcpy(string, "Yes");
194   else strcpy(string, "No");
195 
196   return string;
197 }
198 
get_str_topology(struct topology * topo,bool dual_socket)199 char* get_str_topology(struct topology* topo, bool dual_socket) {
200   char* string;
201   if(topo->smt_supported > 1) {
202     uint32_t size = 3+3+17+1;
203     string = emalloc(sizeof(char)*size);
204     if(dual_socket)
205       snprintf(string, size, "%d cores (%d threads)", topo->physical_cores * topo->sockets, topo->logical_cores * topo->sockets);
206     else
207       snprintf(string, size, "%d cores (%d threads)",topo->physical_cores,topo->logical_cores);
208   }
209   else {
210     uint32_t size = 3+7+1;
211     string = emalloc(sizeof(char)*size);
212     if(dual_socket)
213       snprintf(string, size, "%d cores",topo->physical_cores * topo->sockets);
214     else
215       snprintf(string, size, "%d cores",topo->physical_cores);
216   }
217   return string;
218 }
219 
220 
print_debug(struct cpuInfo * cpu)221 void print_debug(struct cpuInfo* cpu) {
222   printf("PVR: 0x%.8X\n", cpu->pvr);
223 }
224