1 #include <stdio.h>
2 #include <stdlib.h>
3 #include <string.h>
4 #include <assert.h>
5 #include <stdbool.h>
6 #include <errno.h>
7 
8 #ifdef __linux__
9   #include <sys/auxv.h>
10   #include <asm/hwcap.h>
11 #elif defined __APPLE__ || __MACH__
12   #include "sysctl.h"
13   // From Linux kernel: arch/arm64/include/asm/cputype.h
14   #define MIDR_APPLE_M1_ICESTORM  0x610F0220
15   #define MIDR_APPLE_M1_FIRESTORM 0x610F0230
16   #ifndef CPUFAMILY_ARM_FIRESTORM_ICESTORM
17     #define CPUFAMILY_ARM_FIRESTORM_ICESTORM 0x1B588BB3
18   #endif
19 #endif
20 
21 #include "../common/global.h"
22 #include "udev.h"
23 #include "midr.h"
24 #include "uarch.h"
25 #include "soc.h"
26 
get_cache_info(struct cpuInfo * cpu)27 struct cache* get_cache_info(struct cpuInfo* cpu) {
28   struct cache* cach = emalloc(sizeof(struct cache));
29   init_cache_struct(cach);
30 
31   cach->max_cache_level = 2;
32   for(int i=0; i < cach->max_cache_level + 1; i++) {
33     cach->cach_arr[i]->exists = true;
34     cach->cach_arr[i]->num_caches = 1;
35     cach->cach_arr[i]->size = 0;
36   }
37 
38   return cach;
39 }
40 
get_frequency_info(uint32_t core)41 struct frequency* get_frequency_info(uint32_t core) {
42   struct frequency* freq = emalloc(sizeof(struct frequency));
43 
44   freq->base = UNKNOWN_FREQ;
45   freq->max = get_max_freq_from_file(core);
46 
47   return freq;
48 }
49 
get_topology_info(struct cpuInfo * cpu,struct cache * cach,uint32_t * midr_array,int socket_idx,int ncores)50 struct topology* get_topology_info(struct cpuInfo* cpu, struct cache* cach, uint32_t* midr_array, int socket_idx, int ncores) {
51   struct topology* topo = emalloc(sizeof(struct topology));
52   init_topology_struct(topo, cach);
53 
54   int sockets_seen = 0;
55   int first_core_idx = 0;
56   int currrent_core_idx = 0;
57   int cores_in_socket = 0;
58 
59   while(socket_idx + 1 > sockets_seen) {
60     if(currrent_core_idx < ncores && midr_array[first_core_idx] == midr_array[currrent_core_idx]) {
61       currrent_core_idx++;
62       cores_in_socket++;
63     }
64     else {
65       topo->total_cores = cores_in_socket;
66       cores_in_socket = 0;
67       first_core_idx = currrent_core_idx;
68       sockets_seen++;
69     }
70   }
71 
72   return topo;
73 }
74 
get_peak_performance(struct cpuInfo * cpu)75 int64_t get_peak_performance(struct cpuInfo* cpu) {
76   struct cpuInfo* ptr = cpu;
77 
78   //First check we have consistent data
79   for(int i=0; i < cpu->num_cpus; ptr = ptr->next_cpu, i++) {
80     if(get_freq(ptr->freq) == UNKNOWN_FREQ) {
81       return -1;
82     }
83   }
84 
85   int64_t flops = 0;
86 
87   ptr = cpu;
88   for(int i=0; i < cpu->num_cpus; ptr = ptr->next_cpu, i++) {
89     flops += ptr->topo->total_cores * (get_freq(ptr->freq) * 1000000);
90   }
91   if(cpu->feat->NEON) flops = flops * 4;
92 
93   return flops;
94 }
95 
// Two cores are considered the same kind when both their MIDR and their
// frequency entries match.
bool cores_are_equal(int c1pos, int c2pos, uint32_t* midr_array, int32_t* freq_array) {
  return midr_array[c1pos] == midr_array[c2pos] && freq_array[c1pos] == freq_array[c2pos];
}

// Assigns a group id to every core: cores that compare equal (see
// cores_are_equal) share an id, and ids are handed out in order of first
// appearance starting at 0.
//
// midr_array, freq_array: per-core inputs, len entries each.
// ids_array:              output, len entries; left untouched if len <= 0.
// Returns the number of distinct ids assigned (0 when len <= 0).
uint32_t fill_ids_from_midr(uint32_t* midr_array, int32_t* freq_array, uint32_t* ids_array, int len) {
  // Nothing to classify; in particular do not write ids_array[0].
  if(len <= 0) {
    return 0;
  }

  uint32_t next_id = 0;

  for (int i = 0; i < len; i++) {
    // Only earlier cores can already own an id, so it is enough to look
    // for the first equal core before position i.
    int match = -1;
    for (int j = 0; j < i; j++) {
      if (cores_are_equal(i, j, midr_array, freq_array)) {
        match = j;
        break;
      }
    }

    if(match >= 0) {
      ids_array[i] = ids_array[match];
    }
    else {
      ids_array[i] = next_id;
      next_id++;
    }
  }

  return next_id;
}
129 
init_cpu_info(struct cpuInfo * cpu)130 void init_cpu_info(struct cpuInfo* cpu) {
131   cpu->next_cpu = NULL;
132 }
133 
134 // We assume all cpus share the same hardware
135 // capabilities but I'm not sure it is always
136 // true...
137 // ARM32 https://elixir.bootlin.com/linux/latest/source/arch/arm/include/uapi/asm/hwcap.h
138 // ARM64 https://elixir.bootlin.com/linux/latest/source/arch/arm64/include/uapi/asm/hwcap.h
get_features_info()139 struct features* get_features_info() {
140   struct features* feat = emalloc(sizeof(struct features));
141   bool *ptr = &(feat->AES);
142   for(uint32_t i = 0; i < sizeof(struct features)/sizeof(bool); i++, ptr++) {
143     *ptr = false;
144   }
145 
146 #ifdef __linux__
147   errno = 0;
148   long hwcaps = getauxval(AT_HWCAP);
149 
150   if(errno == ENOENT) {
151     printWarn("Unable to retrieve AT_HWCAP using getauxval");
152   }
153 #ifdef __aarch64__
154   else {
155     feat->AES = hwcaps & HWCAP_AES;
156     feat->CRC32 = hwcaps & HWCAP_CRC32;
157     feat->SHA1 = hwcaps & HWCAP_SHA1;
158     feat->SHA2 = hwcaps & HWCAP_SHA2;
159     feat->NEON = hwcaps & HWCAP_ASIMD;
160   }
161 #else
162   else {
163     feat->NEON = hwcaps & HWCAP_NEON;
164   }
165 
166   hwcaps = getauxval(AT_HWCAP2);
167   if(errno == ENOENT) {
168     printWarn("Unable to retrieve AT_HWCAP2 using getauxval");
169   }
170   else {
171     feat->AES = hwcaps & HWCAP2_AES;
172     feat->CRC32 = hwcaps & HWCAP2_CRC32;
173     feat->SHA1 = hwcaps & HWCAP2_SHA1;
174     feat->SHA2 = hwcaps & HWCAP2_SHA2;
175   }
176 #endif // ifdef __aarch64__
177 #elif defined __APPLE__ || __MACH__
178   // Must be M1
179   feat->AES = true;
180   feat->CRC32 = true;
181   feat->SHA1 = true;
182   feat->SHA2 = true;
183   feat->NEON = true;
184 #endif  // ifdef __linux__
185 
186   return feat;
187 }
188 
189 #ifdef __linux__
get_cpu_info_linux(struct cpuInfo * cpu)190 struct cpuInfo* get_cpu_info_linux(struct cpuInfo* cpu) {
191   init_cpu_info(cpu);
192   int ncores = get_ncores_from_cpuinfo();
193   bool success = false;
194   int32_t* freq_array = emalloc(sizeof(uint32_t) * ncores);
195   uint32_t* midr_array = emalloc(sizeof(uint32_t) * ncores);
196   uint32_t* ids_array = emalloc(sizeof(uint32_t) * ncores);
197 
198   for(int i=0; i < ncores; i++) {
199     midr_array[i] = get_midr_from_cpuinfo(i, &success);
200 
201     if(!success) {
202       printWarn("Unable to fetch MIDR for core %d. This is probably because the core is offline", i);
203       midr_array[i] = midr_array[0];
204     }
205 
206     freq_array[i] = get_max_freq_from_file(i);
207     if(freq_array[i] == UNKNOWN_FREQ) {
208       printWarn("Unable to fetch max frequency for core %d. This is probably because the core is offline", i);
209       freq_array[i] = freq_array[0];
210     }
211   }
212   uint32_t sockets = fill_ids_from_midr(midr_array, freq_array, ids_array, ncores);
213 
214   struct cpuInfo* ptr = cpu;
215   int midr_idx = 0;
216   int tmp_midr_idx = 0;
217   for(uint32_t i=0; i < sockets; i++) {
218     if(i > 0) {
219       ptr->next_cpu = emalloc(sizeof(struct cpuInfo));
220       ptr = ptr->next_cpu;
221       init_cpu_info(ptr);
222 
223       tmp_midr_idx = midr_idx;
224       while(cores_are_equal(midr_idx, tmp_midr_idx, midr_array, freq_array)) tmp_midr_idx++;
225       midr_idx = tmp_midr_idx;
226     }
227 
228     ptr->midr = midr_array[midr_idx];
229     ptr->arch = get_uarch_from_midr(ptr->midr, ptr);
230 
231     ptr->feat = get_features_info();
232     ptr->freq = get_frequency_info(midr_idx);
233     ptr->cach = get_cache_info(ptr);
234     ptr->topo = get_topology_info(ptr, ptr->cach, midr_array, i, ncores);
235   }
236 
237   cpu->num_cpus = sockets;
238   cpu->hv = emalloc(sizeof(struct hypervisor));
239   cpu->hv->present = false;
240   cpu->soc = get_soc();
241   cpu->peak_performance = get_peak_performance(cpu);
242 
243   return cpu;
244 }
245 
246 #elif defined __APPLE__ || __MACH__
fill_cpu_info_firestorm_icestorm(struct cpuInfo * cpu)247 void fill_cpu_info_firestorm_icestorm(struct cpuInfo* cpu) {
248   // 1. Fill ICESTORM
249   struct cpuInfo* ice = cpu;
250 
251   ice->midr = MIDR_APPLE_M1_ICESTORM;
252   ice->arch = get_uarch_from_midr(ice->midr, ice);
253   ice->cach = get_cache_info(ice);
254   ice->feat = get_features_info();
255   ice->topo = malloc(sizeof(struct topology));
256   ice->topo->cach = ice->cach;
257   ice->topo->total_cores = 4;
258   ice->freq = malloc(sizeof(struct frequency));
259   ice->freq->base = UNKNOWN_FREQ;
260   ice->freq->max = 2064;
261   ice->hv = malloc(sizeof(struct hypervisor));
262   ice->hv->present = false;
263   ice->next_cpu = malloc(sizeof(struct cpuInfo));
264 
265   // 2. Fill FIRESTORM
266   struct cpuInfo* fire = ice->next_cpu;
267   fire->midr = MIDR_APPLE_M1_FIRESTORM;
268   fire->arch = get_uarch_from_midr(fire->midr, fire);
269   fire->cach = get_cache_info(fire);
270   fire->feat = get_features_info();
271   fire->topo = malloc(sizeof(struct topology));
272   fire->topo->cach = fire->cach;
273   fire->topo->total_cores = 4;
274   fire->freq = malloc(sizeof(struct frequency));
275   fire->freq->base = UNKNOWN_FREQ;
276   fire->freq->max = 3200;
277   fire->hv = malloc(sizeof(struct hypervisor));
278   fire->hv->present = false;
279   fire->next_cpu = NULL;
280 }
281 
get_cpu_info_mach(struct cpuInfo * cpu)282 struct cpuInfo* get_cpu_info_mach(struct cpuInfo* cpu) {
283   uint32_t cpu_family = get_sys_info_by_name("hw.cpufamily");
284 
285   // Manually fill the cpuInfo assuming that the CPU
286   // is a ARM_FIRESTORM_ICESTORM (Apple M1)
287   if(cpu_family == CPUFAMILY_ARM_FIRESTORM_ICESTORM) {
288     cpu->num_cpus = 2;
289     cpu->soc = get_soc();
290     fill_cpu_info_firestorm_icestorm(cpu);
291     cpu->peak_performance = get_peak_performance(cpu);
292   }
293   else {
294     printBug("Found invalid cpu_family: 0x%.8X", cpu_family);
295     return NULL;
296   }
297 
298   return cpu;
299 }
300 #endif
301 
get_cpu_info()302 struct cpuInfo* get_cpu_info() {
303   struct cpuInfo* cpu = malloc(sizeof(struct cpuInfo));
304   init_cpu_info(cpu);
305 
306   #ifdef __linux__
307     return get_cpu_info_linux(cpu);
308   #elif defined __APPLE__ || __MACH__
309     return get_cpu_info_mach(cpu);
310   #endif
311 }
312 
get_str_topology(struct cpuInfo * cpu,struct topology * topo,bool dual_socket)313 char* get_str_topology(struct cpuInfo* cpu, struct topology* topo, bool dual_socket) {
314   uint32_t size = 3+7+1;
315   char*  string = emalloc(sizeof(char)*size);
316   snprintf(string, size, "%d cores", topo->total_cores);
317 
318   return string;
319 }
320 
get_str_features(struct cpuInfo * cpu)321 char* get_str_features(struct cpuInfo* cpu) {
322   struct features* feat = cpu->feat;
323   uint32_t max_len = strlen("NEON,SHA1,SHA2,AES,CRC32,") + 1;
324   uint32_t len = 0;
325   char* string = ecalloc(max_len, sizeof(char));
326 
327   if(feat->NEON) {
328     strcat(string, "NEON,");
329     len += 5;
330   }
331   if(feat->SHA1) {
332     strcat(string, "SHA1,");
333     len += 5;
334   }
335   if(feat->SHA2) {
336     strcat(string, "SHA2,");
337     len += 5;
338   }
339   if(feat->AES) {
340     strcat(string, "AES,");
341     len += 4;
342   }
343   if(feat->CRC32) {
344     strcat(string, "CRC32,");
345     len += 6;
346   }
347 
348   if(len > 0) {
349     string[len-1] = '\0';
350     return string;
351   }
352   else
353     return NULL;
354 }
355 
print_debug(struct cpuInfo * cpu)356 void print_debug(struct cpuInfo* cpu) {
357   int ncores = get_ncores_from_cpuinfo();
358   bool success = false;
359 
360   for(int i=0; i < ncores; i++) {
361     printf("[Core %d] ", i);
362     long freq = get_max_freq_from_file(i);
363     uint32_t midr = get_midr_from_cpuinfo(i, &success);
364     if(!success) {
365       printWarn("Unable to fetch MIDR for core %d. This is probably because the core is offline", i);
366       printf("0x%.8X ", get_midr_from_cpuinfo(0, &success));
367     }
368     else {
369       printf("0x%.8X ", midr);
370     }
371     if(freq == UNKNOWN_FREQ) {
372       printWarn("Unable to fetch max frequency for core %d. This is probably because the core is offline", i);
373       printf("%ld MHz\n", get_max_freq_from_file(0));
374     }
375     else {
376       printf("%ld MHz\n", freq);
377     }
378   }
379 }
380 
// Releases a topology record. Only the record itself is freed; nothing it
// points to is released here.
void free_topo_struct(struct topology* topo) {
  free(topo);
}
384