1 #include <stdio.h>
2 #include <stdlib.h>
3 #include <string.h>
4 #include <assert.h>
5 #include <stdbool.h>
6 #include <errno.h>
7
8 #ifdef __linux__
9 #include <sys/auxv.h>
10 #include <asm/hwcap.h>
11 #elif defined __APPLE__ || __MACH__
12 #include "sysctl.h"
13 // From Linux kernel: arch/arm64/include/asm/cputype.h
14 #define MIDR_APPLE_M1_ICESTORM 0x610F0220
15 #define MIDR_APPLE_M1_FIRESTORM 0x610F0230
16 #ifndef CPUFAMILY_ARM_FIRESTORM_ICESTORM
17 #define CPUFAMILY_ARM_FIRESTORM_ICESTORM 0x1B588BB3
18 #endif
19 #endif
20
21 #include "../common/global.h"
22 #include "udev.h"
23 #include "midr.h"
24 #include "uarch.h"
25 #include "soc.h"
26
get_cache_info(struct cpuInfo * cpu)27 struct cache* get_cache_info(struct cpuInfo* cpu) {
28 struct cache* cach = emalloc(sizeof(struct cache));
29 init_cache_struct(cach);
30
31 cach->max_cache_level = 2;
32 for(int i=0; i < cach->max_cache_level + 1; i++) {
33 cach->cach_arr[i]->exists = true;
34 cach->cach_arr[i]->num_caches = 1;
35 cach->cach_arr[i]->size = 0;
36 }
37
38 return cach;
39 }
40
get_frequency_info(uint32_t core)41 struct frequency* get_frequency_info(uint32_t core) {
42 struct frequency* freq = emalloc(sizeof(struct frequency));
43
44 freq->base = UNKNOWN_FREQ;
45 freq->max = get_max_freq_from_file(core);
46
47 return freq;
48 }
49
get_topology_info(struct cpuInfo * cpu,struct cache * cach,uint32_t * midr_array,int socket_idx,int ncores)50 struct topology* get_topology_info(struct cpuInfo* cpu, struct cache* cach, uint32_t* midr_array, int socket_idx, int ncores) {
51 struct topology* topo = emalloc(sizeof(struct topology));
52 init_topology_struct(topo, cach);
53
54 int sockets_seen = 0;
55 int first_core_idx = 0;
56 int currrent_core_idx = 0;
57 int cores_in_socket = 0;
58
59 while(socket_idx + 1 > sockets_seen) {
60 if(currrent_core_idx < ncores && midr_array[first_core_idx] == midr_array[currrent_core_idx]) {
61 currrent_core_idx++;
62 cores_in_socket++;
63 }
64 else {
65 topo->total_cores = cores_in_socket;
66 cores_in_socket = 0;
67 first_core_idx = currrent_core_idx;
68 sockets_seen++;
69 }
70 }
71
72 return topo;
73 }
74
get_peak_performance(struct cpuInfo * cpu)75 int64_t get_peak_performance(struct cpuInfo* cpu) {
76 struct cpuInfo* ptr = cpu;
77
78 //First check we have consistent data
79 for(int i=0; i < cpu->num_cpus; ptr = ptr->next_cpu, i++) {
80 if(get_freq(ptr->freq) == UNKNOWN_FREQ) {
81 return -1;
82 }
83 }
84
85 int64_t flops = 0;
86
87 ptr = cpu;
88 for(int i=0; i < cpu->num_cpus; ptr = ptr->next_cpu, i++) {
89 flops += ptr->topo->total_cores * (get_freq(ptr->freq) * 1000000);
90 }
91 if(cpu->feat->NEON) flops = flops * 4;
92
93 return flops;
94 }
95
// Tells whether the cores at positions c1pos and c2pos are the same
// kind of core: both their MIDR and their frequency must match.
bool cores_are_equal(int c1pos, int c2pos, uint32_t* midr_array, int32_t* freq_array) {
  if(midr_array[c1pos] != midr_array[c2pos]) {
    return false;
  }

  return freq_array[c1pos] == freq_array[c2pos];
}
99
// Assigns a group id to each of the len cores and writes it to
// ids_array. Cores that compare equal with cores_are_equal() share an
// id. Ids are handed out in order of first appearance, starting at 0
// for core 0. Returns the number of distinct ids that were assigned.
// ids_array must have room for at least one entry.
uint32_t fill_ids_from_midr(uint32_t* midr_array, int32_t* freq_array, uint32_t* ids_array, int len) {
  uint32_t next_id = 1;
  ids_array[0] = 0;

  for(int core = 1; core < len; core++) {
    // Look for the earliest preceding core of the same kind
    int twin = 0;
    while(twin < core && !cores_are_equal(core, twin, midr_array, freq_array)) {
      twin++;
    }

    if(twin < core) {
      // Already seen: reuse the id given to the first occurrence
      ids_array[core] = ids_array[twin];
    }
    else {
      // First core of its kind: open a new id
      ids_array[core] = next_id;
      next_id++;
    }
  }

  return next_id;
}
129
init_cpu_info(struct cpuInfo * cpu)130 void init_cpu_info(struct cpuInfo* cpu) {
131 cpu->next_cpu = NULL;
132 }
133
134 // We assume all cpus share the same hardware
135 // capabilities but I'm not sure it is always
136 // true...
137 // ARM32 https://elixir.bootlin.com/linux/latest/source/arch/arm/include/uapi/asm/hwcap.h
138 // ARM64 https://elixir.bootlin.com/linux/latest/source/arch/arm64/include/uapi/asm/hwcap.h
get_features_info()139 struct features* get_features_info() {
140 struct features* feat = emalloc(sizeof(struct features));
141 bool *ptr = &(feat->AES);
142 for(uint32_t i = 0; i < sizeof(struct features)/sizeof(bool); i++, ptr++) {
143 *ptr = false;
144 }
145
146 #ifdef __linux__
147 errno = 0;
148 long hwcaps = getauxval(AT_HWCAP);
149
150 if(errno == ENOENT) {
151 printWarn("Unable to retrieve AT_HWCAP using getauxval");
152 }
153 #ifdef __aarch64__
154 else {
155 feat->AES = hwcaps & HWCAP_AES;
156 feat->CRC32 = hwcaps & HWCAP_CRC32;
157 feat->SHA1 = hwcaps & HWCAP_SHA1;
158 feat->SHA2 = hwcaps & HWCAP_SHA2;
159 feat->NEON = hwcaps & HWCAP_ASIMD;
160 }
161 #else
162 else {
163 feat->NEON = hwcaps & HWCAP_NEON;
164 }
165
166 hwcaps = getauxval(AT_HWCAP2);
167 if(errno == ENOENT) {
168 printWarn("Unable to retrieve AT_HWCAP2 using getauxval");
169 }
170 else {
171 feat->AES = hwcaps & HWCAP2_AES;
172 feat->CRC32 = hwcaps & HWCAP2_CRC32;
173 feat->SHA1 = hwcaps & HWCAP2_SHA1;
174 feat->SHA2 = hwcaps & HWCAP2_SHA2;
175 }
176 #endif // ifdef __aarch64__
177 #elif defined __APPLE__ || __MACH__
178 // Must be M1
179 feat->AES = true;
180 feat->CRC32 = true;
181 feat->SHA1 = true;
182 feat->SHA2 = true;
183 feat->NEON = true;
184 #endif // ifdef __linux__
185
186 return feat;
187 }
188
189 #ifdef __linux__
get_cpu_info_linux(struct cpuInfo * cpu)190 struct cpuInfo* get_cpu_info_linux(struct cpuInfo* cpu) {
191 init_cpu_info(cpu);
192 int ncores = get_ncores_from_cpuinfo();
193 bool success = false;
194 int32_t* freq_array = emalloc(sizeof(uint32_t) * ncores);
195 uint32_t* midr_array = emalloc(sizeof(uint32_t) * ncores);
196 uint32_t* ids_array = emalloc(sizeof(uint32_t) * ncores);
197
198 for(int i=0; i < ncores; i++) {
199 midr_array[i] = get_midr_from_cpuinfo(i, &success);
200
201 if(!success) {
202 printWarn("Unable to fetch MIDR for core %d. This is probably because the core is offline", i);
203 midr_array[i] = midr_array[0];
204 }
205
206 freq_array[i] = get_max_freq_from_file(i);
207 if(freq_array[i] == UNKNOWN_FREQ) {
208 printWarn("Unable to fetch max frequency for core %d. This is probably because the core is offline", i);
209 freq_array[i] = freq_array[0];
210 }
211 }
212 uint32_t sockets = fill_ids_from_midr(midr_array, freq_array, ids_array, ncores);
213
214 struct cpuInfo* ptr = cpu;
215 int midr_idx = 0;
216 int tmp_midr_idx = 0;
217 for(uint32_t i=0; i < sockets; i++) {
218 if(i > 0) {
219 ptr->next_cpu = emalloc(sizeof(struct cpuInfo));
220 ptr = ptr->next_cpu;
221 init_cpu_info(ptr);
222
223 tmp_midr_idx = midr_idx;
224 while(cores_are_equal(midr_idx, tmp_midr_idx, midr_array, freq_array)) tmp_midr_idx++;
225 midr_idx = tmp_midr_idx;
226 }
227
228 ptr->midr = midr_array[midr_idx];
229 ptr->arch = get_uarch_from_midr(ptr->midr, ptr);
230
231 ptr->feat = get_features_info();
232 ptr->freq = get_frequency_info(midr_idx);
233 ptr->cach = get_cache_info(ptr);
234 ptr->topo = get_topology_info(ptr, ptr->cach, midr_array, i, ncores);
235 }
236
237 cpu->num_cpus = sockets;
238 cpu->hv = emalloc(sizeof(struct hypervisor));
239 cpu->hv->present = false;
240 cpu->soc = get_soc();
241 cpu->peak_performance = get_peak_performance(cpu);
242
243 return cpu;
244 }
245
246 #elif defined __APPLE__ || __MACH__
fill_cpu_info_firestorm_icestorm(struct cpuInfo * cpu)247 void fill_cpu_info_firestorm_icestorm(struct cpuInfo* cpu) {
248 // 1. Fill ICESTORM
249 struct cpuInfo* ice = cpu;
250
251 ice->midr = MIDR_APPLE_M1_ICESTORM;
252 ice->arch = get_uarch_from_midr(ice->midr, ice);
253 ice->cach = get_cache_info(ice);
254 ice->feat = get_features_info();
255 ice->topo = malloc(sizeof(struct topology));
256 ice->topo->cach = ice->cach;
257 ice->topo->total_cores = 4;
258 ice->freq = malloc(sizeof(struct frequency));
259 ice->freq->base = UNKNOWN_FREQ;
260 ice->freq->max = 2064;
261 ice->hv = malloc(sizeof(struct hypervisor));
262 ice->hv->present = false;
263 ice->next_cpu = malloc(sizeof(struct cpuInfo));
264
265 // 2. Fill FIRESTORM
266 struct cpuInfo* fire = ice->next_cpu;
267 fire->midr = MIDR_APPLE_M1_FIRESTORM;
268 fire->arch = get_uarch_from_midr(fire->midr, fire);
269 fire->cach = get_cache_info(fire);
270 fire->feat = get_features_info();
271 fire->topo = malloc(sizeof(struct topology));
272 fire->topo->cach = fire->cach;
273 fire->topo->total_cores = 4;
274 fire->freq = malloc(sizeof(struct frequency));
275 fire->freq->base = UNKNOWN_FREQ;
276 fire->freq->max = 3200;
277 fire->hv = malloc(sizeof(struct hypervisor));
278 fire->hv->present = false;
279 fire->next_cpu = NULL;
280 }
281
get_cpu_info_mach(struct cpuInfo * cpu)282 struct cpuInfo* get_cpu_info_mach(struct cpuInfo* cpu) {
283 uint32_t cpu_family = get_sys_info_by_name("hw.cpufamily");
284
285 // Manually fill the cpuInfo assuming that the CPU
286 // is a ARM_FIRESTORM_ICESTORM (Apple M1)
287 if(cpu_family == CPUFAMILY_ARM_FIRESTORM_ICESTORM) {
288 cpu->num_cpus = 2;
289 cpu->soc = get_soc();
290 fill_cpu_info_firestorm_icestorm(cpu);
291 cpu->peak_performance = get_peak_performance(cpu);
292 }
293 else {
294 printBug("Found invalid cpu_family: 0x%.8X", cpu_family);
295 return NULL;
296 }
297
298 return cpu;
299 }
300 #endif
301
get_cpu_info()302 struct cpuInfo* get_cpu_info() {
303 struct cpuInfo* cpu = malloc(sizeof(struct cpuInfo));
304 init_cpu_info(cpu);
305
306 #ifdef __linux__
307 return get_cpu_info_linux(cpu);
308 #elif defined __APPLE__ || __MACH__
309 return get_cpu_info_mach(cpu);
310 #endif
311 }
312
get_str_topology(struct cpuInfo * cpu,struct topology * topo,bool dual_socket)313 char* get_str_topology(struct cpuInfo* cpu, struct topology* topo, bool dual_socket) {
314 uint32_t size = 3+7+1;
315 char* string = emalloc(sizeof(char)*size);
316 snprintf(string, size, "%d cores", topo->total_cores);
317
318 return string;
319 }
320
get_str_features(struct cpuInfo * cpu)321 char* get_str_features(struct cpuInfo* cpu) {
322 struct features* feat = cpu->feat;
323 uint32_t max_len = strlen("NEON,SHA1,SHA2,AES,CRC32,") + 1;
324 uint32_t len = 0;
325 char* string = ecalloc(max_len, sizeof(char));
326
327 if(feat->NEON) {
328 strcat(string, "NEON,");
329 len += 5;
330 }
331 if(feat->SHA1) {
332 strcat(string, "SHA1,");
333 len += 5;
334 }
335 if(feat->SHA2) {
336 strcat(string, "SHA2,");
337 len += 5;
338 }
339 if(feat->AES) {
340 strcat(string, "AES,");
341 len += 4;
342 }
343 if(feat->CRC32) {
344 strcat(string, "CRC32,");
345 len += 6;
346 }
347
348 if(len > 0) {
349 string[len-1] = '\0';
350 return string;
351 }
352 else
353 return NULL;
354 }
355
print_debug(struct cpuInfo * cpu)356 void print_debug(struct cpuInfo* cpu) {
357 int ncores = get_ncores_from_cpuinfo();
358 bool success = false;
359
360 for(int i=0; i < ncores; i++) {
361 printf("[Core %d] ", i);
362 long freq = get_max_freq_from_file(i);
363 uint32_t midr = get_midr_from_cpuinfo(i, &success);
364 if(!success) {
365 printWarn("Unable to fetch MIDR for core %d. This is probably because the core is offline", i);
366 printf("0x%.8X ", get_midr_from_cpuinfo(0, &success));
367 }
368 else {
369 printf("0x%.8X ", midr);
370 }
371 if(freq == UNKNOWN_FREQ) {
372 printWarn("Unable to fetch max frequency for core %d. This is probably because the core is offline", i);
373 printf("%ld MHz\n", get_max_freq_from_file(0));
374 }
375 else {
376 printf("%ld MHz\n", freq);
377 }
378 }
379 }
380
// Releases a topology record. Only the record itself is freed; nothing
// it points to is released here.
void free_topo_struct(struct topology* topo) {
  free(topo);
}
384