1 #include <stdio.h>
2 #include <stdlib.h>
3 #include <stdlib.h>
4 #include <string.h>
5 #include <unistd.h>
6 #include <assert.h>
7
8 #include "ppc.h"
9 #include "uarch.h"
10 #include "udev.h"
11 #include "../common/udev.h"
12 #include "../common/global.h"
13
get_cache_info(struct cpuInfo * cpu)14 struct cache* get_cache_info(struct cpuInfo* cpu) {
15 struct cache* cach = emalloc(sizeof(struct cache));
16 init_cache_struct(cach);
17
18 cach->L1i->size = get_l1i_cache_size(0);
19 cach->L1d->size = get_l1d_cache_size(0);
20 cach->L2->size = get_l2_cache_size(0);
21 cach->L3->size = get_l3_cache_size(0);
22
23 if(cach->L1i->size > 0) {
24 cach->L1i->exists = true;
25 cach->L1i->num_caches = get_num_caches_by_level(cpu, 0);
26 cach->max_cache_level = 1;
27 }
28 if(cach->L1d->size > 0) {
29 cach->L1d->exists = true;
30 cach->L1d->num_caches = get_num_caches_by_level(cpu, 1);
31 cach->max_cache_level = 2;
32 }
33 if(cach->L2->size > 0) {
34 cach->L2->exists = true;
35 cach->L2->num_caches = get_num_caches_by_level(cpu, 2);
36 cach->max_cache_level = 3;
37 }
38 if(cach->L3->size > 0) {
39 cach->L3->exists = true;
40 cach->L3->num_caches = get_num_caches_by_level(cpu, 3);
41 cach->max_cache_level = 4;
42 }
43
44 return cach;
45 }
46
get_topology_info(struct cache * cach)47 struct topology* get_topology_info(struct cache* cach) {
48 struct topology* topo = emalloc(sizeof(struct topology));
49 init_topology_struct(topo, cach);
50
51 // 1. Total cores detection
52 if((topo->total_cores = sysconf(_SC_NPROCESSORS_ONLN)) == -1) {
53 printWarn("sysconf(_SC_NPROCESSORS_ONLN): %s", strerror(errno));
54 topo->total_cores = 1; // fallback
55 }
56
57 // To find physical cores, we use topo->total_cores and core_ids
58 // To find number of sockets, we use package_ids
59 int* core_ids = emalloc(sizeof(int) * topo->total_cores);
60 int* package_ids = emalloc(sizeof(int) * topo->total_cores);
61
62 if(!fill_core_ids_from_sys(core_ids, topo->total_cores)) {
63 printWarn("fill_core_ids_from_sys failed, output may be incomplete/invalid");
64 for(int i=0; i < topo->total_cores; i++) core_ids[i] = 0;
65 }
66 if(!fill_package_ids_from_sys(package_ids, topo->total_cores)) {
67 printWarn("fill_package_ids_from_sys failed, output may be incomplete/invalid");
68 for(int i=0; i < topo->total_cores; i++) package_ids[i] = 0;
69 }
70
71 // 2. Socket detection
72 int *package_ids_count = emalloc(sizeof(int) * topo->total_cores);
73 for(int i=0; i < topo->total_cores; i++) {
74 package_ids_count[i] = 0;
75 }
76 for(int i=0; i < topo->total_cores; i++) {
77 package_ids_count[package_ids[i]]++;
78 }
79 for(int i=0; i < topo->total_cores; i++) {
80 if(package_ids_count[i] != 0) {
81 topo->sockets++;
82 }
83 }
84
85 // 3. Physical cores detection
86 int *core_ids_unified = emalloc(sizeof(int) * topo->total_cores);
87 for(int i=0; i < topo->total_cores; i++) {
88 core_ids_unified[i] = -1;
89 }
90 bool found = false;
91 for(int i=0; i < topo->total_cores; i++) {
92 for(int j=0; j < topo->total_cores && !found; j++) {
93 if(core_ids_unified[j] == core_ids[i]) found = true;
94 }
95 if(!found) {
96 core_ids_unified[topo->physical_cores] = core_ids[i];
97 topo->physical_cores++;
98 }
99 found = false;
100 }
101
102 topo->physical_cores = topo->physical_cores / topo->sockets; // only count cores on one socket
103 topo->logical_cores = topo->total_cores / topo->sockets; // only count threads on one socket
104 topo->smt_supported = topo->logical_cores / topo->physical_cores;
105
106 free(core_ids);
107 free(package_ids);
108 free(package_ids_count);
109 free(core_ids_unified);
110
111 return topo;
112 }
113
/*
 * Executes the "mfpvr" instruction and returns the 32-bit value it
 * places in the output register.
 */
static inline uint32_t mfpvr() {
  uint32_t value;

  __asm__ ("mfpvr %0" : "=r"(value));

  return value;
}
121
get_cpu_uarch(struct cpuInfo * cpu)122 struct uarch* get_cpu_uarch(struct cpuInfo* cpu) {
123 return get_uarch_from_pvr(cpu->pvr);
124 }
125
get_frequency_info()126 struct frequency* get_frequency_info() {
127 struct frequency* freq = emalloc(sizeof(struct frequency));
128
129 freq->max = get_max_freq_from_file(0);
130 freq->base = get_min_freq_from_file(0);
131
132 return freq;
133 }
134
get_peak_performance(struct cpuInfo * cpu,struct topology * topo,int64_t freq)135 int64_t get_peak_performance(struct cpuInfo* cpu, struct topology* topo, int64_t freq) {
136 /*
137 * Not sure about this
138 * PP(SP) = N_CORES * FREQUENCY * 4(If altivec)
139 */
140
141 //First check we have consistent data
142 if(freq == UNKNOWN_FREQ) {
143 return -1;
144 }
145
146 struct features* feat = cpu->feat;
147 int64_t flops = topo->physical_cores * topo->sockets * (freq * 1000000);
148 if(feat->altivec) flops = flops * 4;
149
150 // POWER9 has the concept called "slices". Each SMT4 core has two super-slices,
151 // and each super-slice is capable of doing two FLOPS per cycle. In the case of
152 // SMT8, it has 4 super-slices, thus four FLOPS per cycle.
153 if(is_power9(cpu->arch)) {
154 int threads_per_core = topo->logical_cores / topo->physical_cores;
155 flops = flops * (threads_per_core / 2);
156 }
157
158 return flops;
159 }
160
get_cpu_info()161 struct cpuInfo* get_cpu_info() {
162 struct cpuInfo* cpu = emalloc(sizeof(struct cpuInfo));
163 struct features* feat = emalloc(sizeof(struct features));
164 cpu->feat = feat;
165
166 bool *ptr = &(feat->AES);
167 for(uint32_t i = 0; i < sizeof(struct features)/sizeof(bool); i++, ptr++) {
168 *ptr = false;
169 }
170
171 int len;
172 char* path = emalloc(sizeof(char) * (strlen(_PATH_DT) + strlen(_PATH_DT_PART) + 1));
173 sprintf(path, "%s%s", _PATH_DT, _PATH_DT_PART);
174
175 cpu->cpu_name = read_file(path, &len);
176 cpu->pvr = mfpvr();
177 cpu->arch = get_cpu_uarch(cpu);
178 cpu->freq = get_frequency_info();
179 cpu->topo = get_topology_info(cpu->cach);
180 cpu->cach = get_cache_info(cpu);
181 feat->altivec = has_altivec(cpu->arch);
182 cpu->peak_performance = get_peak_performance(cpu, cpu->topo, get_freq(cpu->freq));
183
184 if(cpu->cach == NULL || cpu->topo == NULL) {
185 return NULL;
186 }
187 return cpu;
188 }
189
get_str_altivec(struct cpuInfo * cpu)190 char* get_str_altivec(struct cpuInfo* cpu) {
191 char* string = ecalloc(4, sizeof(char));
192
193 if(cpu->feat->altivec) strcpy(string, "Yes");
194 else strcpy(string, "No");
195
196 return string;
197 }
198
get_str_topology(struct topology * topo,bool dual_socket)199 char* get_str_topology(struct topology* topo, bool dual_socket) {
200 char* string;
201 if(topo->smt_supported > 1) {
202 uint32_t size = 3+3+17+1;
203 string = emalloc(sizeof(char)*size);
204 if(dual_socket)
205 snprintf(string, size, "%d cores (%d threads)", topo->physical_cores * topo->sockets, topo->logical_cores * topo->sockets);
206 else
207 snprintf(string, size, "%d cores (%d threads)",topo->physical_cores,topo->logical_cores);
208 }
209 else {
210 uint32_t size = 3+7+1;
211 string = emalloc(sizeof(char)*size);
212 if(dual_socket)
213 snprintf(string, size, "%d cores",topo->physical_cores * topo->sockets);
214 else
215 snprintf(string, size, "%d cores",topo->physical_cores);
216 }
217 return string;
218 }
219
220
print_debug(struct cpuInfo * cpu)221 void print_debug(struct cpuInfo* cpu) {
222 printf("PVR: 0x%.8X\n", cpu->pvr);
223 }
224