1 /*
2  * aarch64.cpp
3  *
4  *  Created on: 28 февр. 2020 г.
5  *      Author: Vladimir Sadovnikov <lsp.plugin@gmail.com>
6  *
7  * This file is part of tamgamp.lv2 <https://github.com/sadko4u/tamgamp.lv2>.
8  *
9  * This program is free software; you can redistribute it and/or
10  * modify it under the terms of the GNU Lesser General Public
11  * License as published by the Free Software Foundation; either
12  * version 3 of the License, or (at your option) any later version.
13  *
14  * This program is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17  * Lesser General Public License for more details.
18  *
19  * You should have received a copy of the GNU Lesser General Public License
20  * along with this program; if not, write to the Free Software Foundation,
21  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
22  */
23 
24 #include <dsp/dsp.h>
25 
26 #ifdef ARCH_AARCH64
27 
28 #include <string.h>
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <errno.h>
32 
33 #include <dsp/arch/aarch64/features.h>
34 #include <dsp/arch/aarch64/fpcr.h>
35 
36 namespace aarch64
37 {
38     typedef struct cpu_part_t
39     {
40         uint32_t    id;
41         const char *name;
42     } cpu_part_t;
43 
44     typedef struct feature_t
45     {
46         uint32_t    mask;
47         const char *name;
48     } feature_t;
49 
50     static const cpu_part_t cpu_parts[] =
51     {
52         { 0xb02, "ARM11 MPCore" },
53         { 0xb36, "ARM1136" },
54         { 0xb56, "ARM1156" },
55         { 0xb76, "ARM1176" },
56 
57         { 0xc05, "Cortex-A5" },
58         { 0xc07, "Cortex-A7" },
59         { 0xc08, "Cortex-A8" },
60         { 0xc09, "Cortex-A9" },
61         { 0xc0d, "Cortex-A12" },
62         { 0xc0e, "Cortex-A17" },
63         { 0xc0f, "Cortex-A15" },
64         { 0xc14, "Cortex-R4" },
65         { 0xc15, "Cortex-R5" },
66         { 0xc17, "Cortex-R7" },
67         { 0xc18, "Cortex-R8" },
68 
69         { 0xc20, "Cortex-M0" },
70         { 0xc21, "Cortex-M1" },
71         { 0xc23, "Cortex-M3" },
72         { 0xc24, "Cortex-M4" },
73         { 0xc27, "Cortex-M7" },
74         { 0xc60, "Cortex-M0+" },
75 
76         { 0xd01, "Cortex-A32" },
77         { 0xd03, "Cortex-A53" },
78         { 0xd04, "Cortex-A35" },
79         { 0xd05, "Cortex-A55" },
80         { 0xd07, "Cortex-A57" },
81         { 0xd08, "Cortex-A72" },
82         { 0xd09, "Cortex-A73" },
83         { 0xd0a, "Cortex-A75" },
84         { 0xd13, "Cortex-R52" },
85 
86         { 0xd20, "Cortex-M23" },
87         { 0xd21, "Cortex-M33" }
88     };
89 
90     static const feature_t cpu_features[] =
91     {
92 IF_ARCH_AARCH64(
93         { HWCAP_AARCH64_FP, "FP" },
94         { HWCAP_AARCH64_ASIMD, "ASIMD" },
95         { HWCAP_AARCH64_EVTSTRM, "EVTSTRM" },
96         { HWCAP_AARCH64_AES, "AES" },
97         { HWCAP_AARCH64_PMULL, "PMULL" },
98         { HWCAP_AARCH64_SHA1, "SHA1" },
99         { HWCAP_AARCH64_SHA2, "SHA2" },
100         { HWCAP_AARCH64_CRC32, "CRC32" },
101         { HWCAP_AARCH64_ATOMICS, "ATOMICS" },
102         { HWCAP_AARCH64_FPHP, "FPHP" },
103         { HWCAP_AARCH64_ASIMDHP, "ASIMDHP" },
104         { HWCAP_AARCH64_CPUID, "CPUID" },
105         { HWCAP_AARCH64_ASIMDRDM, "ASIMDRDM" },
106         { HWCAP_AARCH64_JSCVT, "JSCVT" },
107         { HWCAP_AARCH64_FCMA, "FCMA" },
108         { HWCAP_AARCH64_LRCPC, "LSCPC" },
109         { HWCAP_AARCH64_DCPOP, "DCPOP" },
110         { HWCAP_AARCH64_SHA3, "SHA3" },
111         { HWCAP_AARCH64_SM3, "SM3" },
112         { HWCAP_AARCH64_SM4, "SM4" },
113         { HWCAP_AARCH64_ASIMDDP, "ASMIDDP" },
114         { HWCAP_AARCH64_SHA512, "SHA512" },
115         { HWCAP_AARCH64_SVE, "SVE" },
116         { HWCAP_AARCH64_ASIMDFHM, "ASIMDFHM" },
117         { HWCAP_AARCH64_DIT, "DIT" },
118         { HWCAP_AARCH64_USCAT, "USCAT" },
119         { HWCAP_AARCH64_ILRCPC, "ILRCPC" },
120         { HWCAP_AARCH64_FLAGM, "FLAGM" }
121 )
122     };
123 
find_cpu_name(uint32_t id)124     const char *find_cpu_name(uint32_t id)
125     {
126         ssize_t first = 0, last = (sizeof(cpu_parts) / sizeof(cpu_part_t)) - 1;
127         while (first <= last)
128         {
129             ssize_t mid     = (first + last) >> 1;
130             uint32_t xmid   = cpu_parts[mid].id;
131             if (id < xmid)
132                 last = mid - 1;
133             else if (id > xmid)
134                 first = mid + 1;
135             else
136                 return cpu_parts[mid].name;
137         }
138         return "Generic ARM processor";
139     }
140 
detect_cpu_features(cpu_features_t * f)141     void detect_cpu_features(cpu_features_t *f)  // must be at least 13 bytes
142     {
143         f->implementer      = 0;
144         f->architecture     = 0;
145         IF_ARCH_ARM6(f->architecture = 6);
146         IF_ARCH_ARM7(f->architecture = 7);
147         IF_ARCH_ARM8(f->architecture = 8);
148         f->variant          = 0;
149         f->part             = 0;
150         f->revision         = 0;
151         f->hwcap            = getauxval(AT_HWCAP);
152 
153 //        processor       : 0
154 //        BogoMIPS        : 38.40
155 //        Features        : fp asimd evtstrm crc32 cpuid
156 //        CPU implementer : 0x41
157 //        CPU architecture: 8
158 //        CPU variant     : 0x0
159 //        CPU part        : 0xd03
160 //        CPU revision    : 4
161 
162         // Read /proc/cpuinfo
163         FILE *cpuinfo = fopen("/proc/cpuinfo", "r");
164         if (cpuinfo == NULL)
165             return;  // handle error
166 
167         size_t size = 0;
168         char *line  = NULL;
169 
170         while (getline(&line, &size, cpuinfo) >= 0)
171         {
172             size_t *field = NULL;
173 
174             // Find field
175             if (strncasecmp(line, "CPU implementer", 15) == 0)
176                 field = &f->implementer;
177             else if (strncasecmp(line, "CPU architecture", 16) == 0)
178                 field = &f->architecture;
179             else if (strncasecmp(line, "CPU variant", 11) == 0)
180                 field = &f->variant;
181             else if (strncasecmp(line, "CPU part", 8) == 0)
182                 field = &f->part;
183             else if (strncasecmp(line, "CPU revision", 12) == 0)
184                 field = &f->revision;
185 
186             if (field == NULL) // Field not found ?
187                 continue;
188             char *colon = strchr(line, ':'); // Colon not found ?
189             if (colon++ == NULL)
190                 continue;
191 
192             while ((*colon) == ' ')
193                 colon++;
194             if ((*colon) == '\0') // No data ?
195                 continue;
196 
197             // Detect number base
198             size_t base = 10;
199             if (strncasecmp(colon, "0x", 2) == 0)
200             {
201                 colon  += 2;
202                 base    = 16;
203             }
204 
205             // Parse value
206             errno       = 0;
207             long value  = strtol(colon, &colon, base);
208             if (errno != 0) // Failed parse ?
209                 continue;
210             if (((*colon) != '\0') && (*colon) != '\n') // Additional data?
211                 continue;
212 
213             // Store parsed value
214             *field      = value;
215         }
216 
217         // if we got here, handle error
218         if (line != NULL)
219             free(line);
220         fclose(cpuinfo);
221     }
222 
estimate_features_size(const cpu_features_t * f)223     static size_t estimate_features_size(const cpu_features_t *f)
224     {
225         // Estimate the string length
226         size_t estimate = 1; // End of string character
227         for (size_t i = 0, n=sizeof(cpu_features)/sizeof(feature_t); i < n; i++)
228         {
229             if (!(f->hwcap & cpu_features[i].mask))
230                 continue;
231 
232             if (estimate > 0)
233                 estimate++;
234             estimate += strlen(cpu_features[i].name);
235         }
236         return estimate;
237     }
238 
build_features_list(char * dst,const cpu_features_t * f)239     static char *build_features_list(char *dst, const cpu_features_t *f)
240     {
241         // Build string
242         char *s = dst;
243 
244         for (size_t i = 0, n=sizeof(cpu_features)/sizeof(feature_t); i < n; i++)
245         {
246             if (!(f->hwcap & cpu_features[i].mask))
247                 continue;
248             if (s != dst)
249                 s = stpcpy(s, " ");
250             s = stpcpy(s, cpu_features[i].name);
251         }
252         *s = '\0';
253 
254         return s;
255     }
256 
info()257     dsp::info_t *info()
258     {
259         cpu_features_t f;
260         detect_cpu_features(&f);
261 
262         const char *cpu = find_cpu_name(f.part);
263         char *model     = NULL;
264         int n = asprintf(&model, "vendor=0x%x, architecture=%d, variant=%d, part=0x%x, revision=%d",
265                 int(f.implementer), int(f.architecture), int(f.variant), int(f.part), int(f.revision));
266         if ((n < 0) || (model == NULL))
267             return NULL;
268 
269         size_t size     = sizeof(dsp::info_t);
270         size           += strlen(ARCH_STRING) + 1;
271         size           += strlen(cpu) + 1;
272         size           += strlen(model) + 1;
273         size           += estimate_features_size(&f);
274 
275         dsp::info_t *res = reinterpret_cast<dsp::info_t *>(malloc(size));
276         if (res == NULL)
277         {
278             free(model);
279             return res;
280         }
281 
282         char *text      = reinterpret_cast<char *>(&res[1]);
283         res->arch       = text;
284         text            = stpcpy(text, ARCH_STRING) + 1;
285         res->cpu        = text;
286         text            = stpcpy(text, cpu) + 1;
287         res->model      = text;
288         text            = stpcpy(text, model) + 1;
289         res->features   = text;
290         build_features_list(text, &f);
291 
292         free(model);
293         return res;
294     }
295 
    // Entry points that were installed in dsp::start and dsp::finish at the
    // moment dsp_init() replaced them; start() and finish() chain to these
    // routines. Both remain NULL while dsp_init() has not replaced them.
    static dsp::start_t     dsp_start       = NULL;
    static dsp::finish_t    dsp_finish      = NULL;
298 
start(dsp::context_t * ctx)299     void start(dsp::context_t *ctx)
300     {
301         dsp_start(ctx);
302         uint64_t fpcr           = read_fpcr();
303         ctx->data[ctx->top++]   = uint32_t(fpcr);
304         ctx->data[ctx->top++]   = uint32_t(fpcr >> 32);
305         write_fpcr(fpcr | FPCR_FZ | FPCR_DN | FPCR_FZ16);
306     }
307 
finish(dsp::context_t * ctx)308     void finish(dsp::context_t *ctx)
309     {
310         uint64_t lo, hi;
311         hi = ctx->data[--ctx->top];
312         lo = ctx->data[--ctx->top];
313         write_fpcr(lo | (hi << 32));
314         dsp_finish(ctx);
315     }
316 
// EXPORT2(function, export) assigns the routine aarch64::<export> to the entry
// point dsp::<function>. Note that the expansion already ends with ';'.
// EXPORT1(function) is the shorthand for the case when both names are the same.
#define EXPORT2(function, export)           dsp::function = aarch64::export;
#define EXPORT1(function)                   EXPORT2(function, function)
319 
dsp_init()320     void dsp_init()
321     {
322         cpu_features_t f;
323         detect_cpu_features(&f);
324 
325         if (f.hwcap & HWCAP_AARCH64_ASIMD)
326         {
327             // Save previous entry points
328             dsp_start                       = dsp::start;
329             dsp_finish                      = dsp::finish;
330 
331             // Export routines
332             EXPORT1(start);
333             EXPORT1(finish);
334         }
335 
336         // Export functions
337         EXPORT1(info);
338     }
339 }
340 
341 #endif /* ARCH_AARCH64 */
342