1 /****************************************************************************
2 **
3 ** Copyright (C) 2016 The Qt Company Ltd.
4 ** Copyright (C) 2019 Intel Corporation.
5 ** Contact: https://www.qt.io/licensing/
6 **
7 ** This file is part of the QtCore module of the Qt Toolkit.
8 **
9 ** $QT_BEGIN_LICENSE:LGPL$
10 ** Commercial License Usage
11 ** Licensees holding valid commercial Qt licenses may use this file in
12 ** accordance with the commercial license agreement provided with the
13 ** Software or, alternatively, in accordance with the terms contained in
14 ** a written agreement between you and The Qt Company. For licensing terms
15 ** and conditions see https://www.qt.io/terms-conditions. For further
16 ** information use the contact form at https://www.qt.io/contact-us.
17 **
18 ** GNU Lesser General Public License Usage
19 ** Alternatively, this file may be used under the terms of the GNU Lesser
20 ** General Public License version 3 as published by the Free Software
21 ** Foundation and appearing in the file LICENSE.LGPL3 included in the
22 ** packaging of this file. Please review the following information to
23 ** ensure the GNU Lesser General Public License version 3 requirements
24 ** will be met: https://www.gnu.org/licenses/lgpl-3.0.html.
25 **
26 ** GNU General Public License Usage
27 ** Alternatively, this file may be used under the terms of the GNU
28 ** General Public License version 2.0 or (at your option) the GNU General
29 ** Public license version 3 or any later version approved by the KDE Free
30 ** Qt Foundation. The licenses are as published by the Free Software
31 ** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3
32 ** included in the packaging of this file. Please review the following
33 ** information to ensure the GNU General Public License requirements will
34 ** be met: https://www.gnu.org/licenses/gpl-2.0.html and
35 ** https://www.gnu.org/licenses/gpl-3.0.html.
36 **
37 ** $QT_END_LICENSE$
38 **
39 ****************************************************************************/
40 
41 // we need ICC to define the prototype for _rdseed64_step
42 #define __INTEL_COMPILER_USE_INTRINSIC_PROTOTYPES
43 
44 #include "qsimd_p.h"
45 #include "qalgorithms.h"
46 #include <QByteArray>
47 #include <stdio.h>
48 
49 #ifdef Q_OS_LINUX
50 #  include "../testlib/3rdparty/valgrind_p.h"
51 #endif
52 
53 #if defined(Q_OS_WIN)
54 #  if !defined(Q_CC_GNU)
55 #    include <intrin.h>
56 #  endif
57 #elif defined(Q_OS_LINUX) && (defined(Q_PROCESSOR_ARM) || defined(Q_PROCESSOR_MIPS_32))
58 #include "private/qcore_unix_p.h"
59 
60 // the kernel header definitions for HWCAP_*
61 // (the ones we need/may need anyway)
62 
63 // copied from <asm/hwcap.h> (ARM)
64 #define HWCAP_CRUNCH    1024
65 #define HWCAP_THUMBEE   2048
66 #define HWCAP_NEON      4096
67 #define HWCAP_VFPv3     8192
68 #define HWCAP_VFPv3D16  16384
69 
70 // copied from <asm/hwcap.h> (ARM):
71 #define HWCAP2_CRC32 (1 << 4)
72 
73 // copied from <asm/hwcap.h> (Aarch64)
74 #define HWCAP_CRC32             (1 << 7)
75 
76 // copied from <linux/auxvec.h>
77 #define AT_HWCAP  16    /* arch dependent hints at CPU capabilities */
78 #define AT_HWCAP2 26    /* extension of AT_HWCAP */
79 
80 #elif defined(Q_CC_GHS)
81 #include <INTEGRITY_types.h>
82 #endif
83 
84 QT_BEGIN_NAMESPACE
85 
86 /*
87  * Use kdesdk/scripts/generate_string_table.pl to update the table below. Note
88  * we remove the terminating -1 that the script adds.
89  */
90 
91 // begin generated
92 #if defined(Q_PROCESSOR_ARM)
93 /* Data:
94  neon
95  crc32
96  */
// Feature names packed into one buffer: each name is preceded by a space and
// terminated by NUL. features_indices[i] is the byte offset of the i-th name
// inside features_string.
static const char features_string[] =
        " neon\0"
        " crc32\0"
        "\0";
static const int features_indices[] = { 0, 6 };
102 #elif defined(Q_PROCESSOR_MIPS)
103 /* Data:
104  dsp
105  dspr2
106 */
// Feature names packed into one buffer: each name is preceded by a space and
// terminated by NUL.
static const char features_string[] =
    " dsp\0"
    " dspr2\0"
    "\0";

// Byte offset of each feature name inside features_string.
static const int features_indices[] = {
       0,    5
};
115 #elif defined(Q_PROCESSOR_X86)
116 #  include "qsimd_x86.cpp"                  // generated by util/x86simdgen
117 #else
// No named features on this architecture: both tables are empty, which makes
// features_count (computed from features_indices) zero.
static const char features_string[] = "";
static const int features_indices[] = { };
120 #endif
121 // end generated
122 
123 #if defined (Q_OS_NACL)
// Q_OS_NACL variant: performs no detection and always reports an empty
// feature set.
static inline uint detectProcessorFeatures()
{
    return 0;
}
128 #elif defined(Q_PROCESSOR_ARM)
detectProcessorFeatures()129 static inline quint64 detectProcessorFeatures()
130 {
131     quint64 features = 0;
132 
133 #if defined(Q_OS_LINUX)
134 #  if defined(Q_PROCESSOR_ARM_V8) && defined(Q_PROCESSOR_ARM_64)
135     features |= Q_UINT64_C(1) << CpuFeatureNEON; // NEON is always available on ARMv8 64bit.
136 #  endif
137     int auxv = qt_safe_open("/proc/self/auxv", O_RDONLY);
138     if (auxv != -1) {
139         unsigned long vector[64];
140         int nread;
141         while (features == 0) {
142             nread = qt_safe_read(auxv, (char *)vector, sizeof vector);
143             if (nread <= 0) {
144                 // EOF or error
145                 break;
146             }
147 
148             int max = nread / (sizeof vector[0]);
149             for (int i = 0; i < max; i += 2) {
150                 if (vector[i] == AT_HWCAP) {
151 #  if defined(Q_PROCESSOR_ARM_V8) && defined(Q_PROCESSOR_ARM_64)
152                     // For Aarch64:
153                     if (vector[i+1] & HWCAP_CRC32)
154                         features |= Q_UINT64_C(1) << CpuFeatureCRC32;
155 #  endif
156                     // Aarch32, or ARMv7 or before:
157                     if (vector[i+1] & HWCAP_NEON)
158                         features |= Q_UINT64_C(1) << CpuFeatureNEON;
159                 }
160 #  if defined(Q_PROCESSOR_ARM_32)
161                 // For Aarch32:
162                 if (vector[i] == AT_HWCAP2) {
163                     if (vector[i+1] & HWCAP2_CRC32)
164                         features |= Q_UINT64_C(1) << CpuFeatureCRC32;
165                 }
166 #  endif
167             }
168         }
169 
170         qt_safe_close(auxv);
171         return features;
172     }
173     // fall back if /proc/self/auxv wasn't found
174 #endif
175 
176 #if defined(__ARM_NEON__)
177     features |= Q_UINT64_C(1) << CpuFeatureNEON;
178 #endif
179 #if defined(__ARM_FEATURE_CRC32)
180     features |= Q_UINT64_C(1) << CpuFeatureCRC32;
181 #endif
182 
183     return features;
184 }
185 
186 #elif defined(Q_PROCESSOR_X86)
187 
188 #ifdef Q_PROCESSOR_X86_32
189 # define PICreg "%%ebx"
190 #else
191 # define PICreg "%%rbx"
192 #endif
193 
194 static bool checkRdrndWorks() noexcept;
195 
// Returns the value CPUID leaf 0 reports in EAX (per this function's name,
// the highest basic CPUID leaf that may be queried). Returns 0 when the
// CPUID instruction is found to be unsupported or when no compiler interface
// for it is known; returns the fixed value 6 for Emscripten.
static int maxBasicCpuidSupported()
{
#if defined(Q_CC_EMSCRIPTEN)
    return 6; // All features supported by Emscripten
#elif defined(Q_CC_GNU)
    qregisterint tmp1;

# if Q_PROCESSOR_X86 < 5
    // check if the CPUID instruction is supported
    // (the sequence flips bit 0x00200000 of the flags register, reads the
    // flags back and XORs them with the original value: a result of zero
    // means the bit could not be changed)
    long cpuid_supported;
    asm ("pushf\n"
         "pop %0\n"
         "mov %0, %1\n"
         "xor $0x00200000, %0\n"
         "push %0\n"
         "popf\n"
         "pushf\n"
         "pop %0\n"
         "xor %1, %0\n" // %eax is now 0 if CPUID is not supported
         : "=a" (cpuid_supported), "=r" (tmp1)
         );
    if (!cpuid_supported)
        return 0;
# endif

    // CPUID with EAX = 0. PICreg (EBX/RBX) is exchanged with a scratch
    // register before and after the instruction, so it holds its original
    // value again once the asm block finishes.
    int result;
    asm ("xchg " PICreg", %1\n"
         "cpuid\n"
         "xchg " PICreg", %1\n"
        : "=&a" (result), "=&r" (tmp1)
        : "0" (0)
        : "ecx", "edx");
    return result;
#elif defined(Q_OS_WIN)
    // Use the __cpuid function; if the CPUID instruction isn't supported, it will return 0
    int info[4];
    __cpuid(info, 0);
    return info[0];
#elif defined(Q_CC_GHS)
    unsigned int info[4];
    __CPUID(0, info);
    return info[0];
#else
    return 0;
#endif
}
242 
// Queries CPUID leaf 1 and stores the resulting ECX and EDX register values
// in the two output parameters. When no CPUID interface is available for the
// compiler/OS combination, the outputs are left untouched.
static void cpuidFeatures01(uint &ecx, uint &edx)
{
#if defined(Q_CC_GNU) && !defined(Q_CC_EMSCRIPTEN)
    qregisterint tmp1;
    // PICreg (EBX/RBX) is swapped with a scratch register around CPUID so
    // that it holds its original value again afterwards.
    asm ("xchg " PICreg", %2\n"
         "cpuid\n"
         "xchg " PICreg", %2\n"
        : "=&c" (ecx), "=&d" (edx), "=&r" (tmp1)
        : "a" (1));
#elif defined(Q_OS_WIN)
    int info[4];
    __cpuid(info, 1);
    ecx = info[2];
    edx = info[3];
#elif defined(Q_CC_GHS)
    unsigned int info[4];
    __CPUID(1, info);
    ecx = info[2];
    edx = info[3];
#else
    Q_UNUSED(ecx);
    Q_UNUSED(edx);
#endif
}
267 
268 #ifdef Q_OS_WIN
// Fallback overload of __cpuidex that only zero-fills the four result words.
// NOTE(review): presumably, like the _xgetbv() fallback further down, the
// __int64 parameter makes this a worse overload match than the compiler's
// own intrinsic, so it is selected only when the intrinsic is missing -
// confirm against the supported toolchains.
inline void __cpuidex(int info[4], int, __int64) { memset(info, 0, 4*sizeof(int));}
270 #endif
271 
// Queries CPUID leaf 7 with ECX = 0 and stores the resulting EBX, ECX and EDX
// register values in the output parameters. When no CPUID interface is
// available for the compiler/OS combination, the outputs are left untouched.
static void cpuidFeatures07_00(uint &ebx, uint &ecx, uint &edx)
{
#if defined(Q_CC_GNU) && !defined(Q_CC_EMSCRIPTEN)
    qregisteruint rbx; // in case it's 64-bit
    qregisteruint rcx = 0;  // ECX input value (0), overwritten with the result
    qregisteruint rdx = 0;
    // PICreg (EBX/RBX) is swapped with "rbx" around CPUID: afterwards "rbx"
    // holds the value CPUID produced and PICreg its original content.
    asm ("xchg " PICreg", %0\n"
         "cpuid\n"
         "xchg " PICreg", %0\n"
        : "=&r" (rbx), "+&c" (rcx), "+&d" (rdx)
        : "a" (7));
    ebx = rbx;
    ecx = rcx;
    edx = rdx;
#elif defined(Q_OS_WIN)
    int info[4];
    __cpuidex(info, 7, 0);
    ebx = info[1];
    ecx = info[2];
    edx = info[3];
#elif defined(Q_CC_GHS)
    unsigned int info[4];
    __CPUIDEX(7, 0, info);
    ebx = info[1];
    ecx = info[2];
    edx = info[3];
#else
    Q_UNUSED(ebx);
    Q_UNUSED(ecx);
    Q_UNUSED(edx);
#endif
}
304 
305 #if defined(Q_OS_WIN) && !(defined(Q_CC_GNU) || defined(Q_CC_GHS))
306 // fallback overload in case this intrinsic does not exist: unsigned __int64 _xgetbv(unsigned int);
// Always returns 0, i.e. reports no enabled register state to the caller.
inline quint64 _xgetbv(__int64) { return 0; }
308 #endif
// Executes XGETBV with ECX = in and returns the low 32 bits of the result in
// "eax" and the high 32 bits in "edx". When no implementation exists for the
// compiler/OS combination, the outputs are left untouched.
static void xgetbv(uint in, uint &eax, uint &edx)
{
#if (defined(Q_CC_GNU) && !defined(Q_CC_EMSCRIPTEN)) || defined(Q_CC_GHS)
    // the instruction is emitted as raw bytes instead of by mnemonic
    asm (".byte 0x0F, 0x01, 0xD0" // xgetbv instruction
        : "=a" (eax), "=d" (edx)
        : "c" (in));
#elif defined(Q_OS_WIN)
    quint64 result = _xgetbv(in);
    eax = result;           // low half
    edx = result >> 32;     // high half
#else
    Q_UNUSED(in);
    Q_UNUSED(eax);
    Q_UNUSED(edx);
#endif
}
325 
detectProcessorFeatures()326 static quint64 detectProcessorFeatures()
327 {
328     // Flags from the CR0 / XCR0 state register
329     enum XCR0Flags {
330         X87             = 1 << 0,
331         XMM0_15         = 1 << 1,
332         YMM0_15Hi128    = 1 << 2,
333         BNDRegs         = 1 << 3,
334         BNDCSR          = 1 << 4,
335         OpMask          = 1 << 5,
336         ZMM0_15Hi256    = 1 << 6,
337         ZMM16_31        = 1 << 7,
338 
339         SSEState        = XMM0_15,
340         AVXState        = XMM0_15 | YMM0_15Hi128,
341         AVX512State     = AVXState | OpMask | ZMM0_15Hi256 | ZMM16_31
342     };
343     static const quint64 AllAVX2 = CpuFeatureAVX2 | AllAVX512;
344     static const quint64 AllAVX = CpuFeatureAVX | AllAVX2;
345 
346     quint64 features = 0;
347     int cpuidLevel = maxBasicCpuidSupported();
348 #if Q_PROCESSOR_X86 < 5
349     if (cpuidLevel < 1)
350         return 0;
351 #else
352     Q_ASSERT(cpuidLevel >= 1);
353 #endif
354 
355     uint results[X86CpuidMaxLeaf] = {};
356     cpuidFeatures01(results[Leaf1ECX], results[Leaf1EDX]);
357     if (cpuidLevel >= 7)
358         cpuidFeatures07_00(results[Leaf7_0EBX], results[Leaf7_0ECX], results[Leaf7_0EDX]);
359 
360     // populate our feature list
361     for (uint i = 0; i < sizeof(x86_locators) / sizeof(x86_locators[0]); ++i) {
362         uint word = x86_locators[i] / 32;
363         uint bit = 1U << (x86_locators[i] % 32);
364         quint64 feature = Q_UINT64_C(1) << (i + 1);
365         if (results[word] & bit)
366             features |= feature;
367     }
368 
369     // now check the AVX state
370     uint xgetbvA = 0, xgetbvD = 0;
371     if (results[Leaf1ECX] & (1u << 27)) {
372         // XGETBV enabled
373         xgetbv(0, xgetbvA, xgetbvD);
374     }
375 
376     if ((xgetbvA & AVXState) != AVXState) {
377         // support for YMM registers is disabled, disable all AVX
378         features &= ~AllAVX;
379     } else if ((xgetbvA & AVX512State) != AVX512State) {
380         // support for ZMM registers or mask registers is disabled, disable all AVX512
381         features &= ~AllAVX512;
382     }
383 
384     if (features & CpuFeatureRDRND && !checkRdrndWorks())
385         features &= ~(CpuFeatureRDRND | CpuFeatureRDSEED);
386 
387     return features;
388 }
389 
390 #elif defined(Q_PROCESSOR_MIPS_32)
391 
392 #if defined(Q_OS_LINUX)
393 //
394 // Do not use QByteArray: it could use SIMD instructions itself at
395 // some point, thus creating a recursive dependency. Instead, use a
396 // QSimpleBuffer, which has the bare minimum needed to use memory
397 // dynamically and read lines from /proc/cpuinfo of arbitrary sizes.
398 //
// Minimal growable byte buffer built on realloc().
//
//  data  - heap block (null until the first growth), released by the destructor
//  alloc - number of bytes allocated for data
//  size  - number of bytes currently in use (size <= alloc)
//
// The memory is grown in multiples of chunk_size and never shrinks. Running
// out of memory terminates the program through abort().
// Instances must not be copied: a copy would free the same block twice.
struct QSimpleBuffer {
    static const int chunk_size = 256;
    char *data;
    unsigned alloc;
    unsigned size;

    QSimpleBuffer(): data(0), alloc(0), size(0) {}
    ~QSimpleBuffer() { ::free(data); }

    // Sets the used size to newsize, growing the allocation when required.
    // Bytes added by growing are uninitialized.
    void resize(unsigned newsize) {
        if (newsize > alloc) {
            unsigned newalloc = chunk_size * ((newsize / chunk_size) + 1);
            if (newalloc < newsize) newalloc = newsize;     // the rounding above wrapped around
            // keep the old block reachable until realloc() is known to have worked
            char *grown = static_cast<char*>(::realloc(data, newalloc));
            if (!grown)
                ::abort();
            data = grown;
            alloc = newalloc;
        }
        size = newsize;
    }
    // Appends the first appendsize bytes of other to this buffer.
    void append(const QSimpleBuffer &other, unsigned appendsize) {
        if (!appendsize) return;    // also keeps memcpy() away from null pointers
        unsigned oldsize = size;
        resize(oldsize + appendsize);
        ::memcpy(data + oldsize, other.data, appendsize);
    }
    // Removes the first amount bytes and moves the remainder to the front.
    void popleft(unsigned amount) {
        if (amount >= size) return resize(0);
        size -= amount;
        ::memmove(data, data + amount, size);
    }
    // Returns the content as a NUL-terminated string. The terminator is
    // stored right after the used bytes and is not counted in size; the
    // call may reallocate, so pointers obtained earlier can become stale.
    char* cString() {
        if (size >= alloc) {
            // make room for the terminator without changing the used size
            unsigned used = size;
            resize(used + 1);
            size = used;
        }
        data[size] = '\0';
        return data;
    }
};
434 
435 //
436 // Uses a scratch "buffer" (which must be used for all reads done in the
437 // same file descriptor) to read chunks of data from a file, to read
438 // one line at a time. Lines include the trailing newline character ('\n').
439 // On EOF, line.size is zero.
440 //
bufReadLine(int fd,QSimpleBuffer & line,QSimpleBuffer & buffer)441 static void bufReadLine(int fd, QSimpleBuffer &line, QSimpleBuffer &buffer)
442 {
443     for (;;) {
444         char *newline = static_cast<char*>(::memchr(buffer.data, '\n', buffer.size));
445         if (newline) {
446             unsigned piece_size = newline - buffer.data + 1;
447             line.append(buffer, piece_size);
448             buffer.popleft(piece_size);
449             line.resize(line.size - 1);
450             return;
451         }
452         if (buffer.size + QSimpleBuffer::chunk_size > buffer.alloc) {
453             int oldsize = buffer.size;
454             buffer.resize(buffer.size + QSimpleBuffer::chunk_size);
455             buffer.size = oldsize;
456         }
457         ssize_t read_bytes = ::qt_safe_read(fd, buffer.data + buffer.size, QSimpleBuffer::chunk_size);
458         if (read_bytes > 0) buffer.size += read_bytes;
459         else return;
460     }
461 }
462 
463 //
464 // Checks if any line with a given prefix from /proc/cpuinfo contains
465 // a certain string, surrounded by spaces.
466 //
procCpuinfoContains(const char * prefix,const char * string)467 static bool procCpuinfoContains(const char *prefix, const char *string)
468 {
469     int cpuinfo_fd = ::qt_safe_open("/proc/cpuinfo", O_RDONLY);
470     if (cpuinfo_fd == -1)
471         return false;
472 
473     unsigned string_len = ::strlen(string);
474     unsigned prefix_len = ::strlen(prefix);
475     QSimpleBuffer line, buffer;
476     bool present = false;
477     do {
478         line.resize(0);
479         bufReadLine(cpuinfo_fd, line, buffer);
480         char *colon = static_cast<char*>(::memchr(line.data, ':', line.size));
481         if (colon && line.size > prefix_len + string_len) {
482             if (!::strncmp(prefix, line.data, prefix_len)) {
483                 // prefix matches, next character must be ':' or space
484                 if (line.data[prefix_len] == ':' || ::isspace(line.data[prefix_len])) {
485                     // Does it contain the string?
486                     char *found = ::strstr(line.cString(), string);
487                     if (found && ::isspace(found[-1]) &&
488                             (::isspace(found[string_len]) || found[string_len] == '\0')) {
489                         present = true;
490                         break;
491                     }
492                 }
493             }
494         }
495     } while (line.size);
496 
497     ::qt_safe_close(cpuinfo_fd);
498     return present;
499 }
500 #endif
501 
detectProcessorFeatures()502 static inline quint64 detectProcessorFeatures()
503 {
504     // NOTE: MIPS 74K cores are the only ones supporting DSPr2.
505     quint64 flags = 0;
506 
507 #if defined __mips_dsp
508     flags |= Q_UINT64_C(1) << CpuFeatureDSP;
509 #  if defined __mips_dsp_rev && __mips_dsp_rev >= 2
510     flags |= Q_UINT64_C(1) << CpuFeatureDSPR2;
511 #  elif defined(Q_OS_LINUX)
512     if (procCpuinfoContains("cpu model", "MIPS 74Kc") || procCpuinfoContains("cpu model", "MIPS 74Kf"))
513         flags |= Q_UINT64_C(1) << CpuFeatureDSPR2;
514 #  endif
515 #elif defined(Q_OS_LINUX)
516     if (procCpuinfoContains("ASEs implemented", "dsp")) {
517         flags |= Q_UINT64_C(1) << CpuFeatureDSP;
518         if (procCpuinfoContains("cpu model", "MIPS 74Kc") || procCpuinfoContains("cpu model", "MIPS 74Kf"))
519             flags |= Q_UINT64_C(1) << CpuFeatureDSPR2;
520     }
521 #endif
522 
523     return flags;
524 }
525 
526 #else
// Variant for architectures without a dedicated implementation above:
// performs no detection and always reports an empty feature set.
static inline uint detectProcessorFeatures()
{
    return 0;
}
531 #endif
532 
// number of entries in features_indices, i.e. of named features
static const int features_count = (sizeof features_indices) / (sizeof features_indices[0]);

// record what CPU features were enabled by default in this Qt build
static const quint64 minFeature = qCompilerCpuFeatures;

// Storage for the detected feature mask, filled in by qDetectCpuFeatures():
// a single 64-bit atomic where available, otherwise two 32-bit atomics
// holding the low word in [0] and the high word in [1].
#ifdef Q_ATOMIC_INT64_IS_SUPPORTED
Q_CORE_EXPORT QBasicAtomicInteger<quint64> qt_cpu_features[1] = { Q_BASIC_ATOMIC_INITIALIZER(0) };
#else
Q_CORE_EXPORT QBasicAtomicInteger<unsigned> qt_cpu_features[2] = { Q_BASIC_ATOMIC_INITIALIZER(0), Q_BASIC_ATOMIC_INITIALIZER(0) };
#endif
543 
qDetectCpuFeatures()544 quint64 qDetectCpuFeatures()
545 {
546     quint64 f = detectProcessorFeatures();
547     QByteArray disable = qgetenv("QT_NO_CPU_FEATURE");
548     if (!disable.isEmpty()) {
549         disable.prepend(' ');
550         for (int i = 0; i < features_count; ++i) {
551             if (disable.contains(features_string + features_indices[i]))
552                 f &= ~(Q_UINT64_C(1) << i);
553         }
554     }
555 
556 #ifdef RUNNING_ON_VALGRIND
557     bool runningOnValgrind = RUNNING_ON_VALGRIND;
558 #else
559     bool runningOnValgrind = false;
560 #endif
561     if (Q_UNLIKELY(!runningOnValgrind && minFeature != 0 && (f & minFeature) != minFeature)) {
562         quint64 missing = minFeature & ~f;
563         fprintf(stderr, "Incompatible processor. This Qt build requires the following features:\n   ");
564         for (int i = 0; i < features_count; ++i) {
565             if (missing & (Q_UINT64_C(1) << i))
566                 fprintf(stderr, "%s", features_string + features_indices[i]);
567         }
568         fprintf(stderr, "\n");
569         fflush(stderr);
570         qFatal("Aborted. Incompatible processor: missing feature 0x%llx -%s.", missing,
571                features_string + features_indices[qCountTrailingZeroBits(missing)]);
572     }
573 
574     qt_cpu_features[0].storeRelaxed(f | quint32(QSimdInitialized));
575 #ifndef Q_ATOMIC_INT64_IS_SUPPORTED
576     qt_cpu_features[1].storeRelaxed(f >> 32);
577 #endif
578     return f;
579 }
580 
qDumpCPUFeatures()581 void qDumpCPUFeatures()
582 {
583     quint64 features = qCpuFeatures() & ~quint64(QSimdInitialized);
584     printf("Processor features: ");
585     for (int i = 0; i < features_count; ++i) {
586         if (features & (Q_UINT64_C(1) << i))
587             printf("%s%s", features_string + features_indices[i],
588                    minFeature & (Q_UINT64_C(1) << i) ? "[required]" : "");
589     }
590     if ((features = (qCompilerCpuFeatures & ~features))) {
591         printf("\n!!!!!!!!!!!!!!!!!!!!\n!!! Missing required features:");
592         for (int i = 0; i < features_count; ++i) {
593             if (features & (Q_UINT64_C(1) << i))
594                 printf("%s", features_string + features_indices[i]);
595         }
596         printf("\n!!! Applications will likely crash with \"Invalid Instruction\"\n!!!!!!!!!!!!!!!!!!!!");
597     }
598     puts("");
599 }
600 
601 #if defined(Q_PROCESSOR_X86) && QT_COMPILER_SUPPORTS_HERE(RDRND)
602 
603 #  ifdef Q_PROCESSOR_X86_64
604 #    define _rdrandXX_step _rdrand64_step
605 #    define _rdseedXX_step _rdseed64_step
606 #  else
607 #    define _rdrandXX_step _rdrand32_step
608 #    define _rdseedXX_step _rdseed32_step
609 #  endif
610 
611 // The parameter to _rdrand64_step & _rdseed64_step is unsigned long long for
612 // Clang and GCC but unsigned __int64 for MSVC and ICC, which is unsigned long
613 // long on Windows, but unsigned long on Linux.
namespace {
// Extracts T from a function type "int (T *)" - the shape of the
// _rdrandXX_step intrinsic - so that randuint is exactly the integer type
// the intrinsic writes through its pointer parameter.
template <typename F> struct ExtractParameter;
template <typename T> struct ExtractParameter<int (T *)> { using Type = T; };
using randuint = ExtractParameter<decltype(_rdrandXX_step)>::Type;
}
619 
620 #  if QT_COMPILER_SUPPORTS_HERE(RDSEED)
QT_FUNCTION_TARGET(RDSEED)621 static QT_FUNCTION_TARGET(RDSEED) unsigned *qt_random_rdseed(unsigned *ptr, unsigned *end) noexcept
622 {
623     // Unlike for the RDRAND code below, the Intel whitepaper describing the
624     // use of the RDSEED instruction indicates we should not retry in a loop.
625     // If the independent bit generator used by RDSEED is out of entropy, it
626     // may take time to replenish.
627     // https://software.intel.com/en-us/articles/intel-digital-random-number-generator-drng-software-implementation-guide
628     while (ptr + sizeof(randuint)/sizeof(*ptr) <= end) {
629         if (_rdseedXX_step(reinterpret_cast<randuint *>(ptr)) == 0)
630             goto out;
631         ptr += sizeof(randuint)/sizeof(*ptr);
632     }
633 
634     if (sizeof(*ptr) != sizeof(randuint) && ptr != end) {
635         if (_rdseed32_step(ptr) == 0)
636             goto out;
637         ++ptr;
638     }
639 
640 out:
641     return ptr;
642 }
643 #  else
// Variant used when the compiler cannot generate RDSEED here: writes nothing
// and returns ptr unchanged, i.e. reports that no element was filled.
static unsigned *qt_random_rdseed(unsigned *ptr, unsigned *)
{
    return ptr;
}
648 #  endif
649 
QT_FUNCTION_TARGET(RDRND)650 static QT_FUNCTION_TARGET(RDRND) unsigned *qt_random_rdrnd(unsigned *ptr, unsigned *end) noexcept
651 {
652     int retries = 10;
653     while (ptr + sizeof(randuint)/sizeof(*ptr) <= end) {
654         if (_rdrandXX_step(reinterpret_cast<randuint *>(ptr)))
655             ptr += sizeof(randuint)/sizeof(*ptr);
656         else if (--retries == 0)
657             goto out;
658     }
659 
660     while (sizeof(*ptr) != sizeof(randuint) && ptr != end) {
661         bool ok = _rdrand32_step(ptr);
662         if (!ok && --retries)
663             continue;
664         if (ok)
665             ++ptr;
666         break;
667     }
668 
669 out:
670     return ptr;
671 }
672 
QT_FUNCTION_TARGET(RDRND)673 static QT_FUNCTION_TARGET(RDRND) Q_DECL_COLD_FUNCTION bool checkRdrndWorks() noexcept
674 {
675     /*
676      * Some AMD CPUs (e.g. AMD A4-6250J and AMD Ryzen 3000-series) have a
677      * failing random generation instruction, which always returns
678      * 0xffffffff, even when generation was "successful".
679      *
680      * This code checks if hardware random generator generates four consecutive
681      * equal numbers. If it does, then we probably have a failing one and
682      * should disable it completely.
683      *
684      * https://bugreports.qt.io/browse/QTBUG-69423
685      */
686     constexpr qsizetype TestBufferSize = 4;
687     unsigned testBuffer[TestBufferSize] = {};
688 
689     unsigned *end = qt_random_rdrnd(testBuffer, testBuffer + TestBufferSize);
690     if (end < testBuffer + 3) {
691         // Random generation didn't produce enough data for us to make a
692         // determination whether it's working or not. Assume it isn't, but
693         // don't print a warning.
694         return false;
695     }
696 
697     // Check the results for equality
698     if (testBuffer[0] == testBuffer[1]
699         && testBuffer[0] == testBuffer[2]
700         && (end < testBuffer + TestBufferSize || testBuffer[0] == testBuffer[3])) {
701         fprintf(stderr, "WARNING: CPU random generator seem to be failing, "
702                         "disabling hardware random number generation\n"
703                         "WARNING: RDRND generated:");
704         for (unsigned *ptr = testBuffer; ptr < end; ++ptr)
705             fprintf(stderr, " 0x%x", *ptr);
706         fprintf(stderr, "\n");
707         return false;
708     }
709 
710     // We're good
711     return true;
712 }
713 
// Fills "buffer", treated as an array of "count" unsigned values, with
// hardware random numbers and returns how many elements were written (which
// can be fewer than count). RDSEED is tried first when the CPU has it; RDRND
// then fills whatever is left.
QT_FUNCTION_TARGET(RDRND) qsizetype qRandomCpu(void *buffer, qsizetype count) noexcept
{
    unsigned *const first = static_cast<unsigned *>(buffer);
    unsigned *const last = first + count;
    unsigned *cursor = first;

    if (qCpuHasFeature(RDSEED))
        cursor = qt_random_rdseed(cursor, last);

    // fill the buffer with RDRND if RDSEED didn't
    cursor = qt_random_rdrnd(cursor, last);
    return cursor - first;
}
726 #elif defined(Q_PROCESSOR_X86) && !defined(Q_OS_NACL) && !defined(Q_PROCESSOR_ARM)
// Variant used when the compiler cannot generate RDRND here: always reports
// the hardware generator as not working, which makes the x86
// detectProcessorFeatures() clear the RDRND and RDSEED feature bits.
static bool checkRdrndWorks() noexcept { return false; }
728 #endif // Q_PROCESSOR_X86 && RDRND
729 
730 QT_END_NAMESPACE
731