1 // This code is derived from code distributed as part of Google Perftools.
// The original is available from google-perftools-2.0 in the file
3 // src/base/atomicops-internals-x86.cc.  This file was retrieved Feb 06, 2012 by
4 // Robert Escriva.
5 
6 /* Copyright (c) 2007, Google Inc.
7  * All rights reserved.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions are
11  * met:
12  *
13  *     * Redistributions of source code must retain the above copyright
14  * notice, this list of conditions and the following disclaimer.
15  *     * Redistributions in binary form must reproduce the above
16  * copyright notice, this list of conditions and the following disclaimer
17  * in the documentation and/or other materials provided with the
18  * distribution.
19  *     * Neither the name of Google Inc. nor the names of its
20  * contributors may be used to endorse or promote products derived from
21  * this software without specific prior written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
24  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
25  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
26  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
27  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
28  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
29  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
30  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
31  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
32  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
33  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34  *
35  * ---
36  * This module gets enough CPU information to optimize the
37  * atomicops module on x86.
38  */
39 
40 // C
41 #include <string.h>
42 
43 // e
44 #include "e/atomic.h"
45 
// Inline cpuid instruction.  In PIC compilations, %ebx contains the address
// of the global offset table.  To avoid breaking such executables, this code
// must preserve that register's value across cpuid instructions.
//
// cpuid(a, b, c, d, inp) executes CPUID with inp loaded into eax and stores
// the resulting eax, ebx, ecx and edx values into the lvalues a, b, c and d.
// The base register is first copied into edi/rdi; after the instruction the
// xchg swaps the two registers, which restores ebx/rbx and leaves CPUID's
// ebx result in edi/rdi, where the "=D" output constraint picks it up.
//
// The macro is defined only for i386 and x86_64 targets.  On any other
// target it stays undefined, and the `#if defined(cpuid)` test later in this
// file relies on that to skip the x86-specific code.
#if defined(__i386__)
#define cpuid(a, b, c, d, inp) \
  asm ("mov %%ebx, %%edi\n"    \
       "cpuid\n"               \
       "xchg %%edi, %%ebx\n"   \
       : "=a" (a), "=D" (b), "=c" (c), "=d" (d) : "a" (inp))
#elif defined (__x86_64__)
#define cpuid(a, b, c, d, inp) \
  asm ("mov %%rbx, %%rdi\n"    \
       "cpuid\n"               \
       "xchg %%rdi, %%rbx\n"   \
       : "=a" (a), "=D" (b), "=c" (c), "=d" (d) : "a" (inp))
#endif
62 
63 #if defined(cpuid)        // initialize the struct only on x86
64 
// Set the flags so that code will run correctly and conservatively until
// AtomicOps_Internalx86CPUFeaturesInit() overwrites them with the values
// probed from the CPU.  (The comment inherited from perftools referred to
// InitGoogle(); nothing in this file calls that -- the static `init` object
// defined further down runs the probe instead.)
//
// NOTE(review): the initializers are positional, so they must stay in the
// member order of AtomicOps_x86CPUFeatureStruct, which is declared outside
// this file (presumably in e/atomic.h) -- confirm before reordering.
struct AtomicOps_x86CPUFeatureStruct AtomicOps_Internalx86CPUFeatures =
{
    false,          // bug can't exist before process spawns multiple threads
    false,          // no SSE2
    false,          // no cmpxchg16b
};
73 
74 // Initialize the AtomicOps_Internalx86CPUFeatures struct.
AtomicOps_Internalx86CPUFeaturesInit()75 static void AtomicOps_Internalx86CPUFeaturesInit()
76 {
77     uint32_t eax;
78     uint32_t ebx;
79     uint32_t ecx;
80     uint32_t edx;
81 
82     // Get vendor string (issue CPUID with eax = 0)
83     cpuid(eax, ebx, ecx, edx, 0);
84     char vendor[13];
85     memcpy(vendor, &ebx, 4);
86     memcpy(vendor + 4, &edx, 4);
87     memcpy(vendor + 8, &ecx, 4);
88     vendor[12] = 0;
89 
90     // get feature flags in ecx/edx, and family/model in eax
91     cpuid(eax, ebx, ecx, edx, 1);
92 
93     int family = (eax >> 8) & 0xf;        // family and model fields
94     int model = (eax >> 4) & 0xf;
95 
96     // use extended family and model fields
97     if (family == 0xf)
98     {
99         family += (eax >> 20) & 0xff;
100         model += ((eax >> 16) & 0xf) << 4;
101     }
102 
103     // Opteron Rev E has a bug in which on very rare occasions a locked
104     // instruction doesn't act as a read-acquire barrier if followed by a
105     // non-locked read-modify-write instruction.  Rev F has this bug in
106     // pre-release versions, but not in versions released to customers,
107     // so we test only for Rev E, which is family 15, model 32..63 inclusive.
108     if (strcmp(vendor, "AuthenticAMD") == 0 && // AMD
109         family == 15 &&
110         32 <= model && model <= 63)
111     {
112         AtomicOps_Internalx86CPUFeatures.has_amd_lock_mb_bug = true;
113     }
114     else
115     {
116         AtomicOps_Internalx86CPUFeatures.has_amd_lock_mb_bug = false;
117     }
118 
119     // edx bit 26 is SSE2 which we use to tell use whether we can use mfence
120     AtomicOps_Internalx86CPUFeatures.has_sse2 = ((edx >> 26) & 1);
121 
122     // ecx bit 13 indicates whether the cmpxchg16b instruction is supported
123     AtomicOps_Internalx86CPUFeatures.has_cmpxchg16b = ((ecx >> 13) & 1);
124 }
125 
126 class __attribute__ ((visibility ("hidden"))) initializer
127 {
128     public:
initializer()129         initializer() { AtomicOps_Internalx86CPUFeaturesInit(); }
130 };
131 
132 static initializer init;
133 
134 #endif
135