// This code is derived from code distributed as part of Google Perftools.
// The original is available from google-perftools-2.0 in the file
// src/base/atomicops-internals-x86.cc. This file was retrieved Feb 06, 2012 by
// Robert Escriva.
5
6 /* Copyright (c) 2007, Google Inc.
7 * All rights reserved.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions are
11 * met:
12 *
13 * * Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * * Redistributions in binary form must reproduce the above
16 * copyright notice, this list of conditions and the following disclaimer
17 * in the documentation and/or other materials provided with the
18 * distribution.
19 * * Neither the name of Google Inc. nor the names of its
20 * contributors may be used to endorse or promote products derived from
21 * this software without specific prior written permission.
22 *
23 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
24 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
25 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
26 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
27 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
28 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
29 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
30 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
31 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
32 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
33 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34 *
35 * ---
36 * This module gets enough CPU information to optimize the
37 * atomicops module on x86.
38 */
39
40 // C
41 #include <string.h>
42
43 // e
44 #include "e/atomic.h"
45
// cpuid(a, b, c, d, inp): run the cpuid instruction with `inp` loaded into
// eax and store the resulting eax/ebx/ecx/edx values into a/b/c/d, which
// must be assignable.
//
// In PIC compilations %ebx holds the address of the global offset table, so
// its value has to survive the instruction. The sequence copies the caller's
// %ebx into %edi, executes cpuid, and then exchanges the two registers: %ebx
// gets its old value back and the ebx result ends up in %edi, where the "=D"
// output constraint collects it. The 64-bit variant applies the same
// sequence to %rbx/%rdi.
#if defined(__i386__)
#define cpuid(a, b, c, d, inp)                              \
    __asm__ ("mov %%ebx, %%edi\n"                           \
             "cpuid\n"                                      \
             "xchg %%edi, %%ebx\n"                          \
             : "=a" (a), "=D" (b), "=c" (c), "=d" (d)       \
             : "a" (inp))
#elif defined(__x86_64__)
#define cpuid(a, b, c, d, inp)                              \
    __asm__ ("mov %%rbx, %%rdi\n"                           \
             "cpuid\n"                                      \
             "xchg %%rdi, %%rbx\n"                          \
             : "=a" (a), "=D" (b), "=c" (c), "=d" (d)       \
             : "a" (inp))
#endif
62
63 #if defined(cpuid) // initialize the struct only on x86
64
65 // Set the flags so that code will run correctly and conservatively
66 // until InitGoogle() is called.
67 struct AtomicOps_x86CPUFeatureStruct AtomicOps_Internalx86CPUFeatures =
68 {
69 false, // bug can't exist before process spawns multiple threads
70 false, // no SSE2
71 false, // no cmpxchg16b
72 };
73
74 // Initialize the AtomicOps_Internalx86CPUFeatures struct.
AtomicOps_Internalx86CPUFeaturesInit()75 static void AtomicOps_Internalx86CPUFeaturesInit()
76 {
77 uint32_t eax;
78 uint32_t ebx;
79 uint32_t ecx;
80 uint32_t edx;
81
82 // Get vendor string (issue CPUID with eax = 0)
83 cpuid(eax, ebx, ecx, edx, 0);
84 char vendor[13];
85 memcpy(vendor, &ebx, 4);
86 memcpy(vendor + 4, &edx, 4);
87 memcpy(vendor + 8, &ecx, 4);
88 vendor[12] = 0;
89
90 // get feature flags in ecx/edx, and family/model in eax
91 cpuid(eax, ebx, ecx, edx, 1);
92
93 int family = (eax >> 8) & 0xf; // family and model fields
94 int model = (eax >> 4) & 0xf;
95
96 // use extended family and model fields
97 if (family == 0xf)
98 {
99 family += (eax >> 20) & 0xff;
100 model += ((eax >> 16) & 0xf) << 4;
101 }
102
103 // Opteron Rev E has a bug in which on very rare occasions a locked
104 // instruction doesn't act as a read-acquire barrier if followed by a
105 // non-locked read-modify-write instruction. Rev F has this bug in
106 // pre-release versions, but not in versions released to customers,
107 // so we test only for Rev E, which is family 15, model 32..63 inclusive.
108 if (strcmp(vendor, "AuthenticAMD") == 0 && // AMD
109 family == 15 &&
110 32 <= model && model <= 63)
111 {
112 AtomicOps_Internalx86CPUFeatures.has_amd_lock_mb_bug = true;
113 }
114 else
115 {
116 AtomicOps_Internalx86CPUFeatures.has_amd_lock_mb_bug = false;
117 }
118
119 // edx bit 26 is SSE2 which we use to tell use whether we can use mfence
120 AtomicOps_Internalx86CPUFeatures.has_sse2 = ((edx >> 26) & 1);
121
122 // ecx bit 13 indicates whether the cmpxchg16b instruction is supported
123 AtomicOps_Internalx86CPUFeatures.has_cmpxchg16b = ((ecx >> 13) & 1);
124 }
125
126 class __attribute__ ((visibility ("hidden"))) initializer
127 {
128 public:
initializer()129 initializer() { AtomicOps_Internalx86CPUFeaturesInit(); }
130 };
131
132 static initializer init;
133
134 #endif
135