1 // Copyright 2018, VIXL authors
2 // All rights reserved.
3 //
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are met:
6 //
7 //   * Redistributions of source code must retain the above copyright notice,
8 //     this list of conditions and the following disclaimer.
9 //   * Redistributions in binary form must reproduce the above copyright notice,
10 //     this list of conditions and the following disclaimer in the documentation
11 //     and/or other materials provided with the distribution.
12 //   * Neither the name of ARM Limited nor the names of its contributors may be
13 //     used to endorse or promote products derived from this software without
14 //     specific prior written permission.
15 //
16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
17 // ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18 // WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19 // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
20 // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22 // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 
27 #ifndef VIXL_CPU_FEATURES_H
28 #define VIXL_CPU_FEATURES_H
29 
30 #include <ostream>
31 
32 #include "jit/arm64/vixl/Globals-vixl.h"
33 
34 
35 namespace vixl {
36 
37 
38 // clang-format off
// X-macro listing every CPU feature known to this header. The caller supplies
// a macro V, which is expanded once per feature as V(symbol, name, cpuinfo):
//   - `symbol` is the identifier used for the CPUFeatures::Feature enumerator.
//   - `name` is a descriptive string for the feature.
//   - `cpuinfo` is a short lower-case string, or NULL where none is given
//     (presumably the flag name reported by the OS -- TODO confirm against
//     the code that consumes it).
#define VIXL_CPU_FEATURE_LIST(V)                                               \
  /* If set, the OS traps and emulates MRS accesses to relevant (EL1) ID_*  */ \
  /* registers, so that the detailed feature registers can be read          */ \
  /* directly.                                                              */ \
  V(kIDRegisterEmulation, "ID register emulation",  "cpuid")                   \
                                                                               \
  V(kFP,                  "FP",                     "fp")                      \
  V(kNEON,                "NEON",                   "asimd")                   \
  V(kCRC32,               "CRC32",                  "crc32")                   \
  /* Cryptographic support instructions.                                    */ \
  V(kAES,                 "AES",                    "aes")                     \
  V(kSHA1,                "SHA1",                   "sha1")                    \
  V(kSHA2,                "SHA2",                   "sha2")                    \
  /* A form of PMULL{2} with a 128-bit (1Q) result.                         */ \
  V(kPmull1Q,             "Pmull1Q",                "pmull")                   \
  /* Atomic operations on memory: CAS, LDADD, STADD, SWP, etc.              */ \
  V(kAtomics,             "Atomics",                "atomics")                 \
  /* Limited ordering regions: LDLAR, STLLR and their variants.             */ \
  V(kLORegions,           "LORegions",              NULL)                      \
  /* Rounding doubling multiply add/subtract: SQRDMLAH and SQRDMLSH.        */ \
  V(kRDM,                 "RDM",                    "asimdrdm")                \
  /* Scalable Vector Extension.                                             */ \
  V(kSVE,                 "SVE",                    "sve")                     \
  /* SDOT and UDOT support (in NEON).                                       */ \
  V(kDotProduct,          "DotProduct",             "asimddp")                 \
  /* Half-precision (FP16) support for FP and NEON, respectively.           */ \
  V(kFPHalf,              "FPHalf",                 "fphp")                    \
  V(kNEONHalf,            "NEONHalf",               "asimdhp")                 \
  /* The RAS extension, including the ESB instruction.                      */ \
  V(kRAS,                 "RAS",                    NULL)                      \
  /* Data cache clean to the point of persistence: DC CVAP.                 */ \
  V(kDCPoP,               "DCPoP",                  "dcpop")                   \
  /* Data cache clean to the point of deep persistence: DC CVADP.           */ \
  V(kDCCVADP,             "DCCVADP",                NULL)                      \
  /* Cryptographic support instructions.                                    */ \
  V(kSHA3,                "SHA3",                   "sha3")                    \
  V(kSHA512,              "SHA512",                 "sha512")                  \
  V(kSM3,                 "SM3",                    "sm3")                     \
  V(kSM4,                 "SM4",                    "sm4")                     \
  /* Pointer authentication for addresses.                                  */ \
  V(kPAuth,               "PAuth",                  NULL)                      \
  /* Pointer authentication for addresses uses QARMA.                       */ \
  V(kPAuthQARMA,          "PAuthQARMA",             NULL)                      \
  /* Generic authentication (using the PACGA instruction).                  */ \
  V(kPAuthGeneric,        "PAuthGeneric",           NULL)                      \
  /* Generic authentication uses QARMA.                                     */ \
  V(kPAuthGenericQARMA,   "PAuthGenericQARMA",      NULL)                      \
  /* JavaScript-style FP -> integer conversion instruction: FJCVTZS.        */ \
  V(kJSCVT,               "JSCVT",                  "jscvt")                   \
  /* Complex number support for NEON: FCMLA and FCADD.                      */ \
  V(kFcma,                "Fcma",                   "fcma")                    \
  /* RCpc-based model (for weaker release consistency): LDAPR and variants. */ \
  V(kRCpc,                "RCpc",                   "lrcpc")                   \
  V(kRCpcImm,             "RCpc (imm)",             "ilrcpc")                  \
  /* Flag manipulation instructions: SETF{8,16}, CFINV, RMIF.               */ \
  V(kFlagM,               "FlagM",                  "flagm")                   \
  /* Unaligned single-copy atomicity.                                       */ \
  V(kUSCAT,               "USCAT",                  "uscat")                   \
  /* FP16 fused multiply-add or -subtract long: FMLAL{2}, FMLSL{2}.         */ \
  V(kFHM,                 "FHM",                    "asimdfhm")                \
  /* Data-independent timing (for selected instructions).                   */ \
  V(kDIT,                 "DIT",                    "dit")                     \
  /* Branch target identification.                                          */ \
  V(kBTI,                 "BTI",                    NULL)                      \
  /* Flag manipulation instructions: {AX,XA}FLAG.                           */ \
  V(kAXFlag,              "AXFlag",                 NULL)                      \
  /* Random number generation extension.                                    */ \
  V(kRNG,                 "RNG",                    NULL)                      \
  /* Floating-point round to {32,64}-bit integer.                           */ \
  V(kFrintToFixedSizedInt,"Frint (bounded)",        NULL)
109 // clang-format on
110 
111 
// Forward declaration: CPUFeatures names this type in its `const_iterator`
// typedef before the iterator class itself is defined later in this file.
class CPUFeaturesConstIterator;
113 
114 // A representation of the set of features known to be supported by the target
115 // device. Each feature is represented by a simple boolean flag.
116 //
117 //   - When the Assembler is asked to assemble an instruction, it asserts (in
118 //     debug mode) that the necessary features are available.
119 //
120 //   - TODO: The MacroAssembler relies on the Assembler's assertions, but in
121 //     some cases it may be useful for macros to generate a fall-back sequence
122 //     in case features are not available.
123 //
124 //   - The Simulator assumes by default that all features are available, but it
125 //     is possible to configure it to fail if the simulated code uses features
126 //     that are not enabled.
127 //
128 //     The Simulator also offers pseudo-instructions to allow features to be
129 //     enabled and disabled dynamically. This is useful when you want to ensure
130 //     that some features are constrained to certain areas of code.
131 //
132 //   - The base Disassembler knows nothing about CPU features, but the
133 //     PrintDisassembler can be configured to annotate its output with warnings
134 //     about unavailable features. The Simulator uses this feature when
135 //     instruction trace is enabled.
136 //
137 //   - The Decoder-based components -- the Simulator and PrintDisassembler --
138 //     rely on a CPUFeaturesAuditor visitor. This visitor keeps a list of
139 //     features actually encountered so that a large block of code can be
140 //     examined (either directly or through simulation), and the required
141 //     features analysed later.
142 //
143 // Expected usage:
144 //
145 //     // By default, VIXL uses CPUFeatures::AArch64LegacyBaseline(), for
//     // compatibility with older versions of VIXL.
147 //     MacroAssembler masm;
148 //
149 //     // Generate code only for the current CPU.
150 //     masm.SetCPUFeatures(CPUFeatures::InferFromOS());
151 //
152 //     // Turn off feature checking entirely.
153 //     masm.SetCPUFeatures(CPUFeatures::All());
154 //
155 // Feature set manipulation:
156 //
157 //     CPUFeatures f;  // The default constructor gives an empty set.
158 //     // Individual features can be added (or removed).
//     f.Combine(CPUFeatures::kFP, CPUFeatures::kNEON, CPUFeatures::kAES);
160 //     f.Remove(CPUFeatures::kNEON);
161 //
162 //     // Some helpers exist for extensions that provide several features.
163 //     f.Remove(CPUFeatures::All());
164 //     f.Combine(CPUFeatures::AArch64LegacyBaseline());
165 //
166 //     // Chained construction is also possible.
167 //     CPUFeatures g =
168 //         f.With(CPUFeatures::kPmull1Q).Without(CPUFeatures::kCRC32);
169 //
170 //     // Features can be queried. Where multiple features are given, they are
171 //     // combined with logical AND.
172 //     if (h.Has(CPUFeatures::kNEON)) { ... }
173 //     if (h.Has(CPUFeatures::kFP, CPUFeatures::kNEON)) { ... }
174 //     if (h.Has(g)) { ... }
175 //     // If the empty set is requested, the result is always 'true'.
176 //     VIXL_ASSERT(h.Has(CPUFeatures()));
177 //
178 //     // For debug and reporting purposes, features can be enumerated (or
179 //     // printed directly):
180 //     std::cout << CPUFeatures::kNEON;  // Prints something like "NEON".
181 //     std::cout << f;  // Prints something like "FP, NEON, CRC32".
182 class CPUFeatures {
183  public:
184   // clang-format off
185   // Individual features.
186   // These should be treated as opaque tokens. User code should not rely on
187   // specific numeric values or ordering.
188   enum Feature {
189     // Refer to VIXL_CPU_FEATURE_LIST (above) for the list of feature names that
190     // this class supports.
191 
192     kNone = -1,
193 #define VIXL_DECLARE_FEATURE(SYMBOL, NAME, CPUINFO) SYMBOL,
194     VIXL_CPU_FEATURE_LIST(VIXL_DECLARE_FEATURE)
195 #undef VIXL_DECLARE_FEATURE
196     kNumberOfFeatures
197   };
198   // clang-format on
199 
200   // By default, construct with no features enabled.
CPUFeatures()201   CPUFeatures() : features_(0) {}
202 
203   // Construct with some features already enabled.
204   CPUFeatures(Feature feature0,
205               Feature feature1 = kNone,
206               Feature feature2 = kNone,
207               Feature feature3 = kNone);
208 
209   // Construct with all features enabled. This can be used to disable feature
210   // checking: `Has(...)` returns true regardless of the argument.
211   static CPUFeatures All();
212 
213   // Construct an empty CPUFeatures. This is equivalent to the default
214   // constructor, but is provided for symmetry and convenience.
None()215   static CPUFeatures None() { return CPUFeatures(); }
216 
217   // The presence of these features was assumed by version of VIXL before this
218   // API was added, so using this set by default ensures API compatibility.
AArch64LegacyBaseline()219   static CPUFeatures AArch64LegacyBaseline() {
220     return CPUFeatures(kFP, kNEON, kCRC32);
221   }
222 
223   // Construct a new CPUFeatures object using ID registers. This assumes that
224   // kIDRegisterEmulation is present.
225   static CPUFeatures InferFromIDRegisters();
226 
227   enum QueryIDRegistersOption {
228     kDontQueryIDRegisters,
229     kQueryIDRegistersIfAvailable
230   };
231 
232   // Construct a new CPUFeatures object based on what the OS reports.
233   static CPUFeatures InferFromOS(
234       QueryIDRegistersOption option = kQueryIDRegistersIfAvailable);
235 
236   // Combine another CPUFeatures object into this one. Features that already
237   // exist in this set are left unchanged.
238   void Combine(const CPUFeatures& other);
239 
240   // Combine specific features into this set. Features that already exist in
241   // this set are left unchanged.
242   void Combine(Feature feature0,
243                Feature feature1 = kNone,
244                Feature feature2 = kNone,
245                Feature feature3 = kNone);
246 
247   // Remove features in another CPUFeatures object from this one.
248   void Remove(const CPUFeatures& other);
249 
250   // Remove specific features from this set.
251   void Remove(Feature feature0,
252               Feature feature1 = kNone,
253               Feature feature2 = kNone,
254               Feature feature3 = kNone);
255 
256   // Chaining helpers for convenient construction.
257   CPUFeatures With(const CPUFeatures& other) const;
258   CPUFeatures With(Feature feature0,
259                    Feature feature1 = kNone,
260                    Feature feature2 = kNone,
261                    Feature feature3 = kNone) const;
262   CPUFeatures Without(const CPUFeatures& other) const;
263   CPUFeatures Without(Feature feature0,
264                       Feature feature1 = kNone,
265                       Feature feature2 = kNone,
266                       Feature feature3 = kNone) const;
267 
268   // Query features.
269   // Note that an empty query (like `Has(kNone)`) always returns true.
270   bool Has(const CPUFeatures& other) const;
271   bool Has(Feature feature0,
272            Feature feature1 = kNone,
273            Feature feature2 = kNone,
274            Feature feature3 = kNone) const;
275 
276   // Return the number of enabled features.
277   size_t Count() const;
HasNoFeatures()278   bool HasNoFeatures() const { return Count() == 0; }
279 
280   // Check for equivalence.
281   bool operator==(const CPUFeatures& other) const {
282     return Has(other) && other.Has(*this);
283   }
284   bool operator!=(const CPUFeatures& other) const { return !(*this == other); }
285 
286   typedef CPUFeaturesConstIterator const_iterator;
287 
288   const_iterator begin() const;
289   const_iterator end() const;
290 
291  private:
292   // Each bit represents a feature. This field will be replaced as needed if
293   // features are added.
294   uint64_t features_;
295 
296   friend std::ostream& operator<<(std::ostream& os,
297                                   const vixl::CPUFeatures& features);
298 };
299 
// Stream a single feature; this prints something like "NEON".
std::ostream& operator<<(std::ostream& os, vixl::CPUFeatures::Feature feature);
// Stream a whole feature set; this prints something like "FP, NEON, CRC32".
std::ostream& operator<<(std::ostream& os, const vixl::CPUFeatures& features);
302 
303 // This is not a proper C++ iterator type, but it simulates enough of
304 // ForwardIterator that simple loops can be written.
305 class CPUFeaturesConstIterator {
306  public:
307   CPUFeaturesConstIterator(const CPUFeatures* cpu_features = NULL,
308                            CPUFeatures::Feature start = CPUFeatures::kNone)
cpu_features_(cpu_features)309       : cpu_features_(cpu_features), feature_(start) {
310     VIXL_ASSERT(IsValid());
311   }
312 
313   bool operator==(const CPUFeaturesConstIterator& other) const;
314   bool operator!=(const CPUFeaturesConstIterator& other) const {
315     return !(*this == other);
316   }
317   CPUFeatures::Feature operator++();
318   CPUFeatures::Feature operator++(int);
319 
320   CPUFeatures::Feature operator*() const {
321     VIXL_ASSERT(IsValid());
322     return feature_;
323   }
324 
325   // For proper support of C++'s simplest "Iterator" concept, this class would
326   // have to define member types (such as CPUFeaturesIterator::pointer) to make
327   // it appear as if it iterates over Feature objects in memory. That is, we'd
328   // need CPUFeatures::iterator to behave like std::vector<Feature>::iterator.
329   // This is at least partially possible -- the std::vector<bool> specialisation
330   // does something similar -- but it doesn't seem worthwhile for a
331   // special-purpose debug helper, so they are omitted here.
332  private:
333   const CPUFeatures* cpu_features_;
334   CPUFeatures::Feature feature_;
335 
IsValid()336   bool IsValid() const {
337     return ((cpu_features_ == NULL) && (feature_ == CPUFeatures::kNone)) ||
338            cpu_features_->Has(feature_);
339   }
340 };
341 
342 // A convenience scope for temporarily modifying a CPU features object. This
343 // allows features to be enabled for short sequences.
344 //
345 // Expected usage:
346 //
347 //  {
348 //    CPUFeaturesScope cpu(&masm, CPUFeatures::kCRC32);
349 //    // This scope can now use CRC32, as well as anything else that was enabled
350 //    // before the scope.
351 //
352 //    ...
353 //
354 //    // At the end of the scope, the original CPU features are restored.
355 //  }
356 class CPUFeaturesScope {
357  public:
358   // Start a CPUFeaturesScope on any object that implements
359   // `CPUFeatures* GetCPUFeatures()`.
360   template <typename T>
361   explicit CPUFeaturesScope(T* cpu_features_wrapper,
362                             CPUFeatures::Feature feature0 = CPUFeatures::kNone,
363                             CPUFeatures::Feature feature1 = CPUFeatures::kNone,
364                             CPUFeatures::Feature feature2 = CPUFeatures::kNone,
365                             CPUFeatures::Feature feature3 = CPUFeatures::kNone)
366       : cpu_features_(cpu_features_wrapper->GetCPUFeatures()),
367         old_features_(*cpu_features_) {
368     cpu_features_->Combine(feature0, feature1, feature2, feature3);
369   }
370 
371   template <typename T>
CPUFeaturesScope(T * cpu_features_wrapper,const CPUFeatures & other)372   CPUFeaturesScope(T* cpu_features_wrapper, const CPUFeatures& other)
373       : cpu_features_(cpu_features_wrapper->GetCPUFeatures()),
374         old_features_(*cpu_features_) {
375     cpu_features_->Combine(other);
376   }
377 
~CPUFeaturesScope()378   ~CPUFeaturesScope() { *cpu_features_ = old_features_; }
379 
380   // For advanced usage, the CPUFeatures object can be accessed directly.
381   // The scope will restore the original state when it ends.
382 
GetCPUFeatures()383   CPUFeatures* GetCPUFeatures() const { return cpu_features_; }
384 
SetCPUFeatures(const CPUFeatures & cpu_features)385   void SetCPUFeatures(const CPUFeatures& cpu_features) {
386     *cpu_features_ = cpu_features;
387   }
388 
389  private:
390   CPUFeatures* const cpu_features_;
391   const CPUFeatures old_features_;
392 };
393 
394 
395 }  // namespace vixl
396 
397 #endif  // VIXL_CPU_FEATURES_H
398