1 // Copyright 2018, VIXL authors 2 // All rights reserved. 3 // 4 // Redistribution and use in source and binary forms, with or without 5 // modification, are permitted provided that the following conditions are met: 6 // 7 // * Redistributions of source code must retain the above copyright notice, 8 // this list of conditions and the following disclaimer. 9 // * Redistributions in binary form must reproduce the above copyright notice, 10 // this list of conditions and the following disclaimer in the documentation 11 // and/or other materials provided with the distribution. 12 // * Neither the name of ARM Limited nor the names of its contributors may be 13 // used to endorse or promote products derived from this software without 14 // specific prior written permission. 15 // 16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND 17 // ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 18 // WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 19 // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE 20 // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 22 // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 23 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 24 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 27 #ifndef VIXL_CPU_FEATURES_H 28 #define VIXL_CPU_FEATURES_H 29 30 #include <ostream> 31 32 #include "jit/arm64/vixl/Globals-vixl.h" 33 34 35 namespace vixl { 36 37 38 // clang-format off 39 #define VIXL_CPU_FEATURE_LIST(V) \ 40 /* If set, the OS traps and emulates MRS accesses to relevant (EL1) ID_* */ \ 41 /* registers, so that the detailed feature registers can be read */ \ 42 /* directly. */ \ 43 V(kIDRegisterEmulation, "ID register emulation", "cpuid") \ 44 \ 45 V(kFP, "FP", "fp") \ 46 V(kNEON, "NEON", "asimd") \ 47 V(kCRC32, "CRC32", "crc32") \ 48 /* Cryptographic support instructions. */ \ 49 V(kAES, "AES", "aes") \ 50 V(kSHA1, "SHA1", "sha1") \ 51 V(kSHA2, "SHA2", "sha2") \ 52 /* A form of PMULL{2} with a 128-bit (1Q) result. */ \ 53 V(kPmull1Q, "Pmull1Q", "pmull") \ 54 /* Atomic operations on memory: CAS, LDADD, STADD, SWP, etc. */ \ 55 V(kAtomics, "Atomics", "atomics") \ 56 /* Limited ordering regions: LDLAR, STLLR and their variants. */ \ 57 V(kLORegions, "LORegions", NULL) \ 58 /* Rounding doubling multiply add/subtract: SQRDMLAH and SQRDMLSH. */ \ 59 V(kRDM, "RDM", "asimdrdm") \ 60 /* Scalable Vector Extension. */ \ 61 V(kSVE, "SVE", "sve") \ 62 /* SDOT and UDOT support (in NEON). */ \ 63 V(kDotProduct, "DotProduct", "asimddp") \ 64 /* Half-precision (FP16) support for FP and NEON, respectively. */ \ 65 V(kFPHalf, "FPHalf", "fphp") \ 66 V(kNEONHalf, "NEONHalf", "asimdhp") \ 67 /* The RAS extension, including the ESB instruction. */ \ 68 V(kRAS, "RAS", NULL) \ 69 /* Data cache clean to the point of persistence: DC CVAP. */ \ 70 V(kDCPoP, "DCPoP", "dcpop") \ 71 /* Data cache clean to the point of deep persistence: DC CVADP. */ \ 72 V(kDCCVADP, "DCCVADP", NULL) \ 73 /* Cryptographic support instructions. */ \ 74 V(kSHA3, "SHA3", "sha3") \ 75 V(kSHA512, "SHA512", "sha512") \ 76 V(kSM3, "SM3", "sm3") \ 77 V(kSM4, "SM4", "sm4") \ 78 /* Pointer authentication for addresses. */ \ 79 V(kPAuth, "PAuth", NULL) \ 80 /* Pointer authentication for addresses uses QARMA. */ \ 81 V(kPAuthQARMA, "PAuthQARMA", NULL) \ 82 /* Generic authentication (using the PACGA instruction). */ \ 83 V(kPAuthGeneric, "PAuthGeneric", NULL) \ 84 /* Generic authentication uses QARMA. */ \ 85 V(kPAuthGenericQARMA, "PAuthGenericQARMA", NULL) \ 86 /* JavaScript-style FP -> integer conversion instruction: FJCVTZS. */ \ 87 V(kJSCVT, "JSCVT", "jscvt") \ 88 /* Complex number support for NEON: FCMLA and FCADD. */ \ 89 V(kFcma, "Fcma", "fcma") \ 90 /* RCpc-based model (for weaker release consistency): LDAPR and variants. */ \ 91 V(kRCpc, "RCpc", "lrcpc") \ 92 V(kRCpcImm, "RCpc (imm)", "ilrcpc") \ 93 /* Flag manipulation instructions: SETF{8,16}, CFINV, RMIF. */ \ 94 V(kFlagM, "FlagM", "flagm") \ 95 /* Unaligned single-copy atomicity. */ \ 96 V(kUSCAT, "USCAT", "uscat") \ 97 /* FP16 fused multiply-add or -subtract long: FMLAL{2}, FMLSL{2}. */ \ 98 V(kFHM, "FHM", "asimdfhm") \ 99 /* Data-independent timing (for selected instructions). */ \ 100 V(kDIT, "DIT", "dit") \ 101 /* Branch target identification. */ \ 102 V(kBTI, "BTI", NULL) \ 103 /* Flag manipulation instructions: {AX,XA}FLAG */ \ 104 V(kAXFlag, "AXFlag", NULL) \ 105 /* Random number generation extension, */ \ 106 V(kRNG, "RNG", NULL) \ 107 /* Floating-point round to {32,64}-bit integer. */ \ 108 V(kFrintToFixedSizedInt,"Frint (bounded)", NULL) 109 // clang-format on 110 111 112 class CPUFeaturesConstIterator; 113 114 // A representation of the set of features known to be supported by the target 115 // device. Each feature is represented by a simple boolean flag. 116 // 117 // - When the Assembler is asked to assemble an instruction, it asserts (in 118 // debug mode) that the necessary features are available. 119 // 120 // - TODO: The MacroAssembler relies on the Assembler's assertions, but in 121 // some cases it may be useful for macros to generate a fall-back sequence 122 // in case features are not available. 123 // 124 // - The Simulator assumes by default that all features are available, but it 125 // is possible to configure it to fail if the simulated code uses features 126 // that are not enabled. 127 // 128 // The Simulator also offers pseudo-instructions to allow features to be 129 // enabled and disabled dynamically. This is useful when you want to ensure 130 // that some features are constrained to certain areas of code. 131 // 132 // - The base Disassembler knows nothing about CPU features, but the 133 // PrintDisassembler can be configured to annotate its output with warnings 134 // about unavailable features. The Simulator uses this feature when 135 // instruction trace is enabled. 136 // 137 // - The Decoder-based components -- the Simulator and PrintDisassembler -- 138 // rely on a CPUFeaturesAuditor visitor. This visitor keeps a list of 139 // features actually encountered so that a large block of code can be 140 // examined (either directly or through simulation), and the required 141 // features analysed later. 142 // 143 // Expected usage: 144 // 145 // // By default, VIXL uses CPUFeatures::AArch64LegacyBaseline(), for 146 // // compatibility with older version of VIXL. 147 // MacroAssembler masm; 148 // 149 // // Generate code only for the current CPU. 150 // masm.SetCPUFeatures(CPUFeatures::InferFromOS()); 151 // 152 // // Turn off feature checking entirely. 153 // masm.SetCPUFeatures(CPUFeatures::All()); 154 // 155 // Feature set manipulation: 156 // 157 // CPUFeatures f; // The default constructor gives an empty set. 158 // // Individual features can be added (or removed). 159 // f.Combine(CPUFeatures::kFP, CPUFeatures::kNEON, CPUFeatures::AES); 160 // f.Remove(CPUFeatures::kNEON); 161 // 162 // // Some helpers exist for extensions that provide several features. 163 // f.Remove(CPUFeatures::All()); 164 // f.Combine(CPUFeatures::AArch64LegacyBaseline()); 165 // 166 // // Chained construction is also possible. 167 // CPUFeatures g = 168 // f.With(CPUFeatures::kPmull1Q).Without(CPUFeatures::kCRC32); 169 // 170 // // Features can be queried. Where multiple features are given, they are 171 // // combined with logical AND. 172 // if (h.Has(CPUFeatures::kNEON)) { ... } 173 // if (h.Has(CPUFeatures::kFP, CPUFeatures::kNEON)) { ... } 174 // if (h.Has(g)) { ... } 175 // // If the empty set is requested, the result is always 'true'. 176 // VIXL_ASSERT(h.Has(CPUFeatures())); 177 // 178 // // For debug and reporting purposes, features can be enumerated (or 179 // // printed directly): 180 // std::cout << CPUFeatures::kNEON; // Prints something like "NEON". 181 // std::cout << f; // Prints something like "FP, NEON, CRC32". 182 class CPUFeatures { 183 public: 184 // clang-format off 185 // Individual features. 186 // These should be treated as opaque tokens. User code should not rely on 187 // specific numeric values or ordering. 188 enum Feature { 189 // Refer to VIXL_CPU_FEATURE_LIST (above) for the list of feature names that 190 // this class supports. 191 192 kNone = -1, 193 #define VIXL_DECLARE_FEATURE(SYMBOL, NAME, CPUINFO) SYMBOL, 194 VIXL_CPU_FEATURE_LIST(VIXL_DECLARE_FEATURE) 195 #undef VIXL_DECLARE_FEATURE 196 kNumberOfFeatures 197 }; 198 // clang-format on 199 200 // By default, construct with no features enabled. CPUFeatures()201 CPUFeatures() : features_(0) {} 202 203 // Construct with some features already enabled. 204 CPUFeatures(Feature feature0, 205 Feature feature1 = kNone, 206 Feature feature2 = kNone, 207 Feature feature3 = kNone); 208 209 // Construct with all features enabled. This can be used to disable feature 210 // checking: `Has(...)` returns true regardless of the argument. 211 static CPUFeatures All(); 212 213 // Construct an empty CPUFeatures. This is equivalent to the default 214 // constructor, but is provided for symmetry and convenience. None()215 static CPUFeatures None() { return CPUFeatures(); } 216 217 // The presence of these features was assumed by version of VIXL before this 218 // API was added, so using this set by default ensures API compatibility. AArch64LegacyBaseline()219 static CPUFeatures AArch64LegacyBaseline() { 220 return CPUFeatures(kFP, kNEON, kCRC32); 221 } 222 223 // Construct a new CPUFeatures object using ID registers. This assumes that 224 // kIDRegisterEmulation is present. 225 static CPUFeatures InferFromIDRegisters(); 226 227 enum QueryIDRegistersOption { 228 kDontQueryIDRegisters, 229 kQueryIDRegistersIfAvailable 230 }; 231 232 // Construct a new CPUFeatures object based on what the OS reports. 233 static CPUFeatures InferFromOS( 234 QueryIDRegistersOption option = kQueryIDRegistersIfAvailable); 235 236 // Combine another CPUFeatures object into this one. Features that already 237 // exist in this set are left unchanged. 238 void Combine(const CPUFeatures& other); 239 240 // Combine specific features into this set. Features that already exist in 241 // this set are left unchanged. 242 void Combine(Feature feature0, 243 Feature feature1 = kNone, 244 Feature feature2 = kNone, 245 Feature feature3 = kNone); 246 247 // Remove features in another CPUFeatures object from this one. 248 void Remove(const CPUFeatures& other); 249 250 // Remove specific features from this set. 251 void Remove(Feature feature0, 252 Feature feature1 = kNone, 253 Feature feature2 = kNone, 254 Feature feature3 = kNone); 255 256 // Chaining helpers for convenient construction. 257 CPUFeatures With(const CPUFeatures& other) const; 258 CPUFeatures With(Feature feature0, 259 Feature feature1 = kNone, 260 Feature feature2 = kNone, 261 Feature feature3 = kNone) const; 262 CPUFeatures Without(const CPUFeatures& other) const; 263 CPUFeatures Without(Feature feature0, 264 Feature feature1 = kNone, 265 Feature feature2 = kNone, 266 Feature feature3 = kNone) const; 267 268 // Query features. 269 // Note that an empty query (like `Has(kNone)`) always returns true. 270 bool Has(const CPUFeatures& other) const; 271 bool Has(Feature feature0, 272 Feature feature1 = kNone, 273 Feature feature2 = kNone, 274 Feature feature3 = kNone) const; 275 276 // Return the number of enabled features. 277 size_t Count() const; HasNoFeatures()278 bool HasNoFeatures() const { return Count() == 0; } 279 280 // Check for equivalence. 281 bool operator==(const CPUFeatures& other) const { 282 return Has(other) && other.Has(*this); 283 } 284 bool operator!=(const CPUFeatures& other) const { return !(*this == other); } 285 286 typedef CPUFeaturesConstIterator const_iterator; 287 288 const_iterator begin() const; 289 const_iterator end() const; 290 291 private: 292 // Each bit represents a feature. This field will be replaced as needed if 293 // features are added. 294 uint64_t features_; 295 296 friend std::ostream& operator<<(std::ostream& os, 297 const vixl::CPUFeatures& features); 298 }; 299 300 std::ostream& operator<<(std::ostream& os, vixl::CPUFeatures::Feature feature); 301 std::ostream& operator<<(std::ostream& os, const vixl::CPUFeatures& features); 302 303 // This is not a proper C++ iterator type, but it simulates enough of 304 // ForwardIterator that simple loops can be written. 305 class CPUFeaturesConstIterator { 306 public: 307 CPUFeaturesConstIterator(const CPUFeatures* cpu_features = NULL, 308 CPUFeatures::Feature start = CPUFeatures::kNone) cpu_features_(cpu_features)309 : cpu_features_(cpu_features), feature_(start) { 310 VIXL_ASSERT(IsValid()); 311 } 312 313 bool operator==(const CPUFeaturesConstIterator& other) const; 314 bool operator!=(const CPUFeaturesConstIterator& other) const { 315 return !(*this == other); 316 } 317 CPUFeatures::Feature operator++(); 318 CPUFeatures::Feature operator++(int); 319 320 CPUFeatures::Feature operator*() const { 321 VIXL_ASSERT(IsValid()); 322 return feature_; 323 } 324 325 // For proper support of C++'s simplest "Iterator" concept, this class would 326 // have to define member types (such as CPUFeaturesIterator::pointer) to make 327 // it appear as if it iterates over Feature objects in memory. That is, we'd 328 // need CPUFeatures::iterator to behave like std::vector<Feature>::iterator. 329 // This is at least partially possible -- the std::vector<bool> specialisation 330 // does something similar -- but it doesn't seem worthwhile for a 331 // special-purpose debug helper, so they are omitted here. 332 private: 333 const CPUFeatures* cpu_features_; 334 CPUFeatures::Feature feature_; 335 IsValid()336 bool IsValid() const { 337 return ((cpu_features_ == NULL) && (feature_ == CPUFeatures::kNone)) || 338 cpu_features_->Has(feature_); 339 } 340 }; 341 342 // A convenience scope for temporarily modifying a CPU features object. This 343 // allows features to be enabled for short sequences. 344 // 345 // Expected usage: 346 // 347 // { 348 // CPUFeaturesScope cpu(&masm, CPUFeatures::kCRC32); 349 // // This scope can now use CRC32, as well as anything else that was enabled 350 // // before the scope. 351 // 352 // ... 353 // 354 // // At the end of the scope, the original CPU features are restored. 355 // } 356 class CPUFeaturesScope { 357 public: 358 // Start a CPUFeaturesScope on any object that implements 359 // `CPUFeatures* GetCPUFeatures()`. 360 template <typename T> 361 explicit CPUFeaturesScope(T* cpu_features_wrapper, 362 CPUFeatures::Feature feature0 = CPUFeatures::kNone, 363 CPUFeatures::Feature feature1 = CPUFeatures::kNone, 364 CPUFeatures::Feature feature2 = CPUFeatures::kNone, 365 CPUFeatures::Feature feature3 = CPUFeatures::kNone) 366 : cpu_features_(cpu_features_wrapper->GetCPUFeatures()), 367 old_features_(*cpu_features_) { 368 cpu_features_->Combine(feature0, feature1, feature2, feature3); 369 } 370 371 template <typename T> CPUFeaturesScope(T * cpu_features_wrapper,const CPUFeatures & other)372 CPUFeaturesScope(T* cpu_features_wrapper, const CPUFeatures& other) 373 : cpu_features_(cpu_features_wrapper->GetCPUFeatures()), 374 old_features_(*cpu_features_) { 375 cpu_features_->Combine(other); 376 } 377 ~CPUFeaturesScope()378 ~CPUFeaturesScope() { *cpu_features_ = old_features_; } 379 380 // For advanced usage, the CPUFeatures object can be accessed directly. 381 // The scope will restore the original state when it ends. 382 GetCPUFeatures()383 CPUFeatures* GetCPUFeatures() const { return cpu_features_; } 384 SetCPUFeatures(const CPUFeatures & cpu_features)385 void SetCPUFeatures(const CPUFeatures& cpu_features) { 386 *cpu_features_ = cpu_features; 387 } 388 389 private: 390 CPUFeatures* const cpu_features_; 391 const CPUFeatures old_features_; 392 }; 393 394 395 } // namespace vixl 396 397 #endif // VIXL_CPU_FEATURES_H 398