1 //===--- LiteralSupport.h ---------------------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file defines the NumericLiteralParser, CharLiteralParser, and 10 // StringLiteralParser interfaces. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #ifndef LLVM_CLANG_LEX_LITERALSUPPORT_H 15 #define LLVM_CLANG_LEX_LITERALSUPPORT_H 16 17 #include "clang/Basic/CharInfo.h" 18 #include "clang/Basic/LLVM.h" 19 #include "clang/Basic/TokenKinds.h" 20 #include "llvm/ADT/APFloat.h" 21 #include "llvm/ADT/ArrayRef.h" 22 #include "llvm/ADT/SmallString.h" 23 #include "llvm/ADT/StringRef.h" 24 #include "llvm/Support/DataTypes.h" 25 26 namespace clang { 27 28 class DiagnosticsEngine; 29 class Preprocessor; 30 class Token; 31 class SourceLocation; 32 class TargetInfo; 33 class SourceManager; 34 class LangOptions; 35 36 /// Copy characters from Input to Buf, expanding any UCNs. 37 void expandUCNs(SmallVectorImpl<char> &Buf, StringRef Input); 38 39 /// Return true if the token corresponds to a function local predefined macro, 40 /// which expands to a string literal, that can be concatenated with other 41 /// string literals (only in Microsoft mode). 42 bool isFunctionLocalStringLiteralMacro(tok::TokenKind K, const LangOptions &LO); 43 44 /// Return true if the token is a string literal, or a function local 45 /// predefined macro, which expands to a string literal. 46 bool tokenIsLikeStringLiteral(const Token &Tok, const LangOptions &LO); 47 48 /// NumericLiteralParser - This performs strict semantic analysis of the content 49 /// of a ppnumber, classifying it as either integer, floating, or erroneous, 50 /// determines the radix of the value and can convert it to a useful value. 51 class NumericLiteralParser { 52 const SourceManager &SM; 53 const LangOptions &LangOpts; 54 DiagnosticsEngine &Diags; 55 56 const char *const ThisTokBegin; 57 const char *const ThisTokEnd; 58 const char *DigitsBegin, *SuffixBegin; // markers 59 const char *s; // cursor 60 61 unsigned radix; 62 63 bool saw_exponent, saw_period, saw_ud_suffix, saw_fixed_point_suffix; 64 65 SmallString<32> UDSuffixBuf; 66 67 public: 68 NumericLiteralParser(StringRef TokSpelling, SourceLocation TokLoc, 69 const SourceManager &SM, const LangOptions &LangOpts, 70 const TargetInfo &Target, DiagnosticsEngine &Diags); 71 bool hadError : 1; 72 bool isUnsigned : 1; 73 bool isLong : 1; // This is *not* set for long long. 74 bool isLongLong : 1; 75 bool isSizeT : 1; // 1z, 1uz (C++23) 76 bool isHalf : 1; // 1.0h 77 bool isFloat : 1; // 1.0f 78 bool isImaginary : 1; // 1.0i 79 bool isFloat16 : 1; // 1.0f16 80 bool isFloat128 : 1; // 1.0q 81 bool isFract : 1; // 1.0hr/r/lr/uhr/ur/ulr 82 bool isAccum : 1; // 1.0hk/k/lk/uhk/uk/ulk 83 bool isBitInt : 1; // 1wb, 1uwb (C23) 84 uint8_t MicrosoftInteger; // Microsoft suffix extension i8, i16, i32, or i64. 85 86 87 bool isFixedPointLiteral() const { 88 return (saw_period || saw_exponent) && saw_fixed_point_suffix; 89 } 90 91 bool isIntegerLiteral() const { 92 return !saw_period && !saw_exponent && !isFixedPointLiteral(); 93 } 94 bool isFloatingLiteral() const { 95 return (saw_period || saw_exponent) && !isFixedPointLiteral(); 96 } 97 98 bool hasUDSuffix() const { 99 return saw_ud_suffix; 100 } 101 StringRef getUDSuffix() const { 102 assert(saw_ud_suffix); 103 return UDSuffixBuf; 104 } 105 unsigned getUDSuffixOffset() const { 106 assert(saw_ud_suffix); 107 return SuffixBegin - ThisTokBegin; 108 } 109 110 static bool isValidUDSuffix(const LangOptions &LangOpts, StringRef Suffix); 111 112 unsigned getRadix() const { return radix; } 113 114 /// GetIntegerValue - Convert this numeric literal value to an APInt that 115 /// matches Val's input width. If there is an overflow (i.e., if the unsigned 116 /// value read is larger than the APInt's bits will hold), set Val to the low 117 /// bits of the result and return true. Otherwise, return false. 118 bool GetIntegerValue(llvm::APInt &Val); 119 120 /// GetFloatValue - Convert this numeric literal to a floating value, using 121 /// the specified APFloat fltSemantics (specifying float, double, etc). 122 /// The optional bool isExact (passed-by-reference) has its value 123 /// set to true if the returned APFloat can represent the number in the 124 /// literal exactly, and false otherwise. 125 llvm::APFloat::opStatus GetFloatValue(llvm::APFloat &Result); 126 127 /// GetFixedPointValue - Convert this numeric literal value into a 128 /// scaled integer that represents this value. Returns true if an overflow 129 /// occurred when calculating the integral part of the scaled integer or 130 /// calculating the digit sequence of the exponent. 131 bool GetFixedPointValue(llvm::APInt &StoreVal, unsigned Scale); 132 133 /// Get the digits that comprise the literal. This excludes any prefix or 134 /// suffix associated with the literal. 135 StringRef getLiteralDigits() const { 136 assert(!hadError && "cannot reliably get the literal digits with an error"); 137 return StringRef(DigitsBegin, SuffixBegin - DigitsBegin); 138 } 139 140 private: 141 142 void ParseNumberStartingWithZero(SourceLocation TokLoc); 143 void ParseDecimalOrOctalCommon(SourceLocation TokLoc); 144 145 static bool isDigitSeparator(char C) { return C == '\''; } 146 147 /// Determine whether the sequence of characters [Start, End) contains 148 /// any real digits (not digit separators). 149 bool containsDigits(const char *Start, const char *End) { 150 return Start != End && (Start + 1 != End || !isDigitSeparator(Start[0])); 151 } 152 153 enum CheckSeparatorKind { CSK_BeforeDigits, CSK_AfterDigits }; 154 155 /// Ensure that we don't have a digit separator here. 156 void checkSeparator(SourceLocation TokLoc, const char *Pos, 157 CheckSeparatorKind IsAfterDigits); 158 159 /// SkipHexDigits - Read and skip over any hex digits, up to End. 160 /// Return a pointer to the first non-hex digit or End. 161 const char *SkipHexDigits(const char *ptr) { 162 while (ptr != ThisTokEnd && (isHexDigit(*ptr) || isDigitSeparator(*ptr))) 163 ptr++; 164 return ptr; 165 } 166 167 /// SkipOctalDigits - Read and skip over any octal digits, up to End. 168 /// Return a pointer to the first non-hex digit or End. 169 const char *SkipOctalDigits(const char *ptr) { 170 while (ptr != ThisTokEnd && 171 ((*ptr >= '0' && *ptr <= '7') || isDigitSeparator(*ptr))) 172 ptr++; 173 return ptr; 174 } 175 176 /// SkipDigits - Read and skip over any digits, up to End. 177 /// Return a pointer to the first non-hex digit or End. 178 const char *SkipDigits(const char *ptr) { 179 while (ptr != ThisTokEnd && (isDigit(*ptr) || isDigitSeparator(*ptr))) 180 ptr++; 181 return ptr; 182 } 183 184 /// SkipBinaryDigits - Read and skip over any binary digits, up to End. 185 /// Return a pointer to the first non-binary digit or End. 186 const char *SkipBinaryDigits(const char *ptr) { 187 while (ptr != ThisTokEnd && 188 (*ptr == '0' || *ptr == '1' || isDigitSeparator(*ptr))) 189 ptr++; 190 return ptr; 191 } 192 193 }; 194 195 /// CharLiteralParser - Perform interpretation and semantic analysis of a 196 /// character literal. 197 class CharLiteralParser { 198 uint64_t Value; 199 tok::TokenKind Kind; 200 bool IsMultiChar; 201 bool HadError; 202 SmallString<32> UDSuffixBuf; 203 unsigned UDSuffixOffset; 204 public: 205 CharLiteralParser(const char *begin, const char *end, 206 SourceLocation Loc, Preprocessor &PP, 207 tok::TokenKind kind); 208 209 bool hadError() const { return HadError; } 210 bool isOrdinary() const { return Kind == tok::char_constant; } 211 bool isWide() const { return Kind == tok::wide_char_constant; } 212 bool isUTF8() const { return Kind == tok::utf8_char_constant; } 213 bool isUTF16() const { return Kind == tok::utf16_char_constant; } 214 bool isUTF32() const { return Kind == tok::utf32_char_constant; } 215 bool isMultiChar() const { return IsMultiChar; } 216 uint64_t getValue() const { return Value; } 217 StringRef getUDSuffix() const { return UDSuffixBuf; } 218 unsigned getUDSuffixOffset() const { 219 assert(!UDSuffixBuf.empty() && "no ud-suffix"); 220 return UDSuffixOffset; 221 } 222 }; 223 224 enum class StringLiteralEvalMethod { 225 Evaluated, 226 Unevaluated, 227 }; 228 229 /// StringLiteralParser - This decodes string escape characters and performs 230 /// wide string analysis and Translation Phase #6 (concatenation of string 231 /// literals) (C99 5.1.1.2p1). 232 class StringLiteralParser { 233 const SourceManager &SM; 234 const LangOptions &Features; 235 const TargetInfo &Target; 236 DiagnosticsEngine *Diags; 237 238 unsigned MaxTokenLength; 239 unsigned SizeBound; 240 unsigned CharByteWidth; 241 tok::TokenKind Kind; 242 SmallString<512> ResultBuf; 243 char *ResultPtr; // cursor 244 SmallString<32> UDSuffixBuf; 245 unsigned UDSuffixToken; 246 unsigned UDSuffixOffset; 247 StringLiteralEvalMethod EvalMethod; 248 249 public: 250 StringLiteralParser(ArrayRef<Token> StringToks, Preprocessor &PP, 251 StringLiteralEvalMethod StringMethod = 252 StringLiteralEvalMethod::Evaluated); 253 StringLiteralParser(ArrayRef<Token> StringToks, const SourceManager &sm, 254 const LangOptions &features, const TargetInfo &target, 255 DiagnosticsEngine *diags = nullptr) 256 : SM(sm), Features(features), Target(target), Diags(diags), 257 MaxTokenLength(0), SizeBound(0), CharByteWidth(0), Kind(tok::unknown), 258 ResultPtr(ResultBuf.data()), 259 EvalMethod(StringLiteralEvalMethod::Evaluated), hadError(false), 260 Pascal(false) { 261 init(StringToks); 262 } 263 264 bool hadError; 265 bool Pascal; 266 267 StringRef GetString() const { 268 return StringRef(ResultBuf.data(), GetStringLength()); 269 } 270 unsigned GetStringLength() const { return ResultPtr-ResultBuf.data(); } 271 272 unsigned GetNumStringChars() const { 273 return GetStringLength() / CharByteWidth; 274 } 275 /// getOffsetOfStringByte - This function returns the offset of the 276 /// specified byte of the string data represented by Token. This handles 277 /// advancing over escape sequences in the string. 278 /// 279 /// If the Diagnostics pointer is non-null, then this will do semantic 280 /// checking of the string literal and emit errors and warnings. 281 unsigned getOffsetOfStringByte(const Token &TheTok, unsigned ByteNo) const; 282 283 bool isOrdinary() const { return Kind == tok::string_literal; } 284 bool isWide() const { return Kind == tok::wide_string_literal; } 285 bool isUTF8() const { return Kind == tok::utf8_string_literal; } 286 bool isUTF16() const { return Kind == tok::utf16_string_literal; } 287 bool isUTF32() const { return Kind == tok::utf32_string_literal; } 288 bool isPascal() const { return Pascal; } 289 bool isUnevaluated() const { 290 return EvalMethod == StringLiteralEvalMethod::Unevaluated; 291 } 292 293 StringRef getUDSuffix() const { return UDSuffixBuf; } 294 295 /// Get the index of a token containing a ud-suffix. 296 unsigned getUDSuffixToken() const { 297 assert(!UDSuffixBuf.empty() && "no ud-suffix"); 298 return UDSuffixToken; 299 } 300 /// Get the spelling offset of the first byte of the ud-suffix. 301 unsigned getUDSuffixOffset() const { 302 assert(!UDSuffixBuf.empty() && "no ud-suffix"); 303 return UDSuffixOffset; 304 } 305 306 static bool isValidUDSuffix(const LangOptions &LangOpts, StringRef Suffix); 307 308 private: 309 void init(ArrayRef<Token> StringToks); 310 bool CopyStringFragment(const Token &Tok, const char *TokBegin, 311 StringRef Fragment); 312 void DiagnoseLexingError(SourceLocation Loc); 313 }; 314 315 } // end namespace clang 316 317 #endif 318