1 //===--- LiteralSupport.h ---------------------------------------*- C++ -*-===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file defines the NumericLiteralParser, CharLiteralParser, and
11 // StringLiteralParser interfaces.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #ifndef LLVM_CLANG_LEX_LITERALSUPPORT_H
16 #define LLVM_CLANG_LEX_LITERALSUPPORT_H
17 
18 #include "clang/Basic/CharInfo.h"
19 #include "clang/Basic/LLVM.h"
20 #include "clang/Basic/TokenKinds.h"
21 #include "llvm/ADT/APFloat.h"
22 #include "llvm/ADT/ArrayRef.h"
23 #include "llvm/ADT/SmallString.h"
24 #include "llvm/ADT/StringRef.h"
25 #include "llvm/Support/DataTypes.h"
26 
27 namespace clang {
28 
29 class DiagnosticsEngine;
30 class Preprocessor;
31 class Token;
32 class SourceLocation;
33 class TargetInfo;
34 class SourceManager;
35 class LangOptions;
36 
37 /// Copy characters from Input to Buf, expanding any UCNs.
38 void expandUCNs(SmallVectorImpl<char> &Buf, StringRef Input);
39 
40 /// NumericLiteralParser - This performs strict semantic analysis of the content
41 /// of a ppnumber, classifying it as either integer, floating, or erroneous,
42 /// determines the radix of the value and can convert it to a useful value.
43 class NumericLiteralParser {
44   Preprocessor &PP; // needed for diagnostics
45 
46   const char *const ThisTokBegin;
47   const char *const ThisTokEnd;
48   const char *DigitsBegin, *SuffixBegin; // markers
49   const char *s; // cursor
50 
51   unsigned radix;
52 
53   bool saw_exponent, saw_period, saw_ud_suffix, saw_fixed_point_suffix;
54 
55   SmallString<32> UDSuffixBuf;
56 
57 public:
58   NumericLiteralParser(StringRef TokSpelling,
59                        SourceLocation TokLoc,
60                        Preprocessor &PP);
61   bool hadError : 1;
62   bool isUnsigned : 1;
63   bool isLong : 1;          // This is *not* set for long long.
64   bool isLongLong : 1;
65   bool isHalf : 1;          // 1.0h
66   bool isFloat : 1;         // 1.0f
67   bool isImaginary : 1;     // 1.0i
68   bool isFloat16 : 1;       // 1.0f16
69   bool isFloat128 : 1;      // 1.0q
70   uint8_t MicrosoftInteger; // Microsoft suffix extension i8, i16, i32, or i64.
71 
72   bool isFract : 1;         // 1.0hr/r/lr/uhr/ur/ulr
73   bool isAccum : 1;         // 1.0hk/k/lk/uhk/uk/ulk
74 
isFixedPointLiteral()75   bool isFixedPointLiteral() const { return saw_fixed_point_suffix; }
76 
isIntegerLiteral()77   bool isIntegerLiteral() const {
78     return !saw_period && !saw_exponent && !isFixedPointLiteral();
79   }
isFloatingLiteral()80   bool isFloatingLiteral() const {
81     return (saw_period || saw_exponent) && !isFixedPointLiteral();
82   }
83 
hasUDSuffix()84   bool hasUDSuffix() const {
85     return saw_ud_suffix;
86   }
getUDSuffix()87   StringRef getUDSuffix() const {
88     assert(saw_ud_suffix);
89     return UDSuffixBuf;
90   }
getUDSuffixOffset()91   unsigned getUDSuffixOffset() const {
92     assert(saw_ud_suffix);
93     return SuffixBegin - ThisTokBegin;
94   }
95 
96   static bool isValidUDSuffix(const LangOptions &LangOpts, StringRef Suffix);
97 
getRadix()98   unsigned getRadix() const { return radix; }
99 
100   /// GetIntegerValue - Convert this numeric literal value to an APInt that
101   /// matches Val's input width.  If there is an overflow (i.e., if the unsigned
102   /// value read is larger than the APInt's bits will hold), set Val to the low
103   /// bits of the result and return true.  Otherwise, return false.
104   bool GetIntegerValue(llvm::APInt &Val);
105 
106   /// GetFloatValue - Convert this numeric literal to a floating value, using
107   /// the specified APFloat fltSemantics (specifying float, double, etc).
108   /// The optional bool isExact (passed-by-reference) has its value
109   /// set to true if the returned APFloat can represent the number in the
110   /// literal exactly, and false otherwise.
111   llvm::APFloat::opStatus GetFloatValue(llvm::APFloat &Result);
112 
113   /// GetFixedPointValue - Convert this numeric literal value into a
114   /// scaled integer that represents this value. Returns true if an overflow
115   /// occurred when calculating the integral part of the scaled integer or
116   /// calculating the digit sequence of the exponent.
117   bool GetFixedPointValue(llvm::APInt &StoreVal, unsigned Scale);
118 
119 private:
120 
121   void ParseNumberStartingWithZero(SourceLocation TokLoc);
122   void ParseDecimalOrOctalCommon(SourceLocation TokLoc);
123 
isDigitSeparator(char C)124   static bool isDigitSeparator(char C) { return C == '\''; }
125 
126   /// Determine whether the sequence of characters [Start, End) contains
127   /// any real digits (not digit separators).
containsDigits(const char * Start,const char * End)128   bool containsDigits(const char *Start, const char *End) {
129     return Start != End && (Start + 1 != End || !isDigitSeparator(Start[0]));
130   }
131 
132   enum CheckSeparatorKind { CSK_BeforeDigits, CSK_AfterDigits };
133 
134   /// Ensure that we don't have a digit separator here.
135   void checkSeparator(SourceLocation TokLoc, const char *Pos,
136                       CheckSeparatorKind IsAfterDigits);
137 
138   /// SkipHexDigits - Read and skip over any hex digits, up to End.
139   /// Return a pointer to the first non-hex digit or End.
SkipHexDigits(const char * ptr)140   const char *SkipHexDigits(const char *ptr) {
141     while (ptr != ThisTokEnd && (isHexDigit(*ptr) || isDigitSeparator(*ptr)))
142       ptr++;
143     return ptr;
144   }
145 
146   /// SkipOctalDigits - Read and skip over any octal digits, up to End.
147   /// Return a pointer to the first non-hex digit or End.
SkipOctalDigits(const char * ptr)148   const char *SkipOctalDigits(const char *ptr) {
149     while (ptr != ThisTokEnd &&
150            ((*ptr >= '0' && *ptr <= '7') || isDigitSeparator(*ptr)))
151       ptr++;
152     return ptr;
153   }
154 
155   /// SkipDigits - Read and skip over any digits, up to End.
156   /// Return a pointer to the first non-hex digit or End.
SkipDigits(const char * ptr)157   const char *SkipDigits(const char *ptr) {
158     while (ptr != ThisTokEnd && (isDigit(*ptr) || isDigitSeparator(*ptr)))
159       ptr++;
160     return ptr;
161   }
162 
163   /// SkipBinaryDigits - Read and skip over any binary digits, up to End.
164   /// Return a pointer to the first non-binary digit or End.
SkipBinaryDigits(const char * ptr)165   const char *SkipBinaryDigits(const char *ptr) {
166     while (ptr != ThisTokEnd &&
167            (*ptr == '0' || *ptr == '1' || isDigitSeparator(*ptr)))
168       ptr++;
169     return ptr;
170   }
171 
172 };
173 
174 /// CharLiteralParser - Perform interpretation and semantic analysis of a
175 /// character literal.
176 class CharLiteralParser {
177   uint64_t Value;
178   tok::TokenKind Kind;
179   bool IsMultiChar;
180   bool HadError;
181   SmallString<32> UDSuffixBuf;
182   unsigned UDSuffixOffset;
183 public:
184   CharLiteralParser(const char *begin, const char *end,
185                     SourceLocation Loc, Preprocessor &PP,
186                     tok::TokenKind kind);
187 
hadError()188   bool hadError() const { return HadError; }
isAscii()189   bool isAscii() const { return Kind == tok::char_constant; }
isWide()190   bool isWide() const { return Kind == tok::wide_char_constant; }
isUTF8()191   bool isUTF8() const { return Kind == tok::utf8_char_constant; }
isUTF16()192   bool isUTF16() const { return Kind == tok::utf16_char_constant; }
isUTF32()193   bool isUTF32() const { return Kind == tok::utf32_char_constant; }
isMultiChar()194   bool isMultiChar() const { return IsMultiChar; }
getValue()195   uint64_t getValue() const { return Value; }
getUDSuffix()196   StringRef getUDSuffix() const { return UDSuffixBuf; }
getUDSuffixOffset()197   unsigned getUDSuffixOffset() const {
198     assert(!UDSuffixBuf.empty() && "no ud-suffix");
199     return UDSuffixOffset;
200   }
201 };
202 
203 /// StringLiteralParser - This decodes string escape characters and performs
204 /// wide string analysis and Translation Phase #6 (concatenation of string
205 /// literals) (C99 5.1.1.2p1).
206 class StringLiteralParser {
207   const SourceManager &SM;
208   const LangOptions &Features;
209   const TargetInfo &Target;
210   DiagnosticsEngine *Diags;
211 
212   unsigned MaxTokenLength;
213   unsigned SizeBound;
214   unsigned CharByteWidth;
215   tok::TokenKind Kind;
216   SmallString<512> ResultBuf;
217   char *ResultPtr; // cursor
218   SmallString<32> UDSuffixBuf;
219   unsigned UDSuffixToken;
220   unsigned UDSuffixOffset;
221 public:
222   StringLiteralParser(ArrayRef<Token> StringToks,
223                       Preprocessor &PP, bool Complain = true);
224   StringLiteralParser(ArrayRef<Token> StringToks,
225                       const SourceManager &sm, const LangOptions &features,
226                       const TargetInfo &target,
227                       DiagnosticsEngine *diags = nullptr)
SM(sm)228     : SM(sm), Features(features), Target(target), Diags(diags),
229       MaxTokenLength(0), SizeBound(0), CharByteWidth(0), Kind(tok::unknown),
230       ResultPtr(ResultBuf.data()), hadError(false), Pascal(false) {
231     init(StringToks);
232   }
233 
234 
235   bool hadError;
236   bool Pascal;
237 
GetString()238   StringRef GetString() const {
239     return StringRef(ResultBuf.data(), GetStringLength());
240   }
GetStringLength()241   unsigned GetStringLength() const { return ResultPtr-ResultBuf.data(); }
242 
GetNumStringChars()243   unsigned GetNumStringChars() const {
244     return GetStringLength() / CharByteWidth;
245   }
246   /// getOffsetOfStringByte - This function returns the offset of the
247   /// specified byte of the string data represented by Token.  This handles
248   /// advancing over escape sequences in the string.
249   ///
250   /// If the Diagnostics pointer is non-null, then this will do semantic
251   /// checking of the string literal and emit errors and warnings.
252   unsigned getOffsetOfStringByte(const Token &TheTok, unsigned ByteNo) const;
253 
isAscii()254   bool isAscii() const { return Kind == tok::string_literal; }
isWide()255   bool isWide() const { return Kind == tok::wide_string_literal; }
isUTF8()256   bool isUTF8() const { return Kind == tok::utf8_string_literal; }
isUTF16()257   bool isUTF16() const { return Kind == tok::utf16_string_literal; }
isUTF32()258   bool isUTF32() const { return Kind == tok::utf32_string_literal; }
isPascal()259   bool isPascal() const { return Pascal; }
260 
getUDSuffix()261   StringRef getUDSuffix() const { return UDSuffixBuf; }
262 
263   /// Get the index of a token containing a ud-suffix.
getUDSuffixToken()264   unsigned getUDSuffixToken() const {
265     assert(!UDSuffixBuf.empty() && "no ud-suffix");
266     return UDSuffixToken;
267   }
268   /// Get the spelling offset of the first byte of the ud-suffix.
getUDSuffixOffset()269   unsigned getUDSuffixOffset() const {
270     assert(!UDSuffixBuf.empty() && "no ud-suffix");
271     return UDSuffixOffset;
272   }
273 
274   static bool isValidUDSuffix(const LangOptions &LangOpts, StringRef Suffix);
275 
276 private:
277   void init(ArrayRef<Token> StringToks);
278   bool CopyStringFragment(const Token &Tok, const char *TokBegin,
279                           StringRef Fragment);
280   void DiagnoseLexingError(SourceLocation Loc);
281 };
282 
283 }  // end namespace clang
284 
285 #endif
286