1 //
2 // Copyright (C) 2002-2005  3Dlabs Inc. Ltd.
3 // Copyright (C) 2013 LunarG, Inc.
4 // Copyright (C) 2017 ARM Limited.
5 // Copyright (C) 2015-2018 Google, Inc.
6 //
7 // All rights reserved.
8 //
9 // Redistribution and use in source and binary forms, with or without
10 // modification, are permitted provided that the following conditions
11 // are met:
12 //
13 //    Redistributions of source code must retain the above copyright
14 //    notice, this list of conditions and the following disclaimer.
15 //
16 //    Redistributions in binary form must reproduce the above
17 //    copyright notice, this list of conditions and the following
18 //    disclaimer in the documentation and/or other materials provided
19 //    with the distribution.
20 //
21 //    Neither the name of 3Dlabs Inc. Ltd. nor the names of its
22 //    contributors may be used to endorse or promote products derived
23 //    from this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
26 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
27 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
28 // FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
29 // COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
30 // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
31 // BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
32 // LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
33 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34 // LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
35 // ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36 // POSSIBILITY OF SUCH DAMAGE.
37 //
38 /****************************************************************************\
39 Copyright (c) 2002, NVIDIA Corporation.
40 
41 NVIDIA Corporation("NVIDIA") supplies this software to you in
42 consideration of your agreement to the following terms, and your use,
43 installation, modification or redistribution of this NVIDIA software
44 constitutes acceptance of these terms.  If you do not agree with these
45 terms, please do not use, install, modify or redistribute this NVIDIA
46 software.
47 
48 In consideration of your agreement to abide by the following terms, and
49 subject to these terms, NVIDIA grants you a personal, non-exclusive
50 license, under NVIDIA's copyrights in this original NVIDIA software (the
51 "NVIDIA Software"), to use, reproduce, modify and redistribute the
52 NVIDIA Software, with or without modifications, in source and/or binary
53 forms; provided that if you redistribute the NVIDIA Software, you must
54 retain the copyright notice of NVIDIA, this notice and the following
55 text and disclaimers in all such redistributions of the NVIDIA Software.
56 Neither the name, trademarks, service marks nor logos of NVIDIA
57 Corporation may be used to endorse or promote products derived from the
58 NVIDIA Software without specific prior written permission from NVIDIA.
59 Except as expressly stated in this notice, no other rights or licenses
60 express or implied, are granted by NVIDIA herein, including but not
61 limited to any patent rights that may be infringed by your derivative
62 works or by other works in which the NVIDIA Software may be
63 incorporated. No hardware is licensed hereunder.
64 
65 THE NVIDIA SOFTWARE IS BEING PROVIDED ON AN "AS IS" BASIS, WITHOUT
66 WARRANTIES OR CONDITIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED,
67 INCLUDING WITHOUT LIMITATION, WARRANTIES OR CONDITIONS OF TITLE,
68 NON-INFRINGEMENT, MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, OR
69 ITS USE AND OPERATION EITHER ALONE OR IN COMBINATION WITH OTHER
70 PRODUCTS.
71 
72 IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT,
73 INCIDENTAL, EXEMPLARY, CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
74 TO, LOST PROFITS; PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
75 USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) OR ARISING IN ANY WAY
76 OUT OF THE USE, REPRODUCTION, MODIFICATION AND/OR DISTRIBUTION OF THE
77 NVIDIA SOFTWARE, HOWEVER CAUSED AND WHETHER UNDER THEORY OF CONTRACT,
78 TORT (INCLUDING NEGLIGENCE), STRICT LIABILITY OR OTHERWISE, EVEN IF
79 NVIDIA HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
80 \****************************************************************************/
81 
82 #ifndef _CRT_SECURE_NO_WARNINGS
83 #define _CRT_SECURE_NO_WARNINGS
84 #endif
85 
86 #include <cstdlib>
87 #include <cstring>
88 
89 #include "PpContext.h"
90 #include "PpTokens.h"
91 #include "../Scan.h"
92 
93 namespace glslang {
94 
95 ///////////////////////////////////////////////////////////////////////////////////////////////
96 /////////////////////////////////// Floating point constants: /////////////////////////////////
97 ///////////////////////////////////////////////////////////////////////////////////////////////
98 
99 //
100 // Scan a single- or double-precision floating point constant.
101 // Assumes that the scanner has seen at least one digit,
102 // followed by either a decimal '.' or the letter 'e', or a
103 // precision ending (e.g., F or LF).
104 //
105 // This is technically not correct, as the preprocessor should just
106 // accept the numeric literal along with whatever suffix it has, but
107 // currently, it stops on seeing a bad suffix, treating that as the
108 // next token. This affects things like token pasting, where it is
109 // relevant how many tokens something was broken into.
110 //
111 // See peekContinuedPasting().
lFloatConst(int len,int ch,TPpToken * ppToken)112 int TPpContext::lFloatConst(int len, int ch, TPpToken* ppToken)
113 {
114     const auto saveName = [&](int ch) {
115         if (len <= MaxTokenLength)
116             ppToken->name[len++] = static_cast<char>(ch);
117     };
118 
119     // find the range of non-zero digits before the decimal point
120     int startNonZero = 0;
121     while (startNonZero < len && ppToken->name[startNonZero] == '0')
122         ++startNonZero;
123     int endNonZero = len;
124     while (endNonZero > startNonZero && ppToken->name[endNonZero-1] == '0')
125         --endNonZero;
126     int numWholeNumberDigits = endNonZero - startNonZero;
127 
128     // accumulate the range's value
129     bool fastPath = numWholeNumberDigits <= 15;  // when the number gets too complex, set to false
130     unsigned long long wholeNumber = 0;
131     if (fastPath) {
132         for (int i = startNonZero; i < endNonZero; ++i)
133             wholeNumber = wholeNumber * 10 + (ppToken->name[i] - '0');
134     }
135     int decimalShift = len - endNonZero;
136 
137     // Decimal point:
138     bool hasDecimalOrExponent = false;
139     if (ch == '.') {
140         hasDecimalOrExponent = true;
141         saveName(ch);
142         ch = getChar();
143         int firstDecimal = len;
144 
145 #ifdef ENABLE_HLSL
146         // 1.#INF or -1.#INF
147         if (ch == '#' && (ifdepth > 0 || parseContext.intermediate.getSource() == EShSourceHlsl)) {
148             if ((len <  2) ||
149                 (len == 2 && ppToken->name[0] != '1') ||
150                 (len == 3 && ppToken->name[1] != '1' && !(ppToken->name[0] == '-' || ppToken->name[0] == '+')) ||
151                 (len >  3))
152                 parseContext.ppError(ppToken->loc, "unexpected use of", "#", "");
153             else {
154                 // we have 1.# or -1.# or +1.#, check for 'INF'
155                 if ((ch = getChar()) != 'I' ||
156                     (ch = getChar()) != 'N' ||
157                     (ch = getChar()) != 'F')
158                     parseContext.ppError(ppToken->loc, "expected 'INF'", "#", "");
159                 else {
160                     // we have [+-].#INF, and we are targeting IEEE 754, so wrap it up:
161                     saveName('I');
162                     saveName('N');
163                     saveName('F');
164                     ppToken->name[len] = '\0';
165                     if (ppToken->name[0] == '-')
166                         ppToken->i64val = 0xfff0000000000000; // -Infinity
167                     else
168                         ppToken->i64val = 0x7ff0000000000000; // +Infinity
169                     return PpAtomConstFloat;
170                 }
171             }
172         }
173 #endif
174 
175         // Consume leading-zero digits after the decimal point
176         while (ch == '0') {
177             saveName(ch);
178             ch = getChar();
179         }
180         int startNonZeroDecimal = len;
181         int endNonZeroDecimal = len;
182 
183         // Consume remaining digits, up to the exponent
184         while (ch >= '0' && ch <= '9') {
185             saveName(ch);
186             if (ch != '0')
187                 endNonZeroDecimal = len;
188             ch = getChar();
189         }
190 
191         // Compute accumulation up to the last non-zero digit
192         if (endNonZeroDecimal > startNonZeroDecimal) {
193             numWholeNumberDigits += endNonZeroDecimal - endNonZero - 1; // don't include the "."
194             if (numWholeNumberDigits > 15)
195                 fastPath = false;
196             if (fastPath) {
197                 for (int i = endNonZero; i < endNonZeroDecimal; ++i) {
198                     if (ppToken->name[i] != '.')
199                         wholeNumber = wholeNumber * 10 + (ppToken->name[i] - '0');
200                 }
201             }
202             decimalShift = firstDecimal - endNonZeroDecimal;
203         }
204     }
205 
206     // Exponent:
207     bool negativeExponent = false;
208     double exponentValue = 0.0;
209     int exponent = 0;
210     {
211         if (ch == 'e' || ch == 'E') {
212             hasDecimalOrExponent = true;
213             saveName(ch);
214             ch = getChar();
215             if (ch == '+' || ch == '-') {
216                 negativeExponent = ch == '-';
217                 saveName(ch);
218                 ch = getChar();
219             }
220             if (ch >= '0' && ch <= '9') {
221                 while (ch >= '0' && ch <= '9') {
222                     exponent = exponent * 10 + (ch - '0');
223                     saveName(ch);
224                     ch = getChar();
225                 }
226             } else {
227                 parseContext.ppError(ppToken->loc, "bad character in float exponent", "", "");
228             }
229         }
230 
231         // Compensate for location of decimal
232         if (negativeExponent)
233             exponent -= decimalShift;
234         else {
235             exponent += decimalShift;
236             if (exponent < 0) {
237                 negativeExponent = true;
238                 exponent = -exponent;
239             }
240         }
241         if (exponent > 22)
242             fastPath = false;
243 
244         if (fastPath) {
245             // Compute the floating-point value of the exponent
246             exponentValue = 1.0;
247             if (exponent > 0) {
248                 double expFactor = 10;
249                 while (exponent > 0) {
250                     if (exponent & 0x1)
251                         exponentValue *= expFactor;
252                     expFactor *= expFactor;
253                     exponent >>= 1;
254                 }
255             }
256         }
257     }
258 
259     // Suffix:
260     bool isDouble = false;
261     bool isFloat16 = false;
262 #ifndef GLSLANG_WEB
263     if (ch == 'l' || ch == 'L') {
264         if (ifdepth == 0 && parseContext.intermediate.getSource() == EShSourceGlsl)
265             parseContext.doubleCheck(ppToken->loc, "double floating-point suffix");
266         if (ifdepth == 0 && !hasDecimalOrExponent)
267             parseContext.ppError(ppToken->loc, "float literal needs a decimal point or exponent", "", "");
268         if (parseContext.intermediate.getSource() == EShSourceGlsl) {
269             int ch2 = getChar();
270             if (ch2 != 'f' && ch2 != 'F') {
271                 ungetChar();
272                 ungetChar();
273             } else {
274                 saveName(ch);
275                 saveName(ch2);
276                 isDouble = true;
277             }
278         } else if (parseContext.intermediate.getSource() == EShSourceHlsl) {
279             saveName(ch);
280             isDouble = true;
281         }
282     } else if (ch == 'h' || ch == 'H') {
283         if (ifdepth == 0 && parseContext.intermediate.getSource() == EShSourceGlsl)
284             parseContext.float16Check(ppToken->loc, "half floating-point suffix");
285         if (ifdepth == 0 && !hasDecimalOrExponent)
286             parseContext.ppError(ppToken->loc, "float literal needs a decimal point or exponent", "", "");
287         if (parseContext.intermediate.getSource() == EShSourceGlsl) {
288             int ch2 = getChar();
289             if (ch2 != 'f' && ch2 != 'F') {
290                 ungetChar();
291                 ungetChar();
292             } else {
293                 saveName(ch);
294                 saveName(ch2);
295                 isFloat16 = true;
296             }
297         } else if (parseContext.intermediate.getSource() == EShSourceHlsl) {
298             saveName(ch);
299             isFloat16 = true;
300         }
301     } else
302 #endif
303     if (ch == 'f' || ch == 'F') {
304 #ifndef GLSLANG_WEB
305         if (ifdepth == 0)
306             parseContext.profileRequires(ppToken->loc,  EEsProfile, 300, nullptr, "floating-point suffix");
307         if (ifdepth == 0 && !parseContext.relaxedErrors())
308             parseContext.profileRequires(ppToken->loc, ~EEsProfile, 120, nullptr, "floating-point suffix");
309 #endif
310         if (ifdepth == 0 && !hasDecimalOrExponent)
311             parseContext.ppError(ppToken->loc, "float literal needs a decimal point or exponent", "", "");
312         saveName(ch);
313     } else
314         ungetChar();
315 
316     // Patch up the name and length for overflow
317 
318     if (len > MaxTokenLength) {
319         len = MaxTokenLength;
320         parseContext.ppError(ppToken->loc, "float literal too long", "", "");
321     }
322     ppToken->name[len] = '\0';
323 
324     // Compute the numerical value
325     if (fastPath) {
326         // compute the floating-point value of the exponent
327         if (exponentValue == 0.0)
328             ppToken->dval = (double)wholeNumber;
329         else if (negativeExponent)
330             ppToken->dval = (double)wholeNumber / exponentValue;
331         else
332             ppToken->dval = (double)wholeNumber * exponentValue;
333     } else {
334         // slow path
335         ppToken->dval = 0.0;
336 
337         // remove suffix
338         TString numstr(ppToken->name);
339         if (numstr.back() == 'f' || numstr.back() == 'F')
340             numstr.pop_back();
341         if (numstr.back() == 'h' || numstr.back() == 'H')
342             numstr.pop_back();
343         if (numstr.back() == 'l' || numstr.back() == 'L')
344             numstr.pop_back();
345 
346         // use platform library
347         strtodStream.clear();
348         strtodStream.str(numstr.c_str());
349         strtodStream >> ppToken->dval;
350         if (strtodStream.fail()) {
351             // Assume failure combined with a large exponent was overflow, in
352             // an attempt to set INF.
353             if (!negativeExponent && exponent + numWholeNumberDigits > 300)
354                 ppToken->i64val = 0x7ff0000000000000; // +Infinity
355             // Assume failure combined with a small exponent was overflow.
356             if (negativeExponent && exponent + numWholeNumberDigits > 300)
357                 ppToken->dval = 0.0;
358             // Unknown reason for failure. Theory is that either
359             //  - the 0.0 is still there, or
360             //  - something reasonable was written that is better than 0.0
361         }
362     }
363 
364     // Return the right token type
365     if (isDouble)
366         return PpAtomConstDouble;
367     else if (isFloat16)
368         return PpAtomConstFloat16;
369     else
370         return PpAtomConstFloat;
371 }
372 
373 // Recognize a character literal.
374 //
375 // The first ' has already been accepted, read the rest, through the closing '.
376 //
377 // Returns PpAtomConstInt for HLSL source; for any other source, returns the '\'' character itself.
378 //
characterLiteral(TPpToken * ppToken)379 int TPpContext::characterLiteral(TPpToken* ppToken)
380 {
381     ppToken->name[0] = 0;
382     ppToken->ival = 0;
383 
384     if (parseContext.intermediate.getSource() != EShSourceHlsl) {
385         // illegal, except in macro definition, for which case we report the character
386         return '\'';
387     }
388 
389     int ch = getChar();
390     switch (ch) {
391     case '\'':
392         // As empty sequence:  ''
393         parseContext.ppError(ppToken->loc, "unexpected", "\'", "");
394         return PpAtomConstInt;
395     case '\\':
396         // As escape sequence:  '\XXX'
397         switch (ch = getChar()) {
398         case 'a':
399             ppToken->ival = 7;
400             break;
401         case 'b':
402             ppToken->ival = 8;
403             break;
404         case 't':
405             ppToken->ival = 9;
406             break;
407         case 'n':
408             ppToken->ival = 10;
409             break;
410         case 'v':
411             ppToken->ival = 11;
412             break;
413         case 'f':
414             ppToken->ival = 12;
415             break;
416         case 'r':
417             ppToken->ival = 13;
418             break;
419         case 'x':
420         case '0':
421             parseContext.ppError(ppToken->loc, "octal and hex sequences not supported", "\\", "");
422             break;
423         default:
424             // This catches '\'', '\"', '\?', etc.
425             // Also, things like '\C' mean the same thing as 'C'
426             // (after the above cases are filtered out).
427             ppToken->ival = ch;
428             break;
429         }
430         break;
431     default:
432         ppToken->ival = ch;
433         break;
434     }
435     ppToken->name[0] = (char)ppToken->ival;
436     ppToken->name[1] = '\0';
437     ch = getChar();
438     if (ch != '\'') {
439         parseContext.ppError(ppToken->loc, "expected", "\'", "");
440         // Look ahead for a closing '
441         do {
442             ch = getChar();
443         } while (ch != '\'' && ch != EndOfInput && ch != '\n');
444     }
445 
446     return PpAtomConstInt;
447 }
448 
449 //
450 // Scanner used to tokenize source stream.
451 //
452 // N.B. Invalid numeric suffixes are not consumed.
453 // This is technically not correct, as the preprocessor should just
454 // accept the numeric literal along with whatever suffix it has, but
455 // currently, it stops on seeing a bad suffix, treating that as the
456 // next token. This affects things like token pasting, where it is
457 // relevant how many tokens something was broken into.
458 // See peekContinuedPasting().
459 //
scan(TPpToken * ppToken)460 int TPpContext::tStringInput::scan(TPpToken* ppToken)
461 {
462     int AlreadyComplained = 0;
463     int len = 0;
464     int ch = 0;
465     int ii = 0;
466     unsigned long long ival = 0;
467     const auto floatingPointChar = [&](int ch) { return ch == '.' || ch == 'e' || ch == 'E' ||
468                                                                      ch == 'f' || ch == 'F' ||
469                                                                      ch == 'h' || ch == 'H'; };
470 
471     static const char* const Int64_Extensions[] = {
472         E_GL_ARB_gpu_shader_int64,
473         E_GL_EXT_shader_explicit_arithmetic_types,
474         E_GL_EXT_shader_explicit_arithmetic_types_int64 };
475     static const int Num_Int64_Extensions = sizeof(Int64_Extensions) / sizeof(Int64_Extensions[0]);
476 
477     static const char* const Int16_Extensions[] = {
478         E_GL_AMD_gpu_shader_int16,
479         E_GL_EXT_shader_explicit_arithmetic_types,
480         E_GL_EXT_shader_explicit_arithmetic_types_int16 };
481     static const int Num_Int16_Extensions = sizeof(Int16_Extensions) / sizeof(Int16_Extensions[0]);
482 
483     ppToken->ival = 0;
484     ppToken->i64val = 0;
485     ppToken->space = false;
486     ch = getch();
487     for (;;) {
488         while (ch == ' ' || ch == '\t') {
489             ppToken->space = true;
490             ch = getch();
491         }
492 
493         ppToken->loc = pp->parseContext.getCurrentLoc();
494         len = 0;
495         switch (ch) {
496         default:
497             // Single character token, including EndOfInput, '#' and '\' (escaped newlines are handled at a lower level, so this is just a '\' token)
498             if (ch > PpAtomMaxSingle)
499                 ch = PpAtomBadToken;
500             return ch;
501 
502         case 'A': case 'B': case 'C': case 'D': case 'E':
503         case 'F': case 'G': case 'H': case 'I': case 'J':
504         case 'K': case 'L': case 'M': case 'N': case 'O':
505         case 'P': case 'Q': case 'R': case 'S': case 'T':
506         case 'U': case 'V': case 'W': case 'X': case 'Y':
507         case 'Z': case '_':
508         case 'a': case 'b': case 'c': case 'd': case 'e':
509         case 'f': case 'g': case 'h': case 'i': case 'j':
510         case 'k': case 'l': case 'm': case 'n': case 'o':
511         case 'p': case 'q': case 'r': case 's': case 't':
512         case 'u': case 'v': case 'w': case 'x': case 'y':
513         case 'z':
514             do {
515                 if (len < MaxTokenLength) {
516                     ppToken->name[len++] = (char)ch;
517                     ch = getch();
518                 } else {
519                     if (! AlreadyComplained) {
520                         pp->parseContext.ppError(ppToken->loc, "name too long", "", "");
521                         AlreadyComplained = 1;
522                     }
523                     ch = getch();
524                 }
525             } while ((ch >= 'a' && ch <= 'z') ||
526                      (ch >= 'A' && ch <= 'Z') ||
527                      (ch >= '0' && ch <= '9') ||
528                      ch == '_');
529 
530             // line continuation with no token before or after makes len == 0, and need to start over skipping white space, etc.
531             if (len == 0)
532                 continue;
533 
534             ppToken->name[len] = '\0';
535             ungetch();
536             return PpAtomIdentifier;
537         case '0':
538             ppToken->name[len++] = (char)ch;
539             ch = getch();
540             if (ch == 'x' || ch == 'X') {
541                 // must be hexadecimal
542 
543                 bool isUnsigned = false;
544                 bool isInt64 = false;
545                 bool isInt16 = false;
546                 ppToken->name[len++] = (char)ch;
547                 ch = getch();
548                 if ((ch >= '0' && ch <= '9') ||
549                     (ch >= 'A' && ch <= 'F') ||
550                     (ch >= 'a' && ch <= 'f')) {
551 
552                     ival = 0;
553                     do {
554                         if (len < MaxTokenLength && ival <= 0x0fffffffffffffffull) {
555                             ppToken->name[len++] = (char)ch;
556                             if (ch >= '0' && ch <= '9') {
557                                 ii = ch - '0';
558                             } else if (ch >= 'A' && ch <= 'F') {
559                                 ii = ch - 'A' + 10;
560                             } else if (ch >= 'a' && ch <= 'f') {
561                                 ii = ch - 'a' + 10;
562                             } else
563                                 pp->parseContext.ppError(ppToken->loc, "bad digit in hexadecimal literal", "", "");
564                             ival = (ival << 4) | ii;
565                         } else {
566                             if (! AlreadyComplained) {
567                                 if(len < MaxTokenLength)
568                                     pp->parseContext.ppError(ppToken->loc, "hexadecimal literal too big", "", "");
569                                 else
570                                     pp->parseContext.ppError(ppToken->loc, "hexadecimal literal too long", "", "");
571                                 AlreadyComplained = 1;
572                             }
573                             ival = 0xffffffffffffffffull;
574                         }
575                         ch = getch();
576                     } while ((ch >= '0' && ch <= '9') ||
577                              (ch >= 'A' && ch <= 'F') ||
578                              (ch >= 'a' && ch <= 'f'));
579                 } else {
580                     pp->parseContext.ppError(ppToken->loc, "bad digit in hexadecimal literal", "", "");
581                 }
582                 if (ch == 'u' || ch == 'U') {
583                     if (len < MaxTokenLength)
584                         ppToken->name[len++] = (char)ch;
585                     isUnsigned = true;
586 
587 #ifndef GLSLANG_WEB
588                     int nextCh = getch();
589                     if (nextCh == 'l' || nextCh == 'L') {
590                         if (len < MaxTokenLength)
591                             ppToken->name[len++] = (char)nextCh;
592                         isInt64 = true;
593                     } else
594                         ungetch();
595 
596                     nextCh = getch();
597                     if ((nextCh == 's' || nextCh == 'S') &&
598                             pp->parseContext.intermediate.getSource() == EShSourceGlsl) {
599                         if (len < MaxTokenLength)
600                             ppToken->name[len++] = (char)nextCh;
601                         isInt16 = true;
602                     } else
603                         ungetch();
604                 } else if (ch == 'l' || ch == 'L') {
605                     if (len < MaxTokenLength)
606                         ppToken->name[len++] = (char)ch;
607                     isInt64 = true;
608                 } else if ((ch == 's' || ch == 'S') &&
609                            pp->parseContext.intermediate.getSource() == EShSourceGlsl) {
610                     if (len < MaxTokenLength)
611                         ppToken->name[len++] = (char)ch;
612                     isInt16 = true;
613 #endif
614                 } else
615                     ungetch();
616                 ppToken->name[len] = '\0';
617 
618                 if (isInt64 && pp->parseContext.intermediate.getSource() == EShSourceGlsl) {
619                     if (pp->ifdepth == 0) {
620                         pp->parseContext.requireProfile(ppToken->loc, ~EEsProfile,
621                                                         "64-bit hexadecimal literal");
622                         pp->parseContext.profileRequires(ppToken->loc, ~EEsProfile, 0,
623                             Num_Int64_Extensions, Int64_Extensions, "64-bit hexadecimal literal");
624                     }
625                     ppToken->i64val = ival;
626                     return isUnsigned ? PpAtomConstUint64 : PpAtomConstInt64;
627                 } else if (isInt16) {
628                     if (pp->ifdepth == 0) {
629                         if (pp->parseContext.intermediate.getSource() == EShSourceGlsl) {
630                             pp->parseContext.requireProfile(ppToken->loc, ~EEsProfile,
631                                                              "16-bit hexadecimal literal");
632                             pp->parseContext.profileRequires(ppToken->loc, ~EEsProfile, 0,
633                                 Num_Int16_Extensions, Int16_Extensions, "16-bit hexadecimal literal");
634                         }
635                     }
636                     ppToken->ival = (int)ival;
637                     return isUnsigned ? PpAtomConstUint16 : PpAtomConstInt16;
638                 } else {
639                     if (ival > 0xffffffffu && !AlreadyComplained)
640                         pp->parseContext.ppError(ppToken->loc, "hexadecimal literal too big", "", "");
641                     ppToken->ival = (int)ival;
642                     return isUnsigned ? PpAtomConstUint : PpAtomConstInt;
643                 }
644             } else {
645                 // could be octal integer or floating point, speculative pursue octal until it must be floating point
646 
647                 bool isUnsigned = false;
648                 bool isInt64 = false;
649                 bool isInt16 = false;
650                 bool octalOverflow = false;
651                 bool nonOctal = false;
652                 ival = 0;
653 
654                 // see how much octal-like stuff we can read
655                 while (ch >= '0' && ch <= '7') {
656                     if (len < MaxTokenLength)
657                         ppToken->name[len++] = (char)ch;
658                     else if (! AlreadyComplained) {
659                         pp->parseContext.ppError(ppToken->loc, "numeric literal too long", "", "");
660                         AlreadyComplained = 1;
661                     }
662                     if (ival <= 0x1fffffffffffffffull) {
663                         ii = ch - '0';
664                         ival = (ival << 3) | ii;
665                     } else
666                         octalOverflow = true;
667                     ch = getch();
668                 }
669 
670                 // could be part of a float...
671                 if (ch == '8' || ch == '9') {
672                     nonOctal = true;
673                     do {
674                         if (len < MaxTokenLength)
675                             ppToken->name[len++] = (char)ch;
676                         else if (! AlreadyComplained) {
677                             pp->parseContext.ppError(ppToken->loc, "numeric literal too long", "", "");
678                             AlreadyComplained = 1;
679                         }
680                         ch = getch();
681                     } while (ch >= '0' && ch <= '9');
682                 }
683                 if (floatingPointChar(ch))
684                     return pp->lFloatConst(len, ch, ppToken);
685 
686                 // wasn't a float, so must be octal...
687                 if (nonOctal)
688                     pp->parseContext.ppError(ppToken->loc, "octal literal digit too large", "", "");
689 
690                 if (ch == 'u' || ch == 'U') {
691                     if (len < MaxTokenLength)
692                         ppToken->name[len++] = (char)ch;
693                     isUnsigned = true;
694 
695 #ifndef GLSLANG_WEB
696                     int nextCh = getch();
697                     if (nextCh == 'l' || nextCh == 'L') {
698                         if (len < MaxTokenLength)
699                             ppToken->name[len++] = (char)nextCh;
700                         isInt64 = true;
701                     } else
702                         ungetch();
703 
704                     nextCh = getch();
705                     if ((nextCh == 's' || nextCh == 'S') &&
706                                 pp->parseContext.intermediate.getSource() == EShSourceGlsl) {
707                         if (len < MaxTokenLength)
708                             ppToken->name[len++] = (char)nextCh;
709                         isInt16 = true;
710                     } else
711                         ungetch();
712                 } else if (ch == 'l' || ch == 'L') {
713                     if (len < MaxTokenLength)
714                         ppToken->name[len++] = (char)ch;
715                     isInt64 = true;
716                 } else if ((ch == 's' || ch == 'S') &&
717                                 pp->parseContext.intermediate.getSource() == EShSourceGlsl) {
718                     if (len < MaxTokenLength)
719                         ppToken->name[len++] = (char)ch;
720                     isInt16 = true;
721 #endif
722                 } else
723                     ungetch();
724                 ppToken->name[len] = '\0';
725 
726                 if (!isInt64 && ival > 0xffffffffu)
727                     octalOverflow = true;
728 
729                 if (octalOverflow)
730                     pp->parseContext.ppError(ppToken->loc, "octal literal too big", "", "");
731 
732                 if (isInt64 && pp->parseContext.intermediate.getSource() == EShSourceGlsl) {
733                     if (pp->ifdepth == 0) {
734                         pp->parseContext.requireProfile(ppToken->loc, ~EEsProfile,
735                                                         "64-bit octal literal");
736                         pp->parseContext.profileRequires(ppToken->loc, ~EEsProfile, 0,
737                             Num_Int64_Extensions, Int64_Extensions, "64-bit octal literal");
738                     }
739                     ppToken->i64val = ival;
740                     return isUnsigned ? PpAtomConstUint64 : PpAtomConstInt64;
741                 } else if (isInt16) {
742                     if (pp->ifdepth == 0) {
743                         if (pp->parseContext.intermediate.getSource() == EShSourceGlsl) {
744                             pp->parseContext.requireProfile(ppToken->loc, ~EEsProfile,
745                                                             "16-bit octal literal");
746                             pp->parseContext.profileRequires(ppToken->loc, ~EEsProfile, 0,
747                                 Num_Int16_Extensions, Int16_Extensions, "16-bit octal literal");
748                         }
749                     }
750                     ppToken->ival = (int)ival;
751                     return isUnsigned ? PpAtomConstUint16 : PpAtomConstInt16;
752                 } else {
753                     ppToken->ival = (int)ival;
754                     return isUnsigned ? PpAtomConstUint : PpAtomConstInt;
755                 }
756             }
757             break;
758         case '1': case '2': case '3': case '4':
759         case '5': case '6': case '7': case '8': case '9':
760             // can't be hexadecimal or octal, is either decimal or floating point
761 
762             do {
763                 if (len < MaxTokenLength)
764                     ppToken->name[len++] = (char)ch;
765                 else if (! AlreadyComplained) {
766                     pp->parseContext.ppError(ppToken->loc, "numeric literal too long", "", "");
767                     AlreadyComplained = 1;
768                 }
769                 ch = getch();
770             } while (ch >= '0' && ch <= '9');
771             if (floatingPointChar(ch))
772                 return pp->lFloatConst(len, ch, ppToken);
773             else {
774                 // Finish handling signed and unsigned integers
775                 int numericLen = len;
776                 bool isUnsigned = false;
777                 bool isInt64 = false;
778                 bool isInt16 = false;
779                 if (ch == 'u' || ch == 'U') {
780                     if (len < MaxTokenLength)
781                         ppToken->name[len++] = (char)ch;
782                     isUnsigned = true;
783 
784 #ifndef GLSLANG_WEB
785                     int nextCh = getch();
786                     if (nextCh == 'l' || nextCh == 'L') {
787                         if (len < MaxTokenLength)
788                             ppToken->name[len++] = (char)nextCh;
789                         isInt64 = true;
790                     } else
791                         ungetch();
792 
793                     nextCh = getch();
794                     if ((nextCh == 's' || nextCh == 'S') &&
795                                 pp->parseContext.intermediate.getSource() == EShSourceGlsl) {
796                         if (len < MaxTokenLength)
797                             ppToken->name[len++] = (char)nextCh;
798                         isInt16 = true;
799                     } else
800                         ungetch();
801                 } else if (ch == 'l' || ch == 'L') {
802                     if (len < MaxTokenLength)
803                         ppToken->name[len++] = (char)ch;
804                     isInt64 = true;
805                 } else if ((ch == 's' || ch == 'S') &&
806                                 pp->parseContext.intermediate.getSource() == EShSourceGlsl) {
807                     if (len < MaxTokenLength)
808                         ppToken->name[len++] = (char)ch;
809                     isInt16 = true;
810 #endif
811                 } else
812                     ungetch();
813 
814                 ppToken->name[len] = '\0';
815                 ival = 0;
816                 const unsigned oneTenthMaxInt  = 0xFFFFFFFFu / 10;
817                 const unsigned remainderMaxInt = 0xFFFFFFFFu - 10 * oneTenthMaxInt;
818                 const unsigned long long oneTenthMaxInt64  = 0xFFFFFFFFFFFFFFFFull / 10;
819                 const unsigned long long remainderMaxInt64 = 0xFFFFFFFFFFFFFFFFull - 10 * oneTenthMaxInt64;
820                 const unsigned short oneTenthMaxInt16  = 0xFFFFu / 10;
821                 const unsigned short remainderMaxInt16 = 0xFFFFu - 10 * oneTenthMaxInt16;
822                 for (int i = 0; i < numericLen; i++) {
823                     ch = ppToken->name[i] - '0';
824                     bool overflow = false;
825                     if (isInt64)
826                         overflow = (ival > oneTenthMaxInt64 || (ival == oneTenthMaxInt64 && (unsigned long long)ch > remainderMaxInt64));
827                     else if (isInt16)
828                         overflow = (ival > oneTenthMaxInt16 || (ival == oneTenthMaxInt16 && (unsigned short)ch > remainderMaxInt16));
829                     else
830                         overflow = (ival > oneTenthMaxInt || (ival == oneTenthMaxInt && (unsigned)ch > remainderMaxInt));
831                     if (overflow) {
832                         pp->parseContext.ppError(ppToken->loc, "numeric literal too big", "", "");
833                         ival = 0xFFFFFFFFFFFFFFFFull;
834                         break;
835                     } else
836                         ival = ival * 10 + ch;
837                 }
838 
839                 if (isInt64 && pp->parseContext.intermediate.getSource() == EShSourceGlsl) {
840                     if (pp->ifdepth == 0) {
841                         pp->parseContext.requireProfile(ppToken->loc, ~EEsProfile,
842                                                         "64-bit literal");
843                         pp->parseContext.profileRequires(ppToken->loc, ~EEsProfile, 0,
844                             Num_Int64_Extensions, Int64_Extensions, "64-bit literal");
845                     }
846                     ppToken->i64val = ival;
847                     return isUnsigned ? PpAtomConstUint64 : PpAtomConstInt64;
848                 } else if (isInt16) {
849                     if (pp->ifdepth == 0 && pp->parseContext.intermediate.getSource() == EShSourceGlsl) {
850                         pp->parseContext.requireProfile(ppToken->loc, ~EEsProfile,
851                                                         "16-bit  literal");
852                         pp->parseContext.profileRequires(ppToken->loc, ~EEsProfile, 0,
853                             Num_Int16_Extensions, Int16_Extensions, "16-bit literal");
854                     }
855                     ppToken->ival = (int)ival;
856                     return isUnsigned ? PpAtomConstUint16 : PpAtomConstInt16;
857                 } else {
858                     ppToken->ival = (int)ival;
859                     return isUnsigned ? PpAtomConstUint : PpAtomConstInt;
860                 }
861             }
862             break;
863         case '-':
864             ch = getch();
865             if (ch == '-') {
866                 return PpAtomDecrement;
867             } else if (ch == '=') {
868                 return PPAtomSubAssign;
869             } else {
870                 ungetch();
871                 return '-';
872             }
873         case '+':
874             ch = getch();
875             if (ch == '+') {
876                 return PpAtomIncrement;
877             } else if (ch == '=') {
878                 return PPAtomAddAssign;
879             } else {
880                 ungetch();
881                 return '+';
882             }
883         case '*':
884             ch = getch();
885             if (ch == '=') {
886                 return PPAtomMulAssign;
887             } else {
888                 ungetch();
889                 return '*';
890             }
891         case '%':
892             ch = getch();
893             if (ch == '=') {
894                 return PPAtomModAssign;
895             } else {
896                 ungetch();
897                 return '%';
898             }
899         case '^':
900             ch = getch();
901             if (ch == '^') {
902                 return PpAtomXor;
903             } else {
904                 if (ch == '=')
905                     return PpAtomXorAssign;
906                 else{
907                     ungetch();
908                     return '^';
909                 }
910             }
911 
912         case '=':
913             ch = getch();
914             if (ch == '=') {
915                 return PpAtomEQ;
916             } else {
917                 ungetch();
918                 return '=';
919             }
920         case '!':
921             ch = getch();
922             if (ch == '=') {
923                 return PpAtomNE;
924             } else {
925                 ungetch();
926                 return '!';
927             }
928         case '|':
929             ch = getch();
930             if (ch == '|') {
931                 return PpAtomOr;
932             } else if (ch == '=') {
933                 return PpAtomOrAssign;
934             } else {
935                 ungetch();
936                 return '|';
937             }
938         case '&':
939             ch = getch();
940             if (ch == '&') {
941                 return PpAtomAnd;
942             } else if (ch == '=') {
943                 return PpAtomAndAssign;
944             } else {
945                 ungetch();
946                 return '&';
947             }
948         case '<':
949             ch = getch();
950             if (ch == '<') {
951                 ch = getch();
952                 if (ch == '=')
953                     return PpAtomLeftAssign;
954                 else {
955                     ungetch();
956                     return PpAtomLeft;
957                 }
958             } else if (ch == '=') {
959                 return PpAtomLE;
960             } else {
961                 ungetch();
962                 return '<';
963             }
964         case '>':
965             ch = getch();
966             if (ch == '>') {
967                 ch = getch();
968                 if (ch == '=')
969                     return PpAtomRightAssign;
970                 else {
971                     ungetch();
972                     return PpAtomRight;
973                 }
974             } else if (ch == '=') {
975                 return PpAtomGE;
976             } else {
977                 ungetch();
978                 return '>';
979             }
980         case '.':
981             ch = getch();
982             if (ch >= '0' && ch <= '9') {
983                 ungetch();
984                 return pp->lFloatConst(0, '.', ppToken);
985             } else {
986                 ungetch();
987                 return '.';
988             }
989         case '/':
990             ch = getch();
991             if (ch == '/') {
992                 pp->inComment = true;
993                 do {
994                     ch = getch();
995                 } while (ch != '\n' && ch != EndOfInput);
996                 ppToken->space = true;
997                 pp->inComment = false;
998 
999                 return ch;
1000             } else if (ch == '*') {
1001                 ch = getch();
1002                 do {
1003                     while (ch != '*') {
1004                         if (ch == EndOfInput) {
1005                             pp->parseContext.ppError(ppToken->loc, "End of input in comment", "comment", "");
1006                             return ch;
1007                         }
1008                         ch = getch();
1009                     }
1010                     ch = getch();
1011                     if (ch == EndOfInput) {
1012                         pp->parseContext.ppError(ppToken->loc, "End of input in comment", "comment", "");
1013                         return ch;
1014                     }
1015                 } while (ch != '/');
1016                 ppToken->space = true;
1017                 // loop again to get the next token...
1018                 break;
1019             } else if (ch == '=') {
1020                 return PPAtomDivAssign;
1021             } else {
1022                 ungetch();
1023                 return '/';
1024             }
1025             break;
1026         case '\'':
1027             return pp->characterLiteral(ppToken);
1028         case '"':
1029             // TODO: If this gets enhanced to handle escape sequences, or
1030             // anything that is different than what #include needs, then
1031             // #include needs to use scanHeaderName() for this.
1032             ch = getch();
1033             while (ch != '"' && ch != '\n' && ch != EndOfInput) {
1034                 if (len < MaxTokenLength) {
1035                     ppToken->name[len] = (char)ch;
1036                     len++;
1037                     ch = getch();
1038                 } else
1039                     break;
1040             };
1041             ppToken->name[len] = '\0';
1042             if (ch != '"') {
1043                 ungetch();
1044                 pp->parseContext.ppError(ppToken->loc, "End of line in string", "string", "");
1045             }
1046             return PpAtomConstString;
1047         case ':':
1048             ch = getch();
1049             if (ch == ':')
1050                 return PpAtomColonColon;
1051             ungetch();
1052             return ':';
1053         }
1054 
1055         ch = getch();
1056     }
1057 }
1058 
1059 //
1060 // The main functional entry point into the preprocessor, which will
1061 // scan the source strings to figure out and return the next processing token.
1062 //
1063 // Return the token, or EndOfInput when no more tokens.
1064 //
tokenize(TPpToken & ppToken)1065 int TPpContext::tokenize(TPpToken& ppToken)
1066 {
1067     for(;;) {
1068         int token = scanToken(&ppToken);
1069 
1070         // Handle token-pasting logic
1071         token = tokenPaste(token, ppToken);
1072 
1073         if (token == EndOfInput) {
1074             missingEndifCheck();
1075             return EndOfInput;
1076         }
1077         if (token == '#') {
1078             if (previous_token == '\n') {
1079                 token = readCPPline(&ppToken);
1080                 if (token == EndOfInput) {
1081                     missingEndifCheck();
1082                     return EndOfInput;
1083                 }
1084                 continue;
1085             } else {
1086                 parseContext.ppError(ppToken.loc, "preprocessor directive cannot be preceded by another token", "#", "");
1087                 return EndOfInput;
1088             }
1089         }
1090         previous_token = token;
1091 
1092         if (token == '\n')
1093             continue;
1094 
1095         // expand macros
1096         if (token == PpAtomIdentifier) {
1097             switch (MacroExpand(&ppToken, false, true)) {
1098             case MacroExpandNotStarted:
1099                 break;
1100             case MacroExpandError:
1101                 return EndOfInput;
1102             case MacroExpandStarted:
1103             case MacroExpandUndef:
1104                 continue;
1105             }
1106         }
1107 
1108         switch (token) {
1109         case PpAtomIdentifier:
1110         case PpAtomConstInt:
1111         case PpAtomConstUint:
1112         case PpAtomConstFloat:
1113         case PpAtomConstInt64:
1114         case PpAtomConstUint64:
1115         case PpAtomConstInt16:
1116         case PpAtomConstUint16:
1117         case PpAtomConstDouble:
1118         case PpAtomConstFloat16:
1119             if (ppToken.name[0] == '\0')
1120                 continue;
1121             break;
1122         case PpAtomConstString:
1123             if (ifdepth == 0 && parseContext.intermediate.getSource() != EShSourceHlsl) {
1124                 // HLSL allows string literals.
1125                 parseContext.ppError(ppToken.loc, "string literals not supported", "\"\"", "");
1126                 continue;
1127             }
1128             break;
1129         case '\'':
1130             parseContext.ppError(ppToken.loc, "character literals not supported", "\'", "");
1131             continue;
1132         default:
1133             snprintf(ppToken.name, sizeof(ppToken.name), "%s", atomStrings.getString(token));
1134             break;
1135         }
1136 
1137         return token;
1138     }
1139 }
1140 
1141 //
1142 // Do all token-pasting related combining of two pasted tokens when getting a
1143 // stream of tokens from a replacement list. Degenerates to no processing if a
1144 // replacement list is not the source of the token stream.
1145 //
tokenPaste(int token,TPpToken & ppToken)1146 int TPpContext::tokenPaste(int token, TPpToken& ppToken)
1147 {
1148     // starting with ## is illegal, skip to next token
1149     if (token == PpAtomPaste) {
1150         parseContext.ppError(ppToken.loc, "unexpected location", "##", "");
1151         return scanToken(&ppToken);
1152     }
1153 
1154     int resultToken = token; // "foo" pasted with "35" is an identifier, not a number
1155 
1156     // ## can be chained, process all in the chain at once
1157     while (peekPasting()) {
1158         TPpToken pastedPpToken;
1159 
1160         // next token has to be ##
1161         token = scanToken(&pastedPpToken);
1162         assert(token == PpAtomPaste);
1163 
1164         // This covers end of macro expansion
1165         if (endOfReplacementList()) {
1166             parseContext.ppError(ppToken.loc, "unexpected location; end of replacement list", "##", "");
1167             break;
1168         }
1169 
1170         // Get the token(s) after the ##.
1171         // Because of "space" semantics, and prior tokenization, what
1172         // appeared a single token, e.g. "3A", might have been tokenized
1173         // into two tokens "3" and "A", but the "A" will have 'space' set to
1174         // false.  Accumulate all of these to recreate the original lexical
1175         // appearing token.
1176         do {
1177             token = scanToken(&pastedPpToken);
1178 
1179             // This covers end of argument expansion
1180             if (token == tMarkerInput::marker) {
1181                 parseContext.ppError(ppToken.loc, "unexpected location; end of argument", "##", "");
1182                 return resultToken;
1183             }
1184 
1185             // get the token text
1186             switch (resultToken) {
1187             case PpAtomIdentifier:
1188                 // already have the correct text in token.names
1189                 break;
1190             case '=':
1191             case '!':
1192             case '-':
1193             case '~':
1194             case '+':
1195             case '*':
1196             case '/':
1197             case '%':
1198             case '<':
1199             case '>':
1200             case '|':
1201             case '^':
1202             case '&':
1203             case PpAtomRight:
1204             case PpAtomLeft:
1205             case PpAtomAnd:
1206             case PpAtomOr:
1207             case PpAtomXor:
1208                 snprintf(ppToken.name, sizeof(ppToken.name), "%s", atomStrings.getString(resultToken));
1209                 snprintf(pastedPpToken.name, sizeof(pastedPpToken.name), "%s", atomStrings.getString(token));
1210                 break;
1211             default:
1212                 parseContext.ppError(ppToken.loc, "not supported for these tokens", "##", "");
1213                 return resultToken;
1214             }
1215 
1216             // combine the tokens
1217             if (strlen(ppToken.name) + strlen(pastedPpToken.name) > MaxTokenLength) {
1218                 parseContext.ppError(ppToken.loc, "combined tokens are too long", "##", "");
1219                 return resultToken;
1220             }
1221             snprintf(&ppToken.name[0] + strlen(ppToken.name), sizeof(ppToken.name) - strlen(ppToken.name),
1222                 "%s", pastedPpToken.name);
1223 
1224             // correct the kind of token we are making, if needed (identifiers stay identifiers)
1225             if (resultToken != PpAtomIdentifier) {
1226                 int newToken = atomStrings.getAtom(ppToken.name);
1227                 if (newToken > 0)
1228                     resultToken = newToken;
1229                 else
1230                     parseContext.ppError(ppToken.loc, "combined token is invalid", "##", "");
1231             }
1232         } while (peekContinuedPasting(resultToken));
1233     }
1234 
1235     return resultToken;
1236 }
1237 
1238 // Checks if we've seen balanced #if...#endif
missingEndifCheck()1239 void TPpContext::missingEndifCheck()
1240 {
1241     if (ifdepth > 0)
1242         parseContext.ppError(parseContext.getCurrentLoc(), "missing #endif", "", "");
1243 }
1244 
1245 } // end namespace glslang
1246