# -*- coding: utf-8 -*-
# Copyright JS Foundation and other contributors, https://js.foundation/
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
#   * Redistributions of source code must retain the above copyright
#     notice, this list of conditions and the following disclaimer.
#   * Redistributions in binary form must reproduce the above copyright
#     notice, this list of conditions and the following disclaimer in the
#     documentation and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY
# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES
# LOSS OF USE, DATA, OR PROFITS OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

from __future__ import absolute_import, unicode_literals

from collections import deque

from .objects import Object
from .error_handler import ErrorHandler
from .scanner import Scanner, SourceLocation, Position, RegExp
from .token import Token, TokenName


class BufferEntry(Object):
    """One record queued by ``Tokenizer``: either a comment or a token.

    ``type`` is the entry's type name, ``value`` its source text; ``regex``,
    ``range`` and ``loc`` are optional and default to ``None``.
    """

    def __init__(self, type, value, regex=None, range=None, loc=None):
        self.type = type
        self.value = value
        self.regex = regex
        self.range = range
        self.loc = loc


class Reader(object):
    """Remembers previously seen tokens to disambiguate ``/``.

    ``values`` holds the value of every punctuator/keyword token appended so
    far and ``None`` for any other token. ``curly`` and ``paren`` are the
    indices in ``values`` of the most recent ``{`` and ``(`` (``-1`` until one
    has been seen).
    """

    def __init__(self):
        self.values = []
        self.curly = self.paren = -1

    def _valueAt(self, index):
        """Return ``self.values[index]``, or ``None`` if out of range.

        Plain list indexing is unsuitable for the look-behind arithmetic in
        ``isRegexStart``: a negative index would silently wrap around to the
        end of the list (or raise ``IndexError``) instead of meaning
        "there is no such token".
        """
        if 0 <= index < len(self.values):
            return self.values[index]
        return None

    # A function following one of those tokens is an expression.
    def beforeFunctionExpression(self, t):
        """Return True if ``t`` is one of the tokens listed below."""
        return t in (
            '(', '{', '[', 'in', 'typeof', 'instanceof', 'new',
            'return', 'case', 'delete', 'throw', 'void',
            # assignment operators
            '=', '+=', '-=', '*=', '**=', '/=', '%=', '<<=', '>>=', '>>>=',
            '&=', '|=', '^=', ',',
            # binary/unary operators
            '+', '-', '*', '**', '/', '%', '++', '--', '<<', '>>', '>>>', '&',
            '|', '^', '!', '~', '&&', '||', '?', ':', '===', '==', '>=',
            '<=', '<', '>', '!=', '!=='
        )

    # Determine if forward slash (/) is an operator or part of a regular expression
    # https://github.com/mozilla/sweet.js/wiki/design
    def isRegexStart(self):
        """Return True if a ``/`` at the current position may start a regex.

        The decision is based only on the tokens recorded via ``append``.
        With no recorded tokens the answer is True.
        """
        if not self.values:
            return True

        previous = self.values[-1]
        # After a non-punctuator/keyword token (recorded as None) a slash is
        # treated as an operator; after any other token it defaults to regex.
        regex = previous is not None

        if previous in ('this', ']'):
            regex = False
        elif previous == ')':
            # Look at the token just before the most recent '('.
            keyword = self._valueAt(self.paren - 1)
            regex = keyword in ('if', 'while', 'for', 'with')
        elif previous == '}':
            # Dividing a function by anything makes little sense,
            # but we have to check for that.
            regex = True
            if self._valueAt(self.curly - 3) == 'function':
                # Anonymous function, e.g. function(){} /42
                check = self._valueAt(self.curly - 4)
                regex = not self.beforeFunctionExpression(check) if check else False
            elif self._valueAt(self.curly - 4) == 'function':
                # Named function, e.g. function f(){} /42/
                check = self._valueAt(self.curly - 5)
                regex = not self.beforeFunctionExpression(check) if check else True

        return regex

    def append(self, token):
        """Record ``token`` for later look-behind.

        Punctuator and keyword tokens are stored by value (also updating
        ``curly``/``paren`` for ``{`` and ``(``); every other token is stored
        as ``None``.
        """
        if token.type in (Token.Punctuator, Token.Keyword):
            if token.value == '{':
                self.curly = len(self.values)
            elif token.value == '(':
                self.paren = len(self.values)
            self.values.append(token.value)
        else:
            self.values.append(None)


class Config(Object):
    """Tokenizer options; unknown keyword options become attributes as-is."""

    def __init__(self, tolerant=None, comment=None, range=None, loc=None, **options):
        self.tolerant = tolerant
        self.comment = comment
        self.range = range
        self.loc = loc
        for k, v in options.items():
            setattr(self, k, v)


class Tokenizer(object):
    """Produces ``BufferEntry`` records for ``code``, one per call.

    ``options`` is a mapping expanded into ``Config``; its ``tolerant``,
    ``comment``, ``range`` and ``loc`` entries are forwarded to the error
    handler, the scanner's comment tracking, and the range/location
    bookkeeping respectively.
    """

    def __init__(self, code, options):
        self.config = Config(**options)

        self.errorHandler = ErrorHandler()
        self.errorHandler.tolerant = self.config.tolerant
        self.scanner = Scanner(code, self.errorHandler)
        self.scanner.trackComment = self.config.comment

        self.trackRange = self.config.range
        self.trackLoc = self.config.loc
        self.buffer = deque()
        self.reader = Reader()

    def errors(self):
        """Return the errors collected by the error handler."""
        return self.errorHandler.errors

    def getNextToken(self):
        """Return the next queued ``BufferEntry``, or ``None`` if none is left.

        When the buffer is empty it is refilled with any comments found by
        the scanner (only if comment tracking is on) followed by at most one
        token.
        """
        if not self.buffer:

            comments = self.scanner.scanComments()
            if self.scanner.trackComment:
                for e in comments:
                    value = self.scanner.source[e.slice[0]:e.slice[1]]
                    comment = BufferEntry(
                        type='BlockComment' if e.multiLine else 'LineComment',
                        value=value
                    )
                    if self.trackRange:
                        comment.range = e.range
                    if self.trackLoc:
                        comment.loc = e.loc
                    self.buffer.append(comment)

            if not self.scanner.eof():
                if self.trackLoc:
                    # Capture the start position before the token is scanned;
                    # the end position is filled in afterwards.
                    loc = SourceLocation(
                        start=Position(
                            line=self.scanner.lineNumber,
                            column=self.scanner.index - self.scanner.lineStart
                        ),
                        end=Position(),
                    )

                maybeRegex = self.scanner.source[self.scanner.index] == '/' and self.reader.isRegexStart()
                if maybeRegex:
                    state = self.scanner.saveState()
                    try:
                        token = self.scanner.scanRegExp()
                    except Exception:
                        # Deliberate best effort: if the text does not scan as
                        # a regex, rewind and lex it as an ordinary token.
                        self.scanner.restoreState(state)
                        token = self.scanner.lex()
                else:
                    token = self.scanner.lex()

                self.reader.append(token)

                entry = BufferEntry(
                    type=TokenName[token.type],
                    value=self.scanner.source[token.start:token.end]
                )
                if self.trackRange:
                    entry.range = [token.start, token.end]
                if self.trackLoc:
                    loc.end = Position(
                        line=self.scanner.lineNumber,
                        column=self.scanner.index - self.scanner.lineStart
                    )
                    entry.loc = loc
                if token.type is Token.RegularExpression:
                    entry.regex = RegExp(
                        pattern=token.pattern,
                        flags=token.flags,
                    )

                self.buffer.append(entry)

        return self.buffer.popleft() if self.buffer else None