1# -*- coding: utf-8 -*-
2# Copyright JS Foundation and other contributors, https://js.foundation/
3#
4# Redistribution and use in source and binary forms, with or without
5# modification, are permitted provided that the following conditions are met:
6#
7#   * Redistributions of source code must retain the above copyright
8#     notice, this list of conditions and the following disclaimer.
9#   * Redistributions in binary form must reproduce the above copyright
10#     notice, this list of conditions and the following disclaimer in the
11#     documentation and/or other materials provided with the distribution.
12#
13# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
14# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16# ARE DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY
17# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
18# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES
19# LOSS OF USE, DATA, OR PROFITS OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
20# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
21# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
22# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
23
24from __future__ import absolute_import, unicode_literals
25
26from collections import deque
27
28from .objects import Object
29from .error_handler import ErrorHandler
30from .scanner import Scanner, SourceLocation, Position, RegExp
31from .token import Token, TokenName
32
33
class BufferEntry(Object):
    """A single item queued by the tokenizer for delivery to the caller.

    :param type: Name of the entry kind (a token type name, or
        ``'BlockComment'`` / ``'LineComment'`` for comments).
    :param value: Source text covered by the entry.
    :param regex: Optional regular-expression details; None otherwise.
    :param range: Optional source range of the entry; None when not tracked.
    :param loc: Optional source location of the entry; None when not tracked.
    """

    def __init__(self, type, value, regex=None, range=None, loc=None):
        # Attributes are stored in a fixed order: type, value, regex,
        # range, loc.
        fields = (
            ('type', type),
            ('value', value),
            ('regex', regex),
            ('range', range),
            ('loc', loc),
        )
        for attribute, content in fields:
            setattr(self, attribute, content)
41
42
class Reader(object):
    """Remember preceding punctuators/keywords to classify a ``/``.

    ``values`` holds one slot per appended token: the token text for
    punctuators and keywords, ``None`` for everything else.  ``curly`` and
    ``paren`` hold the index in ``values`` of the most recently appended
    ``{`` and ``(`` respectively (``-1`` until one has been seen).
    """

    # A function following one of those tokens is an expression.
    _BEFORE_FUNCTION_EXPRESSION = frozenset((
        '(', '{', '[', 'in', 'typeof', 'instanceof', 'new',
        'return', 'case', 'delete', 'throw', 'void',
        # assignment operators
        '=', '+=', '-=', '*=', '**=', '/=', '%=', '<<=', '>>=', '>>>=',
        '&=', '|=', '^=', ',',
        # binary/unary operators
        '+', '-', '*', '**', '/', '%', '++', '--', '<<', '>>', '>>>', '&',
        '|', '^', '!', '~', '&&', '||', '?', ':', '===', '==', '>=',
        '<=', '<', '>', '!=', '!==',
    ))

    def __init__(self):
        self.values = []
        self.curly = self.paren = -1

    def _valueAt(self, index):
        """Return ``self.values[index]``, or None if ``index`` is out of range.

        Negative indices count as out of range.  The look-behind offsets
        used by ``isRegexStart`` (``paren - 1``, ``curly - 3`` ...) can go
        below zero; plain list indexing would then wrap around to the end
        of the list or raise ``IndexError``.
        """
        if 0 <= index < len(self.values):
            return self.values[index]
        return None

    def beforeFunctionExpression(self, t):
        """Return True if a function following token text ``t`` is an expression."""
        return t in self._BEFORE_FUNCTION_EXPRESSION

    # Determine if forward slash (/) is an operator or part of a regular expression
    # https://github.com/mozilla/sweet.js/wiki/design
    def isRegexStart(self):
        """Return True if a ``/`` at the current position may start a regex.

        The decision is based only on the tokens recorded via ``append``.
        """
        if not self.values:
            # Nothing precedes the slash.
            return True

        previous = self.values[-1]
        if previous is None:
            # Preceded by a token that is neither punctuator nor keyword.
            return False

        if previous in ('this', ']'):
            return False

        if previous == ')':
            # Look at the token just before the last recorded '('.
            keyword = self._valueAt(self.paren - 1)
            return keyword in ('if', 'while', 'for', 'with')

        if previous == '}':
            # Dividing a function by anything makes little sense,
            # but we have to check for that.
            if self._valueAt(self.curly - 3) == 'function':
                # Anonymous function, e.g. function(){} /42
                check = self._valueAt(self.curly - 4)
                return not self.beforeFunctionExpression(check) if check else False
            if self._valueAt(self.curly - 4) == 'function':
                # Named function, e.g. function f(){} /42/
                check = self._valueAt(self.curly - 5)
                return not self.beforeFunctionExpression(check) if check else True
            return True

        # Any other punctuator or keyword.
        return True

    def append(self, token):
        """Record ``token`` so later ``isRegexStart`` calls can look back at it.

        Punctuators and keywords are stored by value (updating ``curly`` /
        ``paren`` for ``{`` / ``(``); every other token is stored as None.
        """
        if token.type in (Token.Punctuator, Token.Keyword):
            if token.value == '{':
                self.curly = len(self.values)
            elif token.value == '(':
                self.paren = len(self.values)
            self.values.append(token.value)
        else:
            self.values.append(None)
104
105
class Config(Object):
    """Tokenizer settings exposed as plain attributes.

    :param tolerant: Stored as ``self.tolerant``.
    :param comment: Stored as ``self.comment``.
    :param range: Stored as ``self.range``.
    :param loc: Stored as ``self.loc``.
    :param options: Any further keyword arguments; each one becomes an
        attribute of the same name.
    """

    def __init__(self, tolerant=None, comment=None, range=None, loc=None, **options):
        named = (
            ('tolerant', tolerant),
            ('comment', comment),
            ('range', range),
            ('loc', loc),
        )
        for attribute, setting in named:
            setattr(self, attribute, setting)
        # Extra options are applied last, so they are set after the named ones.
        for attribute in options:
            setattr(self, attribute, options[attribute])
114
115
class Tokenizer(object):
    """Pull-style tokenizer: each ``getNextToken`` call yields one entry."""

    def __init__(self, code, options):
        """Set up scanner, error handler and look-behind reader.

        :param code: Source text handed to the ``Scanner``.
        :param options: Mapping expanded into ``Config`` keyword arguments.
        """
        config = Config(**options)

        handler = ErrorHandler()
        handler.tolerant = config.tolerant

        scanner = Scanner(code, handler)
        scanner.trackComment = config.comment

        self.config = config
        self.errorHandler = handler
        self.scanner = scanner
        self.trackRange = config.range
        self.trackLoc = config.loc
        self.buffer = deque()
        self.reader = Reader()

    def errors(self):
        """Return the errors collected by the error handler."""
        handler = self.errorHandler
        return handler.errors

    def _queueComments(self):
        """Scan comments at the current position; buffer them if tracked."""
        scanner = self.scanner
        found = scanner.scanComments()
        if not scanner.trackComment:
            return

        for item in found:
            text = scanner.source[item.slice[0]:item.slice[1]]
            kind = 'BlockComment' if item.multiLine else 'LineComment'
            entry = BufferEntry(type=kind, value=text)
            if self.trackRange:
                entry.range = item.range
            if self.trackLoc:
                entry.loc = item.loc
            self.buffer.append(entry)

    def _scanToken(self):
        """Scan one token, trying a regular expression first when plausible."""
        scanner = self.scanner
        startsWithSlash = scanner.source[scanner.index] == '/'
        if not (startsWithSlash and self.reader.isRegexStart()):
            return scanner.lex()

        saved = scanner.saveState()
        try:
            return scanner.scanRegExp()
        except Exception:
            # Not a valid regular expression after all: rewind and lex normally.
            scanner.restoreState(saved)
            return scanner.lex()

    def _queueToken(self):
        """Scan the next token and append its entry to the buffer."""
        scanner = self.scanner

        # The start position has to be captured before the token is consumed.
        if self.trackLoc:
            begin = Position(
                line=scanner.lineNumber,
                column=scanner.index - scanner.lineStart
            )
            loc = SourceLocation(start=begin, end=Position())

        token = self._scanToken()
        self.reader.append(token)

        text = scanner.source[token.start:token.end]
        entry = BufferEntry(type=TokenName[token.type], value=text)
        if self.trackRange:
            entry.range = [token.start, token.end]
        if self.trackLoc:
            loc.end = Position(
                line=scanner.lineNumber,
                column=scanner.index - scanner.lineStart
            )
            entry.loc = loc
        if token.type is Token.RegularExpression:
            entry.regex = RegExp(pattern=token.pattern, flags=token.flags)

        self.buffer.append(entry)

    def getNextToken(self):
        """Return the next buffered entry, or None when nothing is left.

        When the buffer is empty it is refilled first: comments (if comment
        tracking is on) followed by one token, unless the scanner is at the
        end of the input.
        """
        if not self.buffer:
            self._queueComments()
            if not self.scanner.eof():
                self._queueToken()

        if self.buffer:
            return self.buffer.popleft()
        return None
194