1# ------------------------------------------------------------------------------
2#
3# Project: pycql <https://github.com/geopython/pycql>
4# Authors: Fabian Schindler <fabian.schindler@eox.at>
5#
6# ------------------------------------------------------------------------------
7# Copyright (C) 2019 EOX IT Services GmbH
8#
9# Permission is hereby granted, free of charge, to any person obtaining a copy
10# of this software and associated documentation files (the "Software"), to deal
11# in the Software without restriction, including without limitation the rights
12# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
13# copies of the Software, and to permit persons to whom the Software is
14# furnished to do so, subject to the following conditions:
15#
16# The above copyright notice and this permission notice shall be included in all
17# copies of this Software or works derived from this Software.
18#
19# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
22# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
24# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
25# THE SOFTWARE.
26# ------------------------------------------------------------------------------
27
28import logging
29
30from ply import lex
31from ply.lex import TOKEN
32
33from . import values
34
35LOGGER = logging.getLogger(__name__)
36
37
class CQLLexer:
    """PLY based tokenizer for CQL filter expressions.

    The class bundles the token names, the regular expressions and the rule
    functions that ``ply.lex`` collects from an object. Geometry, envelope,
    time and duration literals are converted through the factories passed to
    the constructor; numbers become ``float``/``int`` and quoted strings lose
    their quotes.

    Rules written as functions are tried by PLY in the order in which they
    are defined, so the order of the ``t_*`` methods below is significant.
    Their patterns are attached with ``@TOKEN`` (or, for ``t_newline``, given
    as the function docstring), which is why the rule functions are
    documented with ``#`` comments rather than docstrings.
    """

    def __init__(self, geometry_factory=values.Geometry, bbox_factory=values.BBox,
                 time_factory=values.Time, duration_factory=values.Duration, **kwargs):
        """Build the underlying PLY lexer and remember the value factories.

        :param geometry_factory: called with the matched WKT text of a
                                 GEOMETRY token
        :param bbox_factory: called with the list of floats of an ENVELOPE
                             token
        :param time_factory: called with the matched text of a TIME token
        :param duration_factory: called with the matched text of a DURATION
                                 token
        :param kwargs: forwarded unchanged to ``ply.lex.lex``
        """
        self.lexer = lex.lex(object=self, **kwargs)
        self.geometry_factory = geometry_factory
        self.bbox_factory = bbox_factory
        self.time_factory = time_factory
        self.duration_factory = duration_factory
        # Defined up front so that reading ``last_token`` before the first
        # call to ``token()`` yields None instead of an AttributeError.
        self.last_token = None

    def build(self, **kwargs):
        """Do nothing; the lexer is already built in ``__init__``.

        The method and its ``kwargs`` are kept so existing callers continue
        to work.
        """

    def input(self, *args):
        """Pass the arguments (the text to tokenize) to the PLY lexer."""
        self.lexer.input(*args)

    def token(self):
        """Return the next token of the PLY lexer.

        The returned token is also stored in ``self.last_token``.
        """
        self.last_token = self.lexer.token()
        return self.last_token

    # NOTE(review): the unit names are part of ``keywords`` and therefore of
    # ``tokens``. The names containing a space are presumably rejected by
    # PLY's token name validation unless the lexer is built with
    # ``optimize=True`` -- confirm against the callers before changing this.
    keywords = (
        "NOT", "AND", "OR",
        "BETWEEN", "LIKE", "ILIKE", "IN", "IS", "NULL",
        "BEFORE", "AFTER", "DURING", "INTERSECTS", "DISJOINT", "CONTAINS",
        "WITHIN", "TOUCHES", "CROSSES", "OVERLAPS", "EQUALS", "RELATE",
        "DWITHIN", "BEYOND", "BBOX",
        "feet", "meters", "statute miles", "nautical miles", "kilometers"
    )

    tokens = keywords + (
        # Operators
        'PLUS', 'MINUS', 'TIMES', 'DIVIDE',
        'LT', 'LE', 'GT', 'GE', 'EQ', 'NE',

        'LPAREN', 'RPAREN',
        'LBRACKET', 'RBRACKET',
        'COMMA',

        'GEOMETRY',
        'ENVELOPE',

        'UNITS',

        'ATTRIBUTE',
        'TIME',
        'DURATION',
        'FLOAT',
        'INTEGER',
        'QUOTED',
    )

    # Maps the text of an identifier to its token type; every keyword is its
    # own token type. The lookup in t_ATTRIBUTE is case sensitive.
    keyword_map = {keyword: keyword for keyword in keywords}

    identifier_pattern = r'[a-zA-Z_$][0-9a-zA-Z_$]*'

    int_pattern = r'-?[0-9]+'
    # NOTE(review): the float pattern also matches plain digit runs and
    # t_FLOAT is defined before t_INTEGER, so unsigned integer literals appear
    # to be emitted as FLOAT, while a signed literal such as "-5.5" appears to
    # be split into INTEGER "-5" and FLOAT ".5" -- confirm whether this is
    # intended. A stricter alternative that was considered before:
    # float_pattern = r'(?:[0-9]+[.][0-9]*|[.][0-9]+)(?:[Ee][-+]?[0-9]+)?'
    float_pattern = r'[0-9]*\.?[0-9]+([eE][-+]?[0-9]+)?'

    time_pattern = r"\d{4}-\d{2}-\d{2}T[0-2][0-9]:[0-5][0-9]:[0-5][0-9]Z"

    # All components of a duration are optional, so without the lookahead a
    # bare "P" would match and every attribute starting with a capital "P"
    # would be cut into DURATION "P" plus the rest. The lookahead requires at
    # least one "<digits><designator>" component right after "P" or "PT".
    duration_pattern = (
        r"P(?=\d+[YMD]|T\d+[HMS])"
        r"((\d+Y)?(\d+M)?(\d+D)?)?(T(\d+H)?(\d+M)?(\d+S)?)?"
    )
    quoted_string_pattern = r'(\"[^"]*\")|(\'[^\']*\')'

    # PLY compiles the rules with re.VERBOSE by default, where unescaped
    # whitespace in a pattern is ignored; the space of the two-word units is
    # therefore written as "[ ]". The trailing negative lookahead keeps the
    # rule from matching the beginning of a longer identifier such as
    # "meters_above", which has to become an ATTRIBUTE.
    units_pattern = (
        r'((feet)|(meters)|(statute[ ]miles)|(nautical[ ]miles)|(kilometers))'
        r'(?![0-9a-zA-Z_$])'
    )

    # for geometry parsing

    # a simple pattern that allows the simple float and integer notations (but
    # not the scientific ones). Maybe TODO
    number_pattern = r'-?[0-9]*\.?[0-9]+'

    coordinate_2d_pattern = r'%s\s+%s\s*' % (number_pattern, number_pattern)
    coordinate_3d_pattern = r'%s\s+%s\s*' % (
        coordinate_2d_pattern, number_pattern
    )
    coordinate_4d_pattern = r'%s\s+%s\s*' % (
        coordinate_3d_pattern, number_pattern
    )
    coordinate_pattern = r'((%s)|(%s)|(%s))' % (
        coordinate_2d_pattern, coordinate_3d_pattern, coordinate_4d_pattern
    )

    # one or more coordinates, separated by commas
    coordinates_pattern = r'%s(\s*,\s*%s)*' % (
        coordinate_pattern, coordinate_pattern
    )

    # a parenthesized coordinate list and a comma separated list of those
    coordinate_group_pattern = r'\(\s*%s\s*\)' % coordinates_pattern
    coordinate_groups_pattern = r'%s(\s*,\s*%s)*' % (
        coordinate_group_pattern, coordinate_group_pattern
    )

    # one more level of nesting, as needed for MULTIPOLYGON
    nested_coordinate_group_pattern = r'\(\s*%s\s*\)' % coordinate_groups_pattern
    nested_coordinate_groups_pattern = r'%s(\s*,\s*%s)*' % (
        nested_coordinate_group_pattern, nested_coordinate_group_pattern
    )

    geometry_pattern = (
        r'(POINT\s*\(%s\))|' % coordinate_pattern +
        r'((MULTIPOINT|LINESTRING)\s*\(%s\))|' % coordinates_pattern +
        r'((MULTIPOINT|MULTILINESTRING|POLYGON)\s*\(%s\))|' % (
            coordinate_groups_pattern
        ) +
        r'(MULTIPOLYGON\s*\(%s\))' % nested_coordinate_groups_pattern
    )
    envelope_pattern = r'ENVELOPE\s*\((\s*%s\s*){4}\)' % number_pattern

    t_PLUS = r'\+'
    t_MINUS = r'-'
    t_TIMES = r'\*'
    t_DIVIDE = r'/'
    t_OR = r'OR'
    t_AND = r'AND'
    t_LT = r'<'
    t_GT = r'>'
    t_LE = r'<='
    t_GE = r'>='
    t_EQ = r'='
    t_NE = r'<>'

    # Delimiters
    t_LPAREN = r'\('
    t_RPAREN = r'\)'
    t_LBRACKET = r'\['
    t_RBRACKET = r'\]'
    t_COMMA = r','

    # WKT geometry literal; the matched text is handed to the geometry factory
    @TOKEN(geometry_pattern)
    def t_GEOMETRY(self, t):
        t.value = self.geometry_factory(t.value)
        return t

    # ENVELOPE literal; the whitespace separated numbers between the
    # parentheses are converted to floats and handed to the bbox factory
    @TOKEN(envelope_pattern)
    def t_ENVELOPE(self, t):
        inner = t.value.partition('(')[2].partition(')')[0]
        t.value = self.bbox_factory([float(number) for number in inner.split()])
        return t

    # unit of measure; the value stays the matched text
    @TOKEN(units_pattern)
    def t_UNITS(self, t):
        return t

    # timestamp literal, converted by the time factory
    @TOKEN(time_pattern)
    def t_TIME(self, t):
        t.value = self.time_factory(t.value)
        return t

    # duration literal, converted by the duration factory
    @TOKEN(duration_pattern)
    def t_DURATION(self, t):
        t.value = self.duration_factory(t.value)
        return t

    @TOKEN(float_pattern)
    def t_FLOAT(self, t):
        t.value = float(t.value)
        return t

    @TOKEN(int_pattern)
    def t_INTEGER(self, t):
        t.value = int(t.value)
        return t

    # quoted string; the surrounding quote characters are stripped
    @TOKEN(quoted_string_pattern)
    def t_QUOTED(self, t):
        t.value = t.value[1:-1]
        return t

    # identifier; turned into the keyword token when the text is a keyword
    @TOKEN(identifier_pattern)
    def t_ATTRIBUTE(self, t):
        t.type = self.keyword_map.get(t.value, "ATTRIBUTE")
        return t

    # The docstring of this rule is its regular expression and must stay
    # as it is. Newlines produce no token, they only advance the line count.
    def t_newline(self, t):
        r'\n+'
        t.lexer.lineno += len(t.value)

    # A string containing ignored characters (spaces and tabs)
    t_ignore = ' \t'

    # Called for input that no rule matches. The token is only logged on
    # debug level and the input position is not advanced here.
    # NOTE(review): per the PLY documentation the lexer raises a LexError
    # when the error handler does not skip any input -- confirm that callers
    # rely on that exception.
    def t_error(self, t):
        LOGGER.debug(t)
224