# ------------------------------------------------------------------------------
#
# Project: pycql <https://github.com/geopython/pycql>
# Authors: Fabian Schindler <fabian.schindler@eox.at>
#
# ------------------------------------------------------------------------------
# Copyright (C) 2019 EOX IT Services GmbH
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies of this Software or works derived from this Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
# ------------------------------------------------------------------------------

import logging

from ply import lex
from ply.lex import TOKEN

from . import values

LOGGER = logging.getLogger(__name__)


class CQLLexer:
    """PLY based tokenizer for CQL filter expressions.

    Literal tokens (geometries, envelopes, timestamps and durations) are
    converted to values through the factories handed to the constructor;
    numbers are converted to ``float``/``int`` and quoted strings are
    stripped of their quotes.
    """

    def __init__(self, geometry_factory=values.Geometry,
                 bbox_factory=values.BBox, time_factory=values.Time,
                 duration_factory=values.Duration, **kwargs):
        """Build the underlying PLY lexer and store the value factories.

        :param geometry_factory: called with the matched geometry text
        :param bbox_factory: called with the list of the floats found
                             between the parentheses of an ``ENVELOPE``
        :param time_factory: called with the matched timestamp text
        :param duration_factory: called with the matched duration text
        :param kwargs: passed on unchanged to ``ply.lex.lex``
        """
        self.lexer = lex.lex(object=self, **kwargs)
        self.geometry_factory = geometry_factory
        self.bbox_factory = bbox_factory
        self.time_factory = time_factory
        self.duration_factory = duration_factory
        # make the attribute readable before the first call to ``token()``
        self.last_token = None

    def build(self, **kwargs):
        """Do nothing; the lexer is already built in ``__init__``.

        The keyword arguments are accepted and ignored.
        """

    def input(self, *args):
        """Hand the arguments on to the ``input`` method of the PLY lexer."""
        self.lexer.input(*args)

    def token(self):
        """Fetch the next token from the PLY lexer.

        The token is also remembered as ``self.last_token``.

        :returns: whatever the ``token`` method of the PLY lexer returns
        """
        self.last_token = self.lexer.token()
        return self.last_token

    # NOTE(review): "statute miles" and "nautical miles" contain a space and
    # are therefore no plain identifiers; PLY's token name validation
    # presumably rejects them unless the lexer is built with ``optimize=True``
    # -- confirm against the callers before touching this tuple.
    keywords = (
        "NOT", "AND", "OR",
        "BETWEEN", "LIKE", "ILIKE", "IN", "IS", "NULL",
        "BEFORE", "AFTER", "DURING", "INTERSECTS", "DISJOINT", "CONTAINS",
        "WITHIN", "TOUCHES", "CROSSES", "OVERLAPS", "EQUALS", "RELATE",
        "DWITHIN", "BEYOND", "BBOX",
        "feet", "meters", "statute miles", "nautical miles", "kilometers"
    )

    tokens = keywords + (
        # Operators
        'PLUS', 'MINUS', 'TIMES', 'DIVIDE',
        'LT', 'LE', 'GT', 'GE', 'EQ', 'NE',

        'LPAREN', 'RPAREN',
        'LBRACKET', 'RBRACKET',
        'COMMA',

        'GEOMETRY',
        'ENVELOPE',

        'UNITS',

        'ATTRIBUTE',
        'TIME',
        'DURATION',
        'FLOAT',
        'INTEGER',
        'QUOTED',
    )

    # maps the text of each keyword to its token type (which is the same text)
    keyword_map = {keyword: keyword for keyword in keywords}

    identifier_pattern = r'[a-zA-Z_$][0-9a-zA-Z_$]*'

    int_pattern = r'-?[0-9]+'
    # float_pattern = r'(?:[0-9]+[.][0-9]*|[.][0-9]+)(?:[Ee][-+]?[0-9]+)?'
    # NOTE(review): the decimal point is optional, so this pattern also
    # matches plain unsigned integers. As ``t_FLOAT`` is defined before
    # ``t_INTEGER`` such input is presumably tokenized as FLOAT -- confirm
    # whether that is intended.
    float_pattern = r'[0-9]*\.?[0-9]+([eE][-+]?[0-9]+)?'

    time_pattern = r"\d{4}-\d{2}-\d{2}T[0-2][0-9]:[0-5][0-9]:[0-5][0-9]Z"
    duration_pattern = (
        # The lookahead demands at least one date component directly after
        # the "P" or one time component directly after "PT". Without it the
        # pattern matches a bare "P", and because ``t_DURATION`` is defined
        # before ``t_ATTRIBUTE`` every identifier starting with "P" would be
        # split into a DURATION token and the rest of the name.
        r"P(?=\d+[YMD]|T\d+[HMS])"
        r"((\d+Y)?(\d+M)?(\d+D)?)?(T(\d+H)?(\d+M)?(\d+S)?)?"
    )
    quoted_string_pattern = r'(\"[^"]*\")|(\'[^\']*\')'

    # for geometry parsing

    # a simple pattern that allows the simple float and integer notations (but
    # not the scientific ones). Maybe TODO
    number_pattern = r'-?[0-9]*\.?[0-9]+'

    coordinate_2d_pattern = r'%s\s+%s\s*' % (number_pattern, number_pattern)
    coordinate_3d_pattern = r'%s\s+%s\s*' % (
        coordinate_2d_pattern, number_pattern
    )
    coordinate_4d_pattern = r'%s\s+%s\s*' % (
        coordinate_3d_pattern, number_pattern
    )
    coordinate_pattern = r'((%s)|(%s)|(%s))' % (
        coordinate_2d_pattern, coordinate_3d_pattern, coordinate_4d_pattern
    )

    # one or more comma separated coordinates
    coordinates_pattern = r'%s(\s*,\s*%s)*' % (
        coordinate_pattern, coordinate_pattern
    )

    # coordinates wrapped in parentheses, and comma separated lists thereof
    coordinate_group_pattern = r'\(\s*%s\s*\)' % coordinates_pattern
    coordinate_groups_pattern = r'%s(\s*,\s*%s)*' % (
        coordinate_group_pattern, coordinate_group_pattern
    )

    # one further level of parentheses, as used for MULTIPOLYGON
    nested_coordinate_group_pattern = (
        r'\(\s*%s\s*\)' % coordinate_groups_pattern
    )
    nested_coordinate_groups_pattern = r'%s(\s*,\s*%s)*' % (
        nested_coordinate_group_pattern, nested_coordinate_group_pattern
    )

    geometry_pattern = (
        r'(POINT\s*\(%s\))|' % coordinate_pattern +
        r'((MULTIPOINT|LINESTRING)\s*\(%s\))|' % coordinates_pattern +
        r'((MULTIPOINT|MULTILINESTRING|POLYGON)\s*\(%s\))|' % (
            coordinate_groups_pattern
        ) +
        r'(MULTIPOLYGON\s*\(%s\))' % nested_coordinate_groups_pattern
    )
    envelope_pattern = r'ENVELOPE\s*\((\s*%s\s*){4}\)' % number_pattern

    t_PLUS = r'\+'
    t_MINUS = r'-'
    t_TIMES = r'\*'
    t_DIVIDE = r'/'
    t_OR = r'OR'
    t_AND = r'AND'
    t_LT = r'<'
    t_GT = r'>'
    t_LE = r'<='
    t_GE = r'>='
    t_EQ = r'='
    t_NE = r'<>'

    # Delimiters
    t_LPAREN = r'\('
    t_RPAREN = r'\)'
    t_LBRACKET = r'\['
    t_RBRACKET = r'\]'
    t_COMMA = r','

    # The token rules below are documented with comments instead of
    # docstrings on purpose: PLY may read the docstring of a rule function as
    # its regular expression.

    # geometry literal: the matched text is handed to the geometry factory
    @TOKEN(geometry_pattern)
    def t_GEOMETRY(self, t):
        t.value = self.geometry_factory(t.value)
        return t

    # envelope literal: the whitespace separated numbers between the
    # parentheses are converted to floats and handed to the bbox factory
    @TOKEN(envelope_pattern)
    def t_ENVELOPE(self, t):
        inner = t.value.partition('(')[2].partition(')')[0]
        bbox = [float(number) for number in inner.split()]
        t.value = self.bbox_factory(bbox)
        return t

    # unit of measure; the value stays the matched text. The blank in the
    # two-word units is written as ``[ ]`` because PLY compiles its rules
    # with ``re.VERBOSE`` by default, which ignores unescaped whitespace: a
    # literal blank would only ever match the words written together.
    @TOKEN(
        r'(feet)|(meters)|(statute[ ]miles)|(nautical[ ]miles)|(kilometers)'
    )
    def t_UNITS(self, t):
        return t

    # timestamp literal: the matched text is handed to the time factory
    @TOKEN(time_pattern)
    def t_TIME(self, t):
        t.value = self.time_factory(t.value)
        return t

    # duration literal: the matched text is handed to the duration factory
    @TOKEN(duration_pattern)
    def t_DURATION(self, t):
        t.value = self.duration_factory(t.value)
        return t

    @TOKEN(float_pattern)
    def t_FLOAT(self, t):
        t.value = float(t.value)
        return t

    @TOKEN(int_pattern)
    def t_INTEGER(self, t):
        t.value = int(t.value)
        return t

    # quoted string: the value is the text without the surrounding quotes
    @TOKEN(quoted_string_pattern)
    def t_QUOTED(self, t):
        t.value = t.value[1:-1]
        return t

    # identifier: the token type becomes the keyword when the text is one of
    # ``keywords`` (exact match), and stays ATTRIBUTE otherwise
    @TOKEN(identifier_pattern)
    def t_ATTRIBUTE(self, t):
        t.type = self.keyword_map.get(t.value, "ATTRIBUTE")
        return t

    # newlines produce no token, they only advance the line counter
    def t_newline(self, t):
        r'\n+'
        t.lexer.lineno += len(t.value)

    # A string containing ignored characters (spaces and tabs)
    t_ignore = ' \t'

    # Called by PLY for input that no rule matches. The offending token is
    # only logged and the input position is left untouched.
    # NOTE(review): per the PLY documentation the lexer raises a ``LexError``
    # itself when the handler does not skip any input -- confirm.
    def t_error(self, t):
        LOGGER.debug(t)