"""Integration tests for the FT.AGGREGATE command, driven through RLTest.

Most tests run against the ``games`` index that ``add_values`` builds from the
bundled ``games.json.bz2`` data set; the module-level tests create their own
small indexes.
"""
import bz2
import json
import itertools
import os
from RLTest import Env
import pprint
from includes import *
from common import getConnectionByEnv, waitForIndex, sortedResults, toSortedFlatList


def to_dict(res):
    """Turn a flat ``[key, value, key, value, ...]`` reply row into a dict."""
    d = {res[i]: res[i + 1] for i in range(0, len(res), 2)}
    return d


GAMES_JSON = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'games.json.bz2')


def add_values(env, number_of_iterations=1):
    """Create the ``games`` index and load the bundled data set into it.

    The data file is loaded ``number_of_iterations`` times. From the second
    pass on, the pass number is appended to every document id so that the
    documents are added under new ids.
    """
    env.execute_command('FT.CREATE', 'games', 'ON', 'HASH',
                        'SCHEMA', 'title', 'TEXT', 'SORTABLE',
                        'brand', 'TEXT', 'NOSTEM', 'SORTABLE',
                        'description', 'TEXT', 'price', 'NUMERIC',
                        'categories', 'TAG')

    for i in range(number_of_iterations):
        # The context manager closes the archive even if a line fails to
        # parse or the server rejects a command.
        with bz2.BZ2File(GAMES_JSON, 'r') as fp:
            for line in fp:
                obj = json.loads(line)
                doc_id = obj['asin'] + (str(i) if i > 0 else '')
                del obj['asin']
                obj['price'] = obj.get('price') or 0
                obj['categories'] = ','.join(obj['categories'])
                # Flatten the (field, value) pairs; None becomes ''.
                cmd = ['FT.ADD', 'games', doc_id, 1, 'FIELDS', ] + \
                    [str(x) if x is not None else '' for x in itertools.chain(
                        *obj.items())]
                env.execute_command(*cmd)


class TestAggregate():
    """FT.AGGREGATE tests that share one ``games`` index (loaded once)."""

    def __init__(self):
        self.env = Env()
        add_values(self.env)

    def testGroupBy(self):
        """GROUPBY brand with COUNT, sorted by count descending."""
        cmd = ['ft.aggregate', 'games', '*',
               'GROUPBY', '1', '@brand',
               'REDUCE', 'count', '0', 'AS', 'count',
               'SORTBY', 2, '@count', 'desc',
               'LIMIT', '0', '5'
               ]

        res = self.env.cmd(*cmd)
        self.env.assertIsNotNone(res)
        self.env.assertEqual([292, ['brand', '', 'count', '1518'], ['brand', 'mad catz', 'count', '43'],
                              ['brand', 'generic', 'count', '40'], ['brand', 'steelseries', 'count', '37'],
                              ['brand', 'logitech', 'count', '35']], res)

    def testMinMax(self):
        """MIN and MAX reducers over @price."""
        cmd = ['ft.aggregate', 'games', 'sony',
               'GROUPBY', '1', '@brand',
               'REDUCE', 'count', '0',
               'REDUCE', 'min', '1', '@price', 'as', 'minPrice',
               'SORTBY', '2', '@minPrice', 'DESC']
        res = self.env.cmd(*cmd)
        self.env.assertIsNotNone(res)
        row = to_dict(res[1])
        self.env.assertEqual(88, int(float(row['minPrice'])))

        cmd = ['ft.aggregate', 'games', 'sony',
               'GROUPBY', '1', '@brand',
               'REDUCE', 'count', '0',
               'REDUCE', 'max', '1', '@price', 'as', 'maxPrice',
               'SORTBY', '2', '@maxPrice', 'DESC']
        res = self.env.cmd(*cmd)
        row = to_dict(res[1])
        self.env.assertEqual(695, int(float(row['maxPrice'])))

    def testAvg(self):
        """AVG reducer, with and without a SORTBY step."""
        cmd = ['ft.aggregate', 'games', 'sony',
               'GROUPBY', '1', '@brand',
               'REDUCE', 'avg', '1', '@price', 'AS', 'avg_price',
               'REDUCE', 'count', '0',
               'SORTBY', '2', '@avg_price', 'DESC']
        res = self.env.cmd(*cmd)
        self.env.assertIsNotNone(res)
        self.env.assertEqual(26, res[0])
        # Ensure the formatting actually exists

        first_row = to_dict(res[1])
        self.env.assertEqual(109, int(float(first_row['avg_price'])))

        for row in res[1:]:
            row = to_dict(row)
            self.env.assertIn('avg_price', row)

        # Test aliasing
        cmd = ['FT.AGGREGATE', 'games', 'sony', 'GROUPBY', '1', '@brand',
               'REDUCE', 'avg', '1', '@price', 'AS', 'avgPrice']
        res = self.env.cmd(*cmd)
        first_row = to_dict(res[1])
        self.env.assertEqual(17, int(float(first_row['avgPrice'])))

    def testCountDistinct(self):
        """COUNT_DISTINCT and COUNT_DISTINCTISH reducers over @title."""
        cmd = ['FT.AGGREGATE', 'games', '*',
               'GROUPBY', '1', '@brand',
               'REDUCE', 'COUNT_DISTINCT', '1', '@title', 'AS', 'count_distinct(title)',
               'REDUCE', 'COUNT', '0'
               ]
        res = self.env.cmd(*cmd)[1:]
        # print res
        row = to_dict(res[0])
        self.env.assertEqual(1484, int(row['count_distinct(title)']))

        cmd = ['FT.AGGREGATE', 'games', '*',
               'GROUPBY', '1', '@brand',
               'REDUCE', 'COUNT_DISTINCTISH', '1', '@title', 'AS', 'count_distinctish(title)',
               'REDUCE', 'COUNT', '0'
               ]
        res = self.env.cmd(*cmd)[1:]
        # print res
        row = to_dict(res[0])
        self.env.assertEqual(1461, int(row['count_distinctish(title)']))

    def testQuantile(self):
        """QUANTILE reducer at 0.50 / 0.90 / 0.95 for the largest group."""
        cmd = ['FT.AGGREGATE', 'games', '*',
               'GROUPBY', '1', '@brand',
               'REDUCE', 'QUANTILE', '2', '@price', '0.50', 'AS', 'q50',
               'REDUCE', 'QUANTILE', '2', '@price', '0.90', 'AS', 'q90',
               'REDUCE', 'QUANTILE', '2', '@price', '0.95', 'AS', 'q95',
               'REDUCE', 'AVG', '1', '@price',
               'REDUCE', 'COUNT', '0', 'AS', 'rowcount',
               'SORTBY', '2', '@rowcount', 'DESC', 'MAX', '1']

        res = self.env.cmd(*cmd)
        row = to_dict(res[1])
        # TODO: Better samples
        self.env.assertAlmostEqual(14.99, float(row['q50']), delta=3)
        self.env.assertAlmostEqual(70, float(row['q90']), delta=50)
        self.env.assertAlmostEqual(110, (float(row['q95'])), delta=50)

    def testStdDev(self):
        """STDDEV reducer alongside AVG and the median for the largest group."""
        cmd = ['FT.AGGREGATE', 'games', '*',
               'GROUPBY', '1', '@brand',
               'REDUCE', 'STDDEV', '1', '@price', 'AS', 'stddev(price)',
               'REDUCE', 'AVG', '1', '@price', 'AS', 'avgPrice',
               'REDUCE', 'QUANTILE', '2', '@price', '0.50', 'AS', 'q50Price',
               'REDUCE', 'COUNT', '0', 'AS', 'rowcount',
               'SORTBY', '2', '@rowcount', 'DESC',
               'LIMIT', '0', '10']
        res = self.env.cmd(*cmd)
        row = to_dict(res[1])

        self.env.assertTrue(10 <= int(
            float(row['q50Price'])) <= 20)
        self.env.assertAlmostEqual(53, int(float(row['stddev(price)'])), delta=50)
        self.env.assertEqual(29, int(float(row['avgPrice'])))

    def testParseTime(self):
        """timefmt() output round-trips through parsetime()."""
        cmd = ['FT.AGGREGATE', 'games', '*',
               'GROUPBY', '1', '@brand',
               'REDUCE', 'COUNT', '0', 'AS', 'count',
               'APPLY', 'timefmt(1517417144)', 'AS', 'dt',
               'APPLY', 'parsetime(@dt, "%FT%TZ")', 'as', 'parsed_dt',
               'LIMIT', '0', '1']
        res = self.env.cmd(*cmd)

        self.env.assertEqual(['brand', '', 'count', '1518', 'dt',
                              '2018-01-31T16:45:44Z', 'parsed_dt', '1517417144'], res[1])

    def testRandomSample(self):
        """RANDOM_SAMPLE returns a non-empty list of at most 10 values."""
        cmd = ['FT.AGGREGATE', 'games', '*', 'GROUPBY', '1', '@brand',
               'REDUCE', 'COUNT', '0', 'AS', 'num',
               'REDUCE', 'RANDOM_SAMPLE', '2', '@price', '10',
               'SORTBY', '2', '@num', 'DESC', 'MAX', '10']
        for row in self.env.cmd(*cmd)[1:]:
            self.env.assertIsInstance(row[5], list)
            self.env.assertGreater(len(row[5]), 0)
            # row[3] is the group count as returned by the server; convert it
            # so the comparison is numeric rather than string-vs-int.
            self.env.assertGreaterEqual(int(row[3]), len(row[5]))

            self.env.assertLessEqual(len(row[5]), 10)

    def testTimeFunctions(self):
        """Every time-related APPLY function on one fixed timestamp."""
        cmd = ['FT.AGGREGATE', 'games', '*',

               'APPLY', '1517417144', 'AS', 'dt',
               'APPLY', 'timefmt(@dt)', 'AS', 'timefmt',
               'APPLY', 'day(@dt)', 'AS', 'day',
               'APPLY', 'hour(@dt)', 'AS', 'hour',
               'APPLY', 'minute(@dt)', 'AS', 'minute',
               'APPLY', 'month(@dt)', 'AS', 'month',
               'APPLY', 'dayofweek(@dt)', 'AS', 'dayofweek',
               'APPLY', 'dayofmonth(@dt)', 'AS', 'dayofmonth',
               'APPLY', 'dayofyear(@dt)', 'AS', 'dayofyear',
               'APPLY', 'year(@dt)', 'AS', 'year',

               'LIMIT', '0', '1']
        res = self.env.cmd(*cmd)
        self.env.assertListEqual([1, ['dt', '1517417144', 'timefmt', '2018-01-31T16:45:44Z', 'day', '1517356800', 'hour', '1517414400',
                                      'minute', '1517417100', 'month', '1514764800', 'dayofweek', '3', 'dayofmonth', '31', 'dayofyear', '30', 'year', '2018']], res)

    def testStringFormat(self):
        """format() in APPLY matches the equivalent Python %-formatting."""
        cmd = ['FT.AGGREGATE', 'games', '@brand:sony',
               'GROUPBY', '2', '@title', '@brand',
               'REDUCE', 'COUNT', '0',
               'REDUCE', 'MAX', '1', '@price', 'AS', 'price',
               'APPLY', 'format("%s|%s|%s|%s", @title, @brand, "Mark", @price)', 'as', 'titleBrand',
               'LIMIT', '0', '10']
        res = self.env.cmd(*cmd)
        for row in res[1:]:
            row = to_dict(row)
            expected = '%s|%s|%s|%g' % (
                row['title'], row['brand'], 'Mark', float(row['price']))
            self.env.assertEqual(expected, row['titleBrand'])

    def testSum(self):
        """SUM reducer, sorted by the summed price descending."""
        cmd = ['ft.aggregate', 'games', '*',
               'GROUPBY', '1', '@brand',
               'REDUCE', 'count', '0', 'AS', 'count',
               'REDUCE', 'sum', 1, '@price', 'AS', 'sum(price)',
               'SORTBY', 2, '@sum(price)', 'desc',
               'LIMIT', '0', '5'
               ]
        res = self.env.cmd(*cmd)
        self.env.assertEqual([292, ['brand', '', 'count', '1518', 'sum(price)', '44780.69'],
                              ['brand', 'mad catz', 'count',
                               '43', 'sum(price)', '3973.48'],
                              ['brand', 'razer', 'count', '26',
                               'sum(price)', '2558.58'],
                              ['brand', 'logitech', 'count',
                               '35', 'sum(price)', '2329.21'],
                              ['brand', 'steelseries', 'count', '37', 'sum(price)', '1851.12']], res)

    def testFilter(self):
        """Single and chained FILTER steps after a GROUPBY."""
        cmd = ['ft.aggregate', 'games', '*',
               'GROUPBY', '1', '@brand',
               'REDUCE', 'count', '0', 'AS', 'count',
               'FILTER', '@count > 5'
               ]

        res = self.env.cmd(*cmd)
        for row in res[1:]:
            row = to_dict(row)
            self.env.assertGreater(int(row['count']), 5)

        cmd = ['ft.aggregate', 'games', '*',
               'GROUPBY', '1', '@brand',
               'REDUCE', 'count', '0', 'AS', 'count',
               'FILTER', '@count < 5',
               'FILTER', '@count > 2 && @brand != ""'
               ]

        res = self.env.cmd(*cmd)
        for row in res[1:]:
            row = to_dict(row)
            self.env.assertLess(int(row['count']), 5)
            self.env.assertGreater(int(row['count']), 2)

    def testToList(self):
        """TOLIST yields as many values as COUNT_DISTINCT reports."""
        cmd = ['ft.aggregate', 'games', '*',
               'GROUPBY', '1', '@brand',
               'REDUCE', 'count_distinct', '1', '@price', 'as', 'count',
               'REDUCE', 'tolist', 1, '@price', 'as', 'prices',
               'SORTBY', 2, '@count', 'desc',
               'LIMIT', '0', '5'
               ]
        res = self.env.cmd(*cmd)

        for row in res[1:]:
            row = to_dict(row)
            self.env.assertEqual(int(row['count']), len(row['prices']))

    def testSortBy(self):
        """SORTBY in both directions, with MAX, multiple keys and LOAD."""
        res = self.env.cmd('ft.aggregate', 'games', '*', 'GROUPBY', '1', '@brand',
                           'REDUCE', 'sum', 1, '@price', 'as', 'price',
                           'SORTBY', 2, '@price', 'desc',
                           'LIMIT', '0', '2')

        self.env.assertListEqual([292, ['brand', '', 'price', '44780.69'], [
                                 'brand', 'mad catz', 'price', '3973.48']], res)

        res = self.env.cmd('ft.aggregate', 'games', '*', 'GROUPBY', '1', '@brand',
                           'REDUCE', 'sum', 1, '@price', 'as', 'price',
                           'SORTBY', 2, '@price', 'asc',
                           'LIMIT', '0', '2')

        self.env.assertListEqual([292, ['brand', 'myiico', 'price', '0.23'], [
                                 'brand', 'crystal dynamics', 'price', '0.25']], res)

        # Test MAX with limit higher than it
        res = self.env.cmd('ft.aggregate', 'games', '*', 'GROUPBY', '1', '@brand',
                           'REDUCE', 'sum', 1, '@price', 'as', 'price',
                           'SORTBY', 2, '@price', 'asc', 'MAX', 2)

        self.env.assertListEqual([292, ['brand', 'myiico', 'price', '0.23'], [
                                 'brand', 'crystal dynamics', 'price', '0.25']], res)

        # Test Sorting by multiple properties
        res = self.env.cmd('ft.aggregate', 'games', '*', 'GROUPBY', '1', '@brand',
                           'REDUCE', 'sum', 1, '@price', 'as', 'price',
                           'APPLY', '(@price % 10)', 'AS', 'price',
                           'SORTBY', 4, '@price', 'asc', '@brand', 'desc', 'MAX', 10,
                           )
        self.env.assertListEqual([292, ['brand', 'zps', 'price', '0'], ['brand', 'zalman', 'price', '0'], ['brand', 'yoozoo', 'price', '0'], ['brand', 'white label', 'price', '0'], ['brand', 'stinky', 'price', '0'], [
                                 'brand', 'polaroid', 'price', '0'], ['brand', 'plantronics', 'price', '0'], ['brand', 'ozone', 'price', '0'], ['brand', 'oooo', 'price', '0'], ['brand', 'neon', 'price', '0']], res)

        # test LOAD with SORTBY
        expected_res = [2265, ['title', 'Logitech MOMO Racing - Wheel and pedals set - 6 button(s) - PC, MAC - black', 'price', '759.12'],
                        ['title', 'Sony PSP Slim & Lite 2000 Console', 'price', '695.8']]
        res = self.env.cmd('ft.aggregate', 'games', '*',
                           'LOAD', 1, '@title',
                           'SORTBY', 2, '@price', 'desc',
                           'LIMIT', '0', '2')
        self.env.assertListEqual(toSortedFlatList(res), toSortedFlatList(expected_res))

        res = self.env.cmd('ft.aggregate', 'games', '*',
                           'SORTBY', 2, '@price', 'desc',
                           'LOAD', 1, '@title',
                           'LIMIT', '0', '2')
        self.env.assertListEqual(toSortedFlatList(res), toSortedFlatList(expected_res))

        # test with non-sortable field
        expected_res = [2265, ['description', 'world of warcraft:the burning crusade-expansion set'],
                        ['description', 'wired playstation 3 controller, third party product with high quality.']]
        res = self.env.cmd('ft.aggregate', 'games', '*',
                           'SORTBY', 2, '@description', 'desc',
                           'LOAD', 1, '@description',
                           'LIMIT', '0', '2')
        self.env.assertListEqual(toSortedFlatList(res), toSortedFlatList(expected_res))

        res = self.env.cmd('ft.aggregate', 'games', '*',
                           'LOAD', 1, '@description',
                           'SORTBY', 2, '@description', 'desc',
                           'LIMIT', '0', '2')
        self.env.assertListEqual(toSortedFlatList(res), toSortedFlatList(expected_res))

    def testExpressions(self):
        """Placeholder; no expression tests are implemented here."""
        pass

    def testNoGroup(self):
        """LOAD + APPLY + multi-key SORTBY without any GROUPBY step."""
        res = self.env.cmd('ft.aggregate', 'games', '*', 'LOAD', '2', '@brand', '@price',
                           'APPLY', 'floor(sqrt(@price)) % 10', 'AS', 'price',
                           'SORTBY', 4, '@price', 'desc', '@brand', 'desc', 'MAX', 5,
                           )
        exp = [2265,
               ['brand', 'Xbox', 'price', '9'],
               ['brand', 'turtle beach', 'price', '9'],
               ['brand', 'trust', 'price', '9'],
               ['brand', 'steelseries', 'price', '9'],
               ['brand', 'speedlink', 'price', '9']]
        # exp = [2265L, ['brand', 'Xbox', 'price', '9'], ['brand', 'Turtle Beach', 'price', '9'], [
        #  'brand', 'Trust', 'price', '9'], ['brand', 'SteelSeries', 'price', '9'], ['brand', 'Speedlink', 'price', '9']]
        # Only the first row is compared.
        self.env.assertListEqual(exp[1], res[1])

    def testLoad(self):
        """LOAD of existing fields plus one field that does not exist."""
        res = self.env.cmd('ft.aggregate', 'games', '*',
                           'LOAD', '3', '@brand', '@price', '@nonexist',
                           'SORTBY', 2, '@price', 'DESC',
                           'MAX', 2)
        exp = [3, ['brand', '', 'price', '759.12'], ['brand', 'Sony', 'price', '695.8']]
        self.env.assertEqual(exp[1], res[1])
        self.env.assertEqual(exp[2], res[2])

    def testLoadWithDocId(self):
        """LOAD of @__key, and FILTER on the loaded key."""
        res = self.env.cmd('ft.aggregate', 'games', '*',
                           'LOAD', '3', '@brand', '@price', '@__key',
                           'SORTBY', 2, '@price', 'DESC',
                           'MAX', 4)
        exp = [3, ['brand', '', 'price', '759.12', '__key', 'B00006JJIC'],
               ['brand', 'Sony', 'price', '695.8', '__key', 'B000F6W1AG']]
        self.env.assertEqual(exp[1], res[1])
        self.env.assertEqual(exp[2], res[2])

        res = self.env.cmd('ft.aggregate', 'games', '*',
                           'LOAD', '3', '@brand', '@price', '@__key',
                           'FILTER', '@__key == "B000F6W1AG"')
        self.env.assertEqual(res[1], ['brand', 'Sony', 'price', '695.8', '__key', 'B000F6W1AG'])

    def testLoadImplicit(self):
        """SORTBY on @price when only @brand was explicitly loaded."""
        # same as previous
        res = self.env.cmd('ft.aggregate', 'games', '*',
                           'LOAD', '1', '@brand',
                           'SORTBY', 2, '@price', 'DESC')
        exp = [3, ['brand', '', 'price', '759.12'], ['brand', 'Sony', 'price', '695.8']]
        self.env.assertEqual(exp[1], res[1])

    def testSplit(self):
        """split() with explicit, empty and default separator/strip sets."""
        res = self.env.cmd('ft.aggregate', 'games', '*', 'APPLY', 'split("hello world,  foo,,,bar,", ",", " ")', 'AS', 'strs',
                           'APPLY', 'split("hello world,  foo,,,bar,", " ", ",")', 'AS', 'strs2',
                           'APPLY', 'split("hello world,  foo,,,bar,", "", "")', 'AS', 'strs3',
                           'APPLY', 'split("hello world,  foo,,,bar,")', 'AS', 'strs4',
                           'APPLY', 'split("hello world,  foo,,,bar,",",")', 'AS', 'strs5',
                           'APPLY', 'split("")', 'AS', 'empty',
                           'LIMIT', '0', '1'
                           )
        # print "Got {} results".format(len(res))
        # return
        # pprint.pprint(res)
        self.env.assertListEqual([1, ['strs', ['hello world', 'foo', 'bar'],
                                      'strs2', ['hello', 'world', 'foo,,,bar'],
                                      'strs3', ['hello world,  foo,,,bar,'],
                                      'strs4', ['hello world', 'foo', 'bar'],
                                      'strs5', ['hello world', 'foo', 'bar'],
                                      'empty', []]], res)

    def testFirstValue(self):
        """FIRST_VALUE ... BY ... ASC/DESC picks the per-group extremes."""
        res = self.env.cmd('ft.aggregate', 'games', '@brand:(sony|matias|beyerdynamic|(mad catz))',
                           'GROUPBY', 1, '@brand',
                           'REDUCE', 'FIRST_VALUE', 4, '@title', 'BY', '@price', 'DESC', 'AS', 'top_item',
                           'REDUCE', 'FIRST_VALUE', 4, '@price', 'BY', '@price', 'DESC', 'AS', 'top_price',
                           'REDUCE', 'FIRST_VALUE', 4, '@title', 'BY', '@price', 'ASC', 'AS', 'bottom_item',
                           'REDUCE', 'FIRST_VALUE', 4, '@price', 'BY', '@price', 'ASC', 'AS', 'bottom_price',
                           'SORTBY', 2, '@top_price', 'DESC', 'MAX', 5
                           )
        expected = [4, ['brand', 'sony', 'top_item', 'sony psp slim & lite 2000 console', 'top_price', '695.8', 'bottom_item', 'sony dlchd20p high speed hdmi cable for playstation 3', 'bottom_price', '5.88'],
                    ['brand', 'matias', 'top_item', 'matias halfkeyboard usb', 'top_price',
                     '559.99', 'bottom_item', 'matias halfkeyboard usb', 'bottom_price', '559.99'],
                    ['brand', 'beyerdynamic', 'top_item', 'beyerdynamic mmx300 pc gaming premium digital headset with microphone', 'top_price', '359.74',
                     'bottom_item', 'beyerdynamic headzone pc gaming digital surround sound system with mmx300 digital headset with microphone', 'bottom_price', '0'],
                    ['brand', 'mad catz', 'top_item', 'mad catz s.t.r.i.k.e.7 gaming keyboard', 'top_price', '295.95', 'bottom_item', 'madcatz mov4545 xbox replacement breakaway cable', 'bottom_price', '3.49']]

        # hack :(
        def mklower(result):
            # Lower-case every row in place so the comparison ignores case.
            for arr in result[1:]:
                arr[:] = [item.lower() for item in arr]
        mklower(expected)
        mklower(res)
        self.env.assertListEqual(expected, res)

    def testLoadAfterGroupBy(self):
        """LOAD placed after GROUPBY is rejected with an error."""
        with self.env.assertResponseError():
            self.env.cmd('ft.aggregate', 'games', '*',
                         'GROUPBY', 1, '@brand',
                         'LOAD', 1, '@brand')

    def testReducerGeneratedAliasing(self):
        """Reducers without AS get a ``__generated_alias...`` name."""
        rv = self.env.cmd('ft.aggregate', 'games', '*',
                          'GROUPBY', 1, '@brand',
                          'REDUCE', 'MIN', 1, '@price',
                          'LIMIT', 0, 1)
        self.env.assertEqual([292, ['brand', '', '__generated_aliasminprice', '0']], rv)

        rv = self.env.cmd('ft.aggregate', 'games', '@brand:(sony|matias|beyerdynamic|(mad catz))',
                          'GROUPBY', 1, '@brand',
                          'REDUCE', 'FIRST_VALUE', 4, '@title', 'BY', '@price', 'DESC',
                          'SORTBY', 2, '@brand', 'ASC')
        self.env.assertEqual('__generated_aliasfirst_valuetitle,by,price,desc', rv[1][2])

    def testIssue1125(self):
        """LIMIT is checked against MAXSEARCHRESULTS / MAXAGGREGATERESULTS."""
        self.env.skipOnCluster()
        if VALGRIND:
            self.env.skip()
        # SEARCH should fail
        self.env.expect('ft.search', 'games', '*', 'limit', 0, 2000000).error() \
            .contains('LIMIT exceeds maximum of 1000000')
        # SEARCH should succeed
        self.env.expect('ft.config', 'set', 'MAXSEARCHRESULTS', -1).ok()
        rv = self.env.cmd('ft.search', 'games', '*',
                          'LIMIT', 0, 12345678)
        self.env.assertEqual(4531, len(rv))
        # AGGREGATE should succeed
        rv = self.env.cmd('ft.aggregate', 'games', '*',
                          'LIMIT', 0, 12345678)
        self.env.assertEqual(2266, len(rv))
        # AGGREGATE should fail
        self.env.expect('ft.config', 'set', 'MAXAGGREGATERESULTS', 1000000).ok()
        self.env.expect('ft.aggregate', 'games', '*', 'limit', 0, 2000000).error() \
            .contains('LIMIT exceeds maximum of 1000000')

        # force global limit on aggregate
        num = 10
        self.env.expect('ft.config', 'set', 'MAXAGGREGATERESULTS', num).ok()
        rv = self.env.cmd('ft.aggregate', 'games', '*')
        self.env.assertEqual(num + 1, len(rv))

        # Reset both limits so later tests on this shared env are unaffected.
        self.env.expect('ft.config', 'set', 'MAXAGGREGATERESULTS', -1).ok()
        self.env.expect('ft.config', 'set', 'MAXSEARCHRESULTS', 1000000).ok()

    def testMultiSortByStepsError(self):
        """Two separate SORTBY steps in one pipeline are rejected."""
        self.env.expect('ft.aggregate', 'games', '*',
                        'LOAD', '2', '@brand', '@price',
                        'SORTBY', 2, '@brand', 'DESC',
                        'SORTBY', 2, '@price', 'DESC').error()\
            .contains('Multiple SORTBY steps are not allowed. Sort multiple fields in a single step')

    def testLoadWithSortBy(self):
        """LOAD followed by two SORTBY steps is rejected."""
        # NOTE(review): this body is identical to testMultiSortByStepsError;
        # confirm whether a different LOAD/SORTBY scenario was intended.
        self.env.expect('ft.aggregate', 'games', '*',
                        'LOAD', '2', '@brand', '@price',
                        'SORTBY', 2, '@brand', 'DESC',
                        'SORTBY', 2, '@price', 'DESC').error()\
            .contains('Multiple SORTBY steps are not allowed. Sort multiple fields in a single step')

    def testCountError(self):
        """COUNT accepts exactly zero arguments."""
        # With 0 values
        res = self.env.cmd('ft.aggregate', 'games', '*',
                           'GROUPBY', '2', '@brand', '@price',
                           'REDUCE', 'COUNT', 0)
        self.env.assertEqual(len(res), 1245)

        # With count 1 and 1 value
        self.env.expect('ft.aggregate', 'games', '*',
                        'GROUPBY', '2', '@brand', '@price',
                        'REDUCE', 'COUNT', 1, '@brand').error() \
            .contains('Count accepts 0 values only')

        # With count 1 and 0 values
        self.env.expect('ft.aggregate', 'games', '*',
                        'GROUPBY', '2', '@brand', '@price',
                        'REDUCE', 'COUNT', 1).error() \
            .contains('Bad arguments for COUNT: Expected an argument, but none provided')

    # def testLoadAfterSortBy(self):
    #     with self.env.assertResponseError():
    #         self.env.cmd('ft.aggregate', 'games', '*',
    #                      'SORTBY', 1, '@brand',
    #                      'LOAD', 1, '@brand')

    # def testLoadAfterApply(self):
    #     with self.env.assertResponseError():
    #         self.env.cmd('ft.aggregate', 'games', '*',
    #                      'APPLY', 'timefmt(1517417144)', 'AS', 'dt',
    #                      'LOAD', 1, '@brand')

    # def testLoadAfterFilter(self):
    #     with self.env.assertResponseError():
    #         self.env.cmd('ft.aggregate', 'games', '*',
    #                      'FILTER', '@count > 5',
    #                      'LOAD', 1, '@brand')

    # def testLoadAfterLimit(self):
    #     with self.env.assertResponseError():
    #         self.env.cmd('ft.aggregate', 'games', '*',
    #                      'LIMIT', '0', '5',
    #                      'LOAD', 1, '@brand')


class TestAggregateSecondUseCases():
    """FT.AGGREGATE tests against the data set loaded twice."""

    def __init__(self):
        self.env = Env()
        add_values(self.env, 2)

    def testSimpleAggregate(self):
        """A bare aggregate returns the count plus one row per document."""
        res = self.env.cmd('ft.aggregate', 'games', '*')
        self.env.assertIsNotNone(res)
        self.env.assertEqual(len(res), 4531)

    def testSimpleAggregateWithCursor(self):
        """WITHCURSOR returns a non-zero cursor id as the second element."""
        res = self.env.cmd('ft.aggregate', 'games', '*', 'WITHCURSOR', 'COUNT', 1000)
        self.env.assertTrue(res[1] != 0)


def grouper(iterable, n, fillvalue=None):
    """Collect data into fixed-length chunks or blocks."""
    # grouper('ABCDEFG', 3, 'x') --> ABC DEF Gxx
    try:
        from itertools import izip_longest
    except ImportError:
        # Python 3 renamed izip_longest to zip_longest.
        from itertools import zip_longest as izip_longest
    args = [iter(iterable)] * n
    return izip_longest(fillvalue=fillvalue, *args)


def testAggregateGroupByOnEmptyField(env):
    """GROUPBY on a split() result where one document has an empty field."""
    env.cmd('ft.create', 'idx', 'ON', 'HASH',
            'SCHEMA', 'f', 'TEXT', 'SORTABLE', 'test', 'TEXT', 'SORTABLE')
    env.cmd('ft.add', 'idx', 'doc1', '1.0', 'FIELDS', 'f', 'field', 'test', 'test1,test2,test3')
    env.cmd('ft.add', 'idx', 'doc2', '1.0', 'FIELDS', 'f', 'field', 'test', '')
    res = env.cmd('ft.aggregate', 'idx', 'field', 'APPLY', 'split(@test)', 'as', 'check',
                  'GROUPBY', '1', '@check', 'REDUCE', 'COUNT', '0', 'as', 'count')

    expected = [4, ['check', 'test3', 'count', '1'],
                ['check', None, 'count', '1'], ['check', 'test1', 'count', '1'], ['check', 'test2', 'count', '1']]
    # Group order is not asserted, only membership.
    for var in expected:
        env.assertIn(var, res)


def testMultiSortBy(env):
    """SORTBY with two keys in each ASC/DESC and key-order combination."""
    conn = getConnectionByEnv(env)
    env.execute_command('FT.CREATE', 'sb_idx', 'SCHEMA', 't1', 'TEXT', 't2', 'TEXT')
    conn.execute_command('hset', 'doc1', 't1', 'a', 't2', 'a')
    conn.execute_command('hset', 'doc2', 't1', 'a', 't2', 'b')
    conn.execute_command('hset', 'doc3', 't1', 'a', 't2', 'c')
    conn.execute_command('hset', 'doc4', 't1', 'b', 't2', 'a')
    conn.execute_command('hset', 'doc5', 't1', 'b', 't2', 'b')
    conn.execute_command('hset', 'doc6', 't1', 'b', 't2', 'c')
    conn.execute_command('hset', 'doc7', 't1', 'c', 't2', 'a')
    conn.execute_command('hset', 'doc8', 't1', 'c', 't2', 'b')
    conn.execute_command('hset', 'doc9', 't1', 'c', 't2', 'c')

    # t1 ASC t2 ASC
    res = [9, ['t1', 'a', 't2', 'a'], ['t1', 'a', 't2', 'b'], ['t1', 'a', 't2', 'c'],
           ['t1', 'b', 't2', 'a'], ['t1', 'b', 't2', 'b'], ['t1', 'b', 't2', 'c'],
           ['t1', 'c', 't2', 'a'], ['t1', 'c', 't2', 'b'], ['t1', 'c', 't2', 'c']]
    env.expect('FT.AGGREGATE', 'sb_idx', '*',
               'LOAD', '2', '@t1', '@t2',
               'SORTBY', '4', '@t1', 'ASC', '@t2', 'ASC').equal(res)

    # t1 DESC t2 ASC
    res = [9, ['t1', 'c', 't2', 'a'], ['t1', 'c', 't2', 'b'], ['t1', 'c', 't2', 'c'],
           ['t1', 'b', 't2', 'a'], ['t1', 'b', 't2', 'b'], ['t1', 'b', 't2', 'c'],
           ['t1', 'a', 't2', 'a'], ['t1', 'a', 't2', 'b'], ['t1', 'a', 't2', 'c']]
    env.expect('FT.AGGREGATE', 'sb_idx', '*',
               'LOAD', '2', '@t1', '@t2',
               'SORTBY', '4', '@t1', 'DESC', '@t2', 'ASC').equal(res)

    # t2 ASC t1 ASC
    res = [9, ['t1', 'a', 't2', 'a'], ['t1', 'b', 't2', 'a'], ['t1', 'c', 't2', 'a'],
           ['t1', 'a', 't2', 'b'], ['t1', 'b', 't2', 'b'], ['t1', 'c', 't2', 'b'],
           ['t1', 'a', 't2', 'c'], ['t1', 'b', 't2', 'c'], ['t1', 'c', 't2', 'c']]
    env.expect('FT.AGGREGATE', 'sb_idx', '*',
               'LOAD', '2', '@t1', '@t2',
               'SORTBY', '4', '@t2', 'ASC', '@t1', 'ASC').equal(res)

    # t2 ASC t1 DESC
    res = [9, ['t1', 'c', 't2', 'a'], ['t1', 'b', 't2', 'a'], ['t1', 'a', 't2', 'a'],
           ['t1', 'c', 't2', 'b'], ['t1', 'b', 't2', 'b'], ['t1', 'a', 't2', 'b'],
           ['t1', 'c', 't2', 'c'], ['t1', 'b', 't2', 'c'], ['t1', 'a', 't2', 'c']]
    env.expect('FT.AGGREGATE', 'sb_idx', '*',
               'LOAD', '2', '@t1', '@t2',
               'SORTBY', '4', '@t2', 'ASC', '@t1', 'DESC').equal(res)


def testGroupbyNoReduce(env):
    """GROUPBY with no REDUCE clause returns one row per distinct value."""
    env.cmd('ft.create', 'idx', 'ON', 'HASH',
            'SCHEMA', 'primaryName', 'TEXT', 'SORTABLE',
            'birthYear', 'NUMERIC', 'SORTABLE')

    for x in range(10):
        env.cmd('ft.add', 'idx', 'doc{}'.format(x), 1, 'fields',
                'primaryName', 'sarah number{}'.format(x))

    rv = env.cmd('ft.aggregate', 'idx', 'sarah', 'groupby', 1, '@primaryName')
    env.assertEqual(11, len(rv))
    for row in rv[1:]:
        env.assertEqual('primaryName', row[0])
        env.assertTrue('sarah' in row[1])


def testStartsWith(env):
    """startswith() in APPLY yields '1' or '0' per document."""
    conn = getConnectionByEnv(env)
    env.execute_command('ft.create', 'idx', 'SCHEMA', 't', 'TEXT', 'SORTABLE')
    conn.execute_command('hset', 'doc1', 't', 'aa')
    conn.execute_command('hset', 'doc2', 't', 'aaa')
    conn.execute_command('hset', 'doc3', 't', 'ab')

    res = env.cmd('ft.aggregate', 'idx', '*', 'load', 1, 't', 'apply', 'startswith(@t, "aa")', 'as', 'prefix')
    env.assertEqual(toSortedFlatList(res), toSortedFlatList([1, ['t', 'aa', 'prefix', '1'],
                                                             ['t', 'aaa', 'prefix', '1'],
                                                             ['t', 'ab', 'prefix', '0']]))


def testContains(env):
    """contains() in APPLY yields the number of substring occurrences."""
    conn = getConnectionByEnv(env)
    env.execute_command('ft.create', 'idx', 'SCHEMA', 't', 'TEXT', 'SORTABLE')
    conn.execute_command('hset', 'doc1', 't', 'aa')
    conn.execute_command('hset', 'doc2', 't', 'bba')
    conn.execute_command('hset', 'doc3', 't', 'aba')
    conn.execute_command('hset', 'doc4', 't', 'abb')
    conn.execute_command('hset', 'doc5', 't', 'abba')
    conn.execute_command('hset', 'doc6', 't', 'abbabb')

    res = env.cmd('ft.aggregate', 'idx', '*', 'load', 1, 't', 'apply', 'contains(@t, "bb")', 'as', 'substring')
    env.assertEqual(toSortedFlatList(res), toSortedFlatList([1, ['t', 'aa', 'substring', '0'],
                                                             ['t', 'bba', 'substring', '1'],
                                                             ['t', 'aba', 'substring', '0'],
                                                             ['t', 'abb', 'substring', '1'],
                                                             ['t', 'abba', 'substring', '1'],
                                                             ['t', 'abbabb', 'substring', '2']]))


def testLoadAll(env):
    """``LOAD *`` returns the same fields as naming each one explicitly."""
    conn = getConnectionByEnv(env)
    env.execute_command('FT.CREATE', 'idx', 'SCHEMA', 't', 'TEXT', 'n', 'NUMERIC')
    conn.execute_command('HSET', 'doc1', 't', 'hello', 'n', 42)
    conn.execute_command('HSET', 'doc2', 't', 'world', 'n', 3.141)
    conn.execute_command('HSET', 'doc3', 't', 'hello world', 'n', 17.8)
    # without LOAD
    env.expect('FT.AGGREGATE', 'idx', '*').equal([1, [], [], []])
    # use LOAD with narg or ALL
    res = [3, ['__key', 'doc1', 't', 'hello', 'n', '42'],
           ['__key', 'doc2', 't', 'world', 'n', '3.141'],
           ['__key', 'doc3', 't', 'hello world', 'n', '17.8']]

    env.expect('FT.AGGREGATE', 'idx', '*', 'LOAD', 3, '__key', 't', 'n', 'SORTBY', 1, '@__key').equal(res)
    env.expect('FT.AGGREGATE', 'idx', '*', 'LOAD', '*', 'LOAD', 1, '@__key', 'SORTBY', 1, '@__key').equal(res)


def testLimitIssue(env):
    """LIMIT offset/count after SORTBY returns the matching window."""
    # ticket 66895
    conn = getConnectionByEnv(env)
    conn.execute_command('ft.create', 'idx', 'SCHEMA', 'PrimaryKey', 'TEXT', 'SORTABLE',
                         'CreatedDateTimeUTC', 'NUMERIC', 'SORTABLE')
    conn.execute_command('HSET', 'doc1', 'PrimaryKey', '9::362330', 'CreatedDateTimeUTC', '637387878524969984')
    conn.execute_command('HSET', 'doc2', 'PrimaryKey', '9::362329', 'CreatedDateTimeUTC', '637387875859270016')
    conn.execute_command('HSET', 'doc3', 'PrimaryKey', '9::362326', 'CreatedDateTimeUTC', '637386176589869952')
    conn.execute_command('HSET', 'doc4', 'PrimaryKey', '9::362311', 'CreatedDateTimeUTC', '637383865971600000')
    conn.execute_command('HSET', 'doc5', 'PrimaryKey', '9::362310', 'CreatedDateTimeUTC', '637383864050669952')
    conn.execute_command('HSET', 'doc6', 'PrimaryKey', '9::362309', 'CreatedDateTimeUTC', '637242254008029952')
    conn.execute_command('HSET', 'doc7', 'PrimaryKey', '9::362308', 'CreatedDateTimeUTC', '637242253551670016')
    conn.execute_command('HSET', 'doc8', 'PrimaryKey', '9::362306', 'CreatedDateTimeUTC', '637166988081200000')

    _res = [8,
            ['PrimaryKey', '9::362330', 'CreatedDateTimeUTC', '637387878524969984'],
            ['PrimaryKey', '9::362329', 'CreatedDateTimeUTC', '637387875859270016'],
            ['PrimaryKey', '9::362326', 'CreatedDateTimeUTC', '637386176589869952'],
            ['PrimaryKey', '9::362311', 'CreatedDateTimeUTC', '637383865971600000'],
            ['PrimaryKey', '9::362310', 'CreatedDateTimeUTC', '637383864050669952'],
            ['PrimaryKey', '9::362309', 'CreatedDateTimeUTC', '637242254008029952'],
            ['PrimaryKey', '9::362308', 'CreatedDateTimeUTC', '637242253551670016'],
            ['PrimaryKey', '9::362306', 'CreatedDateTimeUTC', '637166988081200000']]

    actual_res = conn.execute_command('FT.AGGREGATE', 'idx', '*',
                                      'APPLY', '@PrimaryKey', 'AS', 'PrimaryKey',
                                      'SORTBY', '2', '@CreatedDateTimeUTC', 'DESC', 'LIMIT', '0', '8')
    env.assertEqual(actual_res, _res)

    # Each expected window is the total count followed by a slice of _res.
    res = [_res[0]] + _res[1:3]
    actual_res = conn.execute_command('FT.AGGREGATE', 'idx', '*',
                                      'APPLY', '@PrimaryKey', 'AS', 'PrimaryKey',
                                      'SORTBY', '2', '@CreatedDateTimeUTC', 'DESC', 'LIMIT', '0', '2')
    env.assertEqual(actual_res, res)

    res = [_res[0]] + _res[2:4]
    actual_res = conn.execute_command('FT.AGGREGATE', 'idx', '*',
                                      'APPLY', '@PrimaryKey', 'AS', 'PrimaryKey',
                                      'SORTBY', '2', '@CreatedDateTimeUTC', 'DESC', 'LIMIT', '1', '2')
    env.assertEqual(actual_res, res)

    res = [_res[0]] + _res[3:5]
    actual_res = conn.execute_command('FT.AGGREGATE', 'idx', '*',
                                      'APPLY', '@PrimaryKey', 'AS', 'PrimaryKey',
                                      'SORTBY', '2', '@CreatedDateTimeUTC', 'DESC', 'LIMIT', '2', '2')
    env.assertEqual(actual_res, res)


def testMaxAggResults(env):
    """A LIMIT above the MAXAGGREGATERESULTS module argument is rejected."""
    if env.env == 'existing-env':
        env.skip()
    # A fresh environment is needed to pass the module argument.
    env = Env(moduleArgs="MAXAGGREGATERESULTS 100")
    conn = getConnectionByEnv(env)
    conn.execute_command('ft.create', 'idx', 'SCHEMA', 't', 'TEXT')
    env.expect('ft.aggregate', 'idx', '*', 'LIMIT', '0', '10000').error() \
        .contains('LIMIT exceeds maximum of 100')


def testMaxAggInf(env):
    """Setting MAXAGGREGATERESULTS to -1 is reported back as 'unlimited'."""
    env.skipOnCluster()
    env.expect('ft.config', 'set', 'MAXAGGREGATERESULTS', -1).ok()
    env.expect('ft.config', 'get', 'MAXAGGREGATERESULTS').equal([['MAXAGGREGATERESULTS', 'unlimited']])


def testLoadPosition(env):
    """LOAD may appear before or after SORTBY, and more than once."""
    conn = getConnectionByEnv(env)
    env.execute_command('ft.create', 'idx', 'SCHEMA', 't1', 'TEXT', 't2', 'TEXT')
    conn.execute_command('ft.add', 'idx', 'doc1', 1, 'FIELDS', 't1', 'hello', 't2', 'world')

    # LOAD then SORTBY
    env.expect('ft.aggregate', 'idx', '*', 'LOAD', '1', 't1', 'SORTBY', '2', '@t1', 'ASC') \
        .equal([1, ['t1', 'hello']])

    # SORTBY then LOAD
    env.expect('ft.aggregate', 'idx', '*', 'SORTBY', '2', '@t1', 'ASC', 'LOAD', '1', 't1') \
        .equal([1, ['t1', 'hello']])

    # two LOADs
    env.expect('ft.aggregate', 'idx', '*', 'LOAD', '1', 't1', 'LOAD', '1', 't2') \
        .equal([1, ['t1', 'hello', 't2', 'world']])

    # two LOADs with an apply for error
    res = env.cmd('ft.aggregate', 'idx', '*', 'LOAD', '1', 't1',
                  'APPLY', '@t2', 'AS', 'load_error',
                  'LOAD', '1', 't2')
    env.assertContains('Value was not found in result', str(res[1]))