import os.path
from includes import *
from common import waitForIndex,toSortedFlatList


GENTEXT = os.path.dirname(os.path.abspath(__file__)) + '/../ctests/genesis.txt'


def setupGenesis(env):
    """Create the text index `idx` and add the Genesis fixture to it as document `gen1`."""
    # Use a context manager so the fixture file handle is closed deterministically.
    with open(GENTEXT, 'r') as f:
        txt = f.read()
    env.expect('ft.create', 'idx', 'ON', 'HASH', 'schema', 'txt', 'text').ok()
    waitForIndex(env, 'idx')
    env.cmd('ft.add', 'idx', 'gen1', 1.0, 'fields', 'txt', txt)


def testSummarization(env):
    """Exercise SUMMARIZE and HIGHLIGHT (tags, LEN, FRAGS, SEPARATOR) on the Genesis document."""
    # Load the file
    setupGenesis(env)
    res = env.cmd('FT.SEARCH', 'idx', 'abraham isaac jacob',
                  'SUMMARIZE', 'FIELDS', 1, 'txt', 'LEN', 20,
                  'HIGHLIGHT', 'FIELDS', 1, 'txt', 'TAGS', '<b>', '</b>')
    waitForIndex(env, 'idx')
    env.assertEqual(1, res[0])
    res_txt = res[2][1]

    env.assertTrue("<b>Abraham</b>" in res_txt)
    env.assertTrue("<b>Isaac</b>" in res_txt)
    env.assertTrue("<b>Jacob</b>" in res_txt)

    res = env.cmd('FT.SEARCH', 'idx', 'abraham isaac jacob', 'HIGHLIGHT', 'fields', 1, 'txt', 'TAGS', '<i>', '</i>')
    res_txt = res[2][1]
    env.assertGreaterEqual(len(res_txt), 160000)

    res = env.cmd('FT.SEARCH', 'idx', 'abraham isaac jacob', 'SUMMARIZE', 'FIELDS', 1, 'txt', 'FRAGS', 10000)

    # Indexing raises if the reply has no field value; the value itself is not inspected.
    res_list = res[2][1]

    # Search with custom separator
    res = env.cmd('FT.SEARCH', 'idx', 'isaac',
                  'SUMMARIZE', 'FIELDS', 1, 'txt',
                  'SEPARATOR', '\r\n',
                  'FRAGS', 4, 'LEN', 3)
    env.assertEqual([1, 'gen1', [
                    'txt', 'name Isaac: and\r\nwith Isaac,\r\nIsaac. {21:4} And Abraham circumcised his son Isaac\r\nson Isaac was\r\n']], res)

    # Attempt a query which doesn't have a corresponding matched term
    res = env.cmd('FT.SEARCH', 'idx', '-blah', 'SUMMARIZE', 'LEN', 3)
    env.assertEqual(
        [1, 'gen1', ['txt', ' The First Book of Moses, called Genesis {1:1} In']], res)

    # Try the same, but attempting to highlight
    res = env.cmd('FT.SEARCH', 'idx', '-blah', 'HIGHLIGHT')
    env.assertEqual(214894, len(res[2][1]))


def testPrefixExpansion(env):
    """Check that a prefix query (`begi*`) yields one of the accepted highlighted summaries."""
    # Search with prefix
    setupGenesis(env)
    res = env.cmd('FT.SEARCH', 'idx', 'begi*',
                  'HIGHLIGHT', 'FIELDS', 1, 'txt', 'TAGS', '<b>', '</b>',
                  'SUMMARIZE', 'FIELDS', 1, 'txt', 'LEN', 20)

    # Prefix expansion uses "early exit" strategy, so the term highlighted won't necessarily be the
    # best term
    possibilities = [[1, 'gen1', ['txt', 'is] one, and they have all one language; and this they <b>begin</b> to do: and now nothing will be restrained from them, which... ']],
                     [1, 'gen1', ['txt', 'First Book of Moses, called Genesis {1:1} In the <b>beginning</b> God created the heaven and the earth. {1:2} And the earth... the mighty hunter before the LORD. {10:10} And the <b>beginning</b> of his kingdom was Babel, and Erech, and Accad, and Calneh... is] one, and they have all one language; and this they <b>begin</b> to do: and now nothing will be restrained from them, which... ']],
                     [1, 'gen1', ['txt', '49:3} Reuben, thou [art] my firstborn, my might, and the <b>beginning of</b> my strength, the excellency of dignity, and the excellency... ']]]
    env.assertIn(res, possibilities)


def testSummarizationMultiField(env):
    """Summarize two text fields of a single document in one query."""
    p1 = "Redis is an open-source in-memory database project implementing a networked, in-memory key-value store with optional durability. Redis supports different kinds of abstract data structures, such as strings, lists, maps, sets, sorted sets, hyperloglogs, bitmaps and spatial indexes. The project is mainly developed by Salvatore Sanfilippo and is currently sponsored by Redis Labs.[4] Redis Labs creates and maintains the official Redis Enterprise Pack."
    p2 = "Redis typically holds the whole dataset in memory. Versions up to 2.4 could be configured to use what they refer to as virtual memory[19] in which some of the dataset is stored on disk, but this feature is deprecated. Persistence is now achieved in two different ways: one is called snapshotting, and is a semi-persistent durability mode where the dataset is asynchronously transferred from memory to disk from time to time, written in RDB dump format. Since version 1.1 the safer alternative is AOF, an append-only file (a journal) that is written as operations modifying the dataset in memory are processed. Redis is able to rewrite the append-only file in the background in order to avoid an indefinite growth of the journal."

    env.cmd('FT.CREATE', 'idx', 'ON', 'HASH', 'SCHEMA', 'txt1', 'TEXT', 'txt2', 'TEXT')
    waitForIndex(env, 'idx')
    env.cmd('FT.ADD', 'idx', 'redis', 1.0,
            'FIELDS', 'txt1', p1, 'txt2', p2)

    # Issue the combined HIGHLIGHT + SUMMARIZE + RETURN form; the reply is discarded.
    env.cmd('FT.SEARCH', 'idx', 'memory persistence salvatore',
            'HIGHLIGHT', 'TAGS', '<b>', '</b>',
            'SUMMARIZE', 'LEN', 5,
            'RETURN', 2, 'txt1', 'txt2')

    # Now perform the multi-field search
    res = env.cmd('FT.SEARCH', 'idx', 'memory persistence salvatore',
                  'SUMMARIZE', 'FIELDS', 2, 'txt1', 'txt2', 'LEN', 5)

    env.assertEqual(1, res[0])
    env.assertEqual('redis', res[1])
    for term in ['txt1', 'memory database project implementing a networked, in-memory ... by Salvatore Sanfilippo... ', 'txt2',
                 'dataset in memory. Versions... as virtual memory[19] in... persistent durability mode where the dataset is asynchronously transferred from memory... ']:
        env.assertIn(term, res[2])


def testSummarizationDisabled(env):
    """SUMMARIZE must be rejected on indexes created with NOOFFSETS or with NOHL."""
    env.cmd('FT.CREATE', 'idx', 'ON', 'HASH', 'NOOFFSETS', 'SCHEMA', 'body', 'TEXT')
    waitForIndex(env, 'idx')
    env.cmd('FT.ADD', 'idx', 'doc', 1.0, 'FIELDS', 'body', 'hello world')
    with env.assertResponseError():
        env.cmd('FT.SEARCH', 'idx', 'hello',
                'SUMMARIZE', 'FIELDS', 1, 'body')

    env.cmd('FT.CREATE', 'idx2', 'ON', 'HASH', 'NOHL', 'SCHEMA', 'body', 'TEXT')
    # Wait on the index that was just created (previously waited on 'idx').
    waitForIndex(env, 'idx2')
    env.cmd('FT.ADD', 'idx2', 'doc2', 1.0, 'FIELDS', 'body', 'hello world')
    with env.assertResponseError():
        env.cmd('FT.SEARCH', 'idx2', 'hello',
                'SUMMARIZE', 'FIELDS', 1, 'body')


def testSummarizationNoSave(env):
    """Summarize a document added with NOSAVE; the body runs only after the env.skip() call."""
    env.skip()
    env.cmd('FT.CREATE', 'idx', 'ON', 'HASH', 'SCHEMA', 'body', 'TEXT')
    waitForIndex(env, 'idx')
    env.cmd('FT.ADD', 'idx', 'doc', 1.0, 'NOSAVE',
            'fields', 'body', 'hello world')
    res = env.cmd('FT.SEARCH', 'idx', 'hello',
                  'SUMMARIZE', 'RETURN', 1, 'body')
    env.assertEqual([1, 'doc', []], res)


def testSummarizationMeta(env):
    """Check how RETURN interacts with SUMMARIZE field selection."""
    env.cmd('ft.create', 'idx', 'ON', 'HASH', 'schema', 'foo', 'text', 'bar', 'text', 'baz', 'text')
    waitForIndex(env, 'idx')
    env.cmd('ft.add', 'idx', 'doc1', 1.0, 'fields', 'foo',
            'pill', 'bar', 'pillow', 'baz', 'piller')

    # Now, return the fields:
    res = env.cmd('ft.search', 'idx', 'pill pillow piller',
                  'RETURN', 1, 'baz', 'SUMMARIZE', 'FIELDS', 2, 'foo', 'bar')
    env.assertEqual(1, res[0])
    result = res[2]
    # The field list is flat (name, value, name, value, ...); take the names.
    names = [x[0] for x in grouper(result, 2)]

    # RETURN restricts the number of fields
    env.assertEqual(set(('baz',)), set(names))

    res = env.cmd('ft.search', 'idx', 'pill pillow piller',
                  'RETURN', 3, 'foo', 'bar', 'baz', 'SUMMARIZE')
    env.assertEqual([1, 'doc1', ['foo', 'pill... ', 'bar',
                                 'pillow... ', 'baz', 'piller... ']], res)


def testOverflow1(env):
    """Highlight a document mixing text and numeric fields; every field must come back."""
    #"FT.CREATE" "netflix" "SCHEMA" "title" "TEXT" "WEIGHT" "1" "rating" "TEXT" "WEIGHT" "1" "level" "TEXT" "WEIGHT" "1" "description" "TEXT" "WEIGHT" "1" "year" "NUMERIC" "uscore" "NUMERIC" "usize" "NUMERIC"
    #FT.ADD" "netflix" "15ad80086ccc7f" "1" "FIELDS" "title" "The Vampire Diaries" "rating" "TV-14" "level" "Parents strongly cautioned. May be unsuitable for children ages 14 and under." "description" "90" "year" "2017" "uscore" "91" "usize" "80"
    # NOTE(review): the schema declares 'leve' while the document below supplies
    # 'level'; left untouched here -- confirm whether the mismatch is intentional.
    env.cmd('FT.CREATE', 'netflix', 'ON', 'HASH',
            'SCHEMA', 'title', 'TEXT', 'rating', 'TEXT', 'leve', 'TEXT', 'description',
            'TEXT', 'year', 'NUMERIC', 'uscore', 'NUMERIC', 'usize', 'NUMERIC')
    waitForIndex(env, 'netflix')
    env.cmd('FT.ADD', "netflix", "15ad80086ccc7f", "1.0", "FIELDS", "title", "The Vampire Diaries", "rating", "TV-14", "level",
            "Parents strongly cautioned. May be unsuitable for children ages 14 and under.",
            "description", "90", "year", "2017", "uscore", "91", "usize", "80")
    res = env.cmd('ft.search', 'netflix', 'vampire', 'highlight')
    env.assertEqual(1, res[0])
    env.assertEqual('15ad80086ccc7f', res[1])
    for term in ['title', 'The <b>Vampire</b> Diaries', 'rating', 'TV-14', 'level',
                 'Parents strongly cautioned. May be unsuitable for children ages 14 and under.',
                 'description', '90', 'year', '2017', 'uscore', '91', 'usize', '80']:
        env.assertIn(term, res[2])


def testIssue364(env):
    """Summarize a returned field that does not contain the matched term."""
    # FT.CREATE testset "SCHEMA" "permit_timestamp" "NUMERIC" "SORTABLE" "job_category" "TEXT" "NOSTEM" "address" "TEXT" "NOSTEM" "neighbourhood" "TAG" "SORTABLE" "description" "TEXT" "building_type" "TEXT" "WEIGHT" "20" "NOSTEM" "SORTABLE" "work_type" "TEXT" "NOSTEM" "SORTABLE" "floor_area" "NUMERIC" "SORTABLE" "construction_value" "NUMERIC" "SORTABLE" "zoning" "TAG" "units_added" "NUMERIC" "SORTABLE" "location" "GEO"
    # ft.add testset 109056573-002 1 fields building_type "Retail and Shops" description "To change the use from a Restaurant to a Personal Service Shop (Great Clips)"
    # FT.SEARCH testset retail RETURN 1 description SUMMARIZE LIMIT 0 1
    env.cmd('ft.create', 'idx', 'ON', 'HASH', 'SCHEMA', 'building_type', 'TEXT', 'description', 'TEXT')
    waitForIndex(env, 'idx')
    env.cmd('ft.add', 'idx', 'doc1', '1.0', 'FIELDS',
            'building_type', 'Retail and Shops',
            'description', 'To change the use from a Restaurant to a Personal Service Shop (Great Clips)')

    env.cmd('ft.add', 'idx', 'doc2', '1.0', 'FIELDS',
            'building_type', 'Retail and Shops',
            'description', 'To change the use from a Restaurant to a Personal Service Shop (Great Clips) at the end')

    ret = env.cmd('FT.SEARCH', 'idx', 'retail', 'RETURN', 1, 'description', 'SUMMARIZE')
    expected = [2, 'doc2', ['description', 'To change the use from a Restaurant to a Personal Service Shop (Great Clips) at the'], 'doc1', ['description', 'To change the use from a Restaurant to a Personal Service Shop (Great Clips)']]
    # Compare order-insensitively via the flattened, sorted form.
    env.assertEqual(toSortedFlatList(expected), toSortedFlatList(ret))


def grouper(iterable, n, fillvalue=None):
    "Collect data into fixed-length chunks or blocks"
    # izip_longest exists only on Python 2; Python 3 names it zip_longest.
    try:
        from itertools import izip_longest as zip_longest
    except ImportError:
        from itertools import zip_longest
    # grouper('ABCDEFG', 3, 'x') --> ABC DEF Gxx
    # The same iterator repeated n times makes each output tuple consume n items.
    args = [iter(iterable)] * n
    return zip_longest(fillvalue=fillvalue, *args)


def testFailedHighlight(env):
    """Highlighting a NOINDEX field, an empty field or a stop-word-only field returns it unchanged."""
    #test NOINDEX
    env.cmd('ft.create', 'idx', 'ON', 'HASH', 'PREFIX', 1, 'doc1',
            'SCHEMA', 'f1', 'TEXT', 'f2', 'TEXT', 'f3', 'TEXT', 'NOINDEX')
    waitForIndex(env, 'idx')
    env.cmd('ft.add', 'idx', 'doc1', '1.0', 'FIELDS', 'f1', 'foo foo foo', 'f2', 'bar bar bar', 'f3', 'baz baz baz')
    env.assertEqual(toSortedFlatList([1, 'doc1', ['f1', 'foo foo foo', 'f2', 'bar bar bar', 'f3', 'baz baz baz']]),
        toSortedFlatList(env.cmd('ft.search idx foo')))
    env.assertEqual(toSortedFlatList([1, 'doc1', ['f1', '<b>foo</b> <b>foo</b> <b>foo</b>', 'f2', 'bar bar bar', 'f3', 'baz baz baz']]),
        toSortedFlatList(env.cmd('ft.search', 'idx', 'foo', 'highlight', 'fields', '1', 'f1')))
    env.assertEqual(toSortedFlatList([1, 'doc1', ['f2', 'bar bar bar', 'f1', 'foo foo foo', 'f3', 'baz baz baz']]),
        toSortedFlatList(env.cmd('ft.search idx foo highlight fields 1 f2')))
    env.assertEqual(toSortedFlatList([1, 'doc1', ['f3', 'baz baz baz', 'f1', 'foo foo foo', 'f2', 'bar bar bar']]),
        toSortedFlatList(env.cmd('ft.search idx foo highlight fields 1 f3')))

    #test empty string
    env.cmd('ft.create', 'idx2', 'ON', 'HASH', 'PREFIX', 1, 'doc2',
            'SCHEMA', 'f1', 'TEXT', 'f2', 'TEXT', 'f3', 'TEXT')
    # Wait on the index that was just created (previously waited on 'idx').
    waitForIndex(env, 'idx2')
    env.cmd('ft.add', 'idx2', 'doc2', '1.0', 'FIELDS', 'f1', 'foo foo foo', 'f2', '', 'f3', 'baz baz baz')
    env.assertEqual(toSortedFlatList([1, 'doc2', ['f1', '<b>foo</b> <b>foo</b> <b>foo</b>', 'f2', '', 'f3', 'baz baz baz']]),
        toSortedFlatList(env.cmd('ft.search idx2 foo highlight fields 1 f1')))
    env.assertEqual(toSortedFlatList([1, 'doc2', ['f2', '', 'f1', 'foo foo foo', 'f3', 'baz baz baz']]),
        toSortedFlatList(env.cmd('ft.search idx2 foo highlight fields 1 f2')))
    env.assertEqual(toSortedFlatList([1, 'doc2', ['f3', 'baz baz baz', 'f1', 'foo foo foo', 'f2', '']]),
        toSortedFlatList(env.cmd('ft.search idx2 foo highlight fields 1 f3')))

    #test stop word list
    env.cmd('ft.create', 'idx3', 'ON', 'HASH', 'PREFIX', 1, 'doc3',
            'SCHEMA', 'f1', 'TEXT', 'f2', 'TEXT', 'f3', 'TEXT')
    # Wait on the index that was just created (previously waited on 'idx').
    waitForIndex(env, 'idx3')
    env.cmd('ft.add', 'idx3', 'doc3', '1.0', 'FIELDS', 'f1', 'foo foo foo', 'f2', 'not a', 'f3', 'baz baz baz')
    env.assertEqual(toSortedFlatList([1, 'doc3', ['f1', '<b>foo</b> <b>foo</b> <b>foo</b>', 'f2', 'not a', 'f3', 'baz baz baz']]),
        toSortedFlatList(env.cmd('ft.search idx3 foo highlight fields 1 f1')))
    env.assertEqual(toSortedFlatList([1, 'doc3', ['f2', 'not a', 'f1', 'foo foo foo', 'f3', 'baz baz baz']]),
        toSortedFlatList(env.cmd('ft.search idx3 foo highlight fields 1 f2')))
    env.assertEqual(toSortedFlatList([1, 'doc3', ['f3', 'baz baz baz', 'f1', 'foo foo foo', 'f2', 'not a']]),
        toSortedFlatList(env.cmd('ft.search idx3 foo highlight fields 1 f3')))