1import os.path
2from includes import *
3from common import waitForIndex,toSortedFlatList
4
5
# Path of the Genesis text fixture, anchored at this file's directory.
GENTEXT = '/'.join(
    [os.path.dirname(os.path.abspath(__file__)), '..', 'ctests', 'genesis.txt'])
7
8
def setupGenesis(env):
    """Create index 'idx' and load the Genesis fixture into it.

    Reads the whole file at GENTEXT, creates index 'idx' over hashes with a
    single TEXT field 'txt', waits for the index, and adds the file contents
    as document 'gen1'.
    """
    # Context manager so the fixture's file handle is closed right away
    # instead of being left for the garbage collector.
    with open(GENTEXT, 'r') as fp:
        txt = fp.read()
    env.expect('ft.create', 'idx', 'ON', 'HASH', 'schema', 'txt', 'text').ok()
    waitForIndex(env, 'idx')
    env.cmd('ft.add', 'idx', 'gen1', 1.0, 'fields', 'txt', txt)
14
def testSummarization(env):
    """Exercise SUMMARIZE and HIGHLIGHT against the Genesis document."""
    # Load the file
    setupGenesis(env)
    res = env.cmd('FT.SEARCH', 'idx', 'abraham isaac jacob',
                  'SUMMARIZE', 'FIELDS', 1, 'txt', 'LEN', 20,
                  'HIGHLIGHT', 'FIELDS', 1, 'txt', 'TAGS', '<b>', '</b>')
    waitForIndex(env, 'idx')
    env.assertEqual(1, res[0])
    res_txt = res[2][1]

    # Every query term must show up wrapped in the requested tags.
    env.assertTrue("<b>Abraham</b>" in res_txt)
    env.assertTrue("<b>Isaac</b>" in res_txt)
    env.assertTrue("<b>Jacob</b>" in res_txt)

    # HIGHLIGHT without SUMMARIZE: the returned field is expected to be large.
    res = env.cmd('FT.SEARCH', 'idx', 'abraham isaac jacob',
                  'HIGHLIGHT', 'fields', 1, 'txt', 'TAGS', '<i>', '</i>')
    res_txt = res[2][1]
    env.assertGreaterEqual(len(res_txt), 160000)

    # Very large FRAGS value: only the reply shape is touched, the summary
    # content itself is not inspected.
    res = env.cmd('FT.SEARCH', 'idx', 'abraham isaac jacob',
                  'SUMMARIZE', 'FIELDS', 1, 'txt', 'FRAGS', 10000)
    res_list = res[2][1]

    # Search with custom separator
    res = env.cmd('FT.SEARCH', 'idx', 'isaac',
                  'SUMMARIZE', 'FIELDS', 1, 'txt',
                  'SEPARATOR', '\r\n',
                  'FRAGS', 4, 'LEN', 3)
    # Plain int literals: `1L` is a syntax error on Python 3, and 1 == 1L on
    # Python 2, so the comparison is unchanged.
    env.assertEqual([1, 'gen1', [
                     'txt', 'name Isaac: and\r\nwith Isaac,\r\nIsaac. {21:4} And Abraham circumcised his son Isaac\r\nson Isaac was\r\n']], res)

    # Attempt a query which doesn't have a corresponding matched term
    res = env.cmd('FT.SEARCH', 'idx', '-blah', 'SUMMARIZE', 'LEN', 3)
    env.assertEqual(
        [1, 'gen1', ['txt', ' The First Book of Moses, called Genesis {1:1} In']], res)

    # Try the same, but attempting to highlight
    res = env.cmd('FT.SEARCH', 'idx', '-blah', 'HIGHLIGHT')
    env.assertEqual(214894, len(res[2][1]))
57
def testPrefixExpansion(env):
    """Highlight + summarize a prefix query and accept any known-good reply."""
    # Search with prefix
    setupGenesis(env)
    res = env.cmd('FT.SEARCH', 'idx', 'begi*',
                  'HIGHLIGHT', 'FIELDS', 1, 'txt', 'TAGS', '<b>', '</b>',
                  'SUMMARIZE', 'FIELDS', 1, 'txt', 'LEN', 20)

    # Prefix expansion uses "early exit" strategy, so the term highlighted won't necessarily be the
    # best term
    # Plain `1` instead of the Python 2-only `1L` (equal under Python 2).
    possibilities = [[1, 'gen1', ['txt', 'is] one, and they have all one language; and this they <b>begin</b> to do: and now nothing will be restrained from them, which... ']],
                     [1, 'gen1', ['txt', 'First Book of Moses, called Genesis {1:1} In the <b>beginning</b> God created the heaven and the earth. {1:2} And the earth... the mighty hunter before the LORD. {10:10} And the <b>beginning</b> of his kingdom was Babel, and Erech, and Accad, and Calneh... is] one, and they have all one language; and this they <b>begin</b> to do: and now nothing will be restrained from them, which... ']],
                     [1, 'gen1', ['txt', '49:3} Reuben, thou [art] my firstborn, my might, and the <b>beginning of</b> my strength, the excellency of dignity, and the excellency... ']]]
    env.assertIn(res, possibilities)
71
def testSummarizationMultiField(env):
    """Summarize two TEXT fields of one document in a single search."""
    p1 = "Redis is an open-source in-memory database project implementing a networked, in-memory key-value store with optional durability. Redis supports different kinds of abstract data structures, such as strings, lists, maps, sets, sorted sets, hyperloglogs, bitmaps and spatial indexes. The project is mainly developed by Salvatore Sanfilippo and is currently sponsored by Redis Labs.[4] Redis Labs creates and maintains the official Redis Enterprise Pack."
    p2 = "Redis typically holds the whole dataset in memory. Versions up to 2.4 could be configured to use what they refer to as virtual memory[19] in which some of the dataset is stored on disk, but this feature is deprecated. Persistence is now achieved in two different ways: one is called snapshotting, and is a semi-persistent durability mode where the dataset is asynchronously transferred from memory to disk from time to time, written in RDB dump format. Since version 1.1 the safer alternative is AOF, an append-only file (a journal) that is written as operations modifying the dataset in memory are processed. Redis is able to rewrite the append-only file in the background in order to avoid an indefinite growth of the journal."

    env.cmd('FT.CREATE', 'idx', 'ON', 'HASH', 'SCHEMA', 'txt1', 'TEXT', 'txt2', 'TEXT')
    waitForIndex(env, 'idx')
    env.cmd('FT.ADD', 'idx', 'redis', 1.0,
            'FIELDS', 'txt1', p1, 'txt2', p2)

    # Combined HIGHLIGHT + SUMMARIZE + RETURN query; the reply is discarded,
    # so this only checks that the command is accepted.
    env.cmd('FT.SEARCH', 'idx', 'memory persistence salvatore',
            'HIGHLIGHT', 'TAGS', '<b>', '</b>',
            'SUMMARIZE', 'LEN', 5,
            'RETURN', 2, 'txt1', 'txt2')

    # Now perform the multi-field search
    res = env.cmd('FT.SEARCH', 'idx', 'memory persistence salvatore',
                  'SUMMARIZE', 'FIELDS', 2, 'txt1', 'txt2', 'LEN', 5)

    # Plain `1` instead of the Python 2-only `1L` (equal under Python 2).
    env.assertEqual(1, res[0])
    env.assertEqual('redis', res[1])
    # Membership checks rather than list equality: the order of the fields in
    # the reply is not asserted.
    for term in ['txt1', 'memory database project implementing a networked, in-memory ... by Salvatore Sanfilippo... ', 'txt2',
                 'dataset in memory. Versions... as virtual memory[19] in... persistent durability mode where the dataset is asynchronously transferred from memory... ']:
        env.assertIn(term, res[2])
96
97
def testSummarizationDisabled(env):
    """SUMMARIZE must be rejected on indexes created with NOOFFSETS or NOHL."""
    env.cmd('FT.CREATE', 'idx', 'ON', 'HASH', 'NOOFFSETS', 'SCHEMA', 'body', 'TEXT')
    waitForIndex(env, 'idx')
    env.cmd('FT.ADD', 'idx', 'doc', 1.0, 'FIELDS', 'body', 'hello world')
    with env.assertResponseError():
        env.cmd('FT.SEARCH', 'idx', 'hello',
                'SUMMARIZE', 'FIELDS', 1, 'body')

    env.cmd('FT.CREATE', 'idx2', 'ON', 'HASH', 'NOHL', 'SCHEMA', 'body', 'TEXT')
    # Wait on the index that was just created; the original waited on 'idx'
    # again, which looks like a copy-paste slip.
    waitForIndex(env, 'idx2')
    env.cmd('FT.ADD', 'idx2', 'doc2', 1.0, 'FIELDS', 'body', 'hello world')
    with env.assertResponseError():
        env.cmd('FT.SEARCH', 'idx2', 'hello',
                'SUMMARIZE', 'FIELDS', 1, 'body')
112
def testSummarizationNoSave(env):
    """SUMMARIZE on a document added with NOSAVE (starts with env.skip())."""
    env.skip()
    env.cmd('FT.CREATE', 'idx', 'ON', 'HASH', 'SCHEMA', 'body', 'TEXT')
    waitForIndex(env, 'idx')
    env.cmd('FT.ADD', 'idx', 'doc', 1.0, 'NOSAVE',
            'fields', 'body', 'hello world')
    res = env.cmd('FT.SEARCH', 'idx', 'hello',
                  'SUMMARIZE', 'RETURN', 1, 'body')
    # The expected reply carries no fields for the document.
    # Plain `1` instead of the Python 2-only `1L` (equal under Python 2).
    env.assertEqual([1, 'doc', []], res)
123
def testSummarizationMeta(env):
    """Check how RETURN and SUMMARIZE field lists interact."""
    env.cmd('ft.create', 'idx', 'ON', 'HASH', 'schema', 'foo', 'text', 'bar', 'text', 'baz', 'text')
    waitForIndex(env, 'idx')
    env.cmd('ft.add', 'idx', 'doc1', 1.0, 'fields', 'foo',
            'pill', 'bar', 'pillow', 'baz', 'piller')

    # Now, return the fields:
    res = env.cmd('ft.search', 'idx', 'pill pillow piller',
                  'RETURN', 1, 'baz', 'SUMMARIZE', 'FIELDS', 2, 'foo', 'bar')
    env.assertEqual(1, res[0])
    result = res[2]
    # The reply is a flat [name, value, name, value, ...] list; keep the names.
    names = [x[0] for x in grouper(result, 2)]

    # RETURN restricts the number of fields
    env.assertEqual(set(('baz',)), set(names))

    res = env.cmd('ft.search', 'idx', 'pill pillow piller',
                  'RETURN', 3, 'foo', 'bar', 'baz', 'SUMMARIZE')
    # Plain `1` instead of the Python 2-only `1L` (equal under Python 2).
    env.assertEqual([1, 'doc1', ['foo', 'pill... ', 'bar',
                                 'pillow... ', 'baz', 'piller... ']], res)
144
145
def testOverflow1(env):
    """Highlight a document from the 'netflix' index and check its fields."""
    #"FT.CREATE" "netflix" "SCHEMA" "title" "TEXT" "WEIGHT" "1" "rating" "TEXT" "WEIGHT" "1" "level" "TEXT" "WEIGHT" "1" "description" "TEXT" "WEIGHT" "1" "year" "NUMERIC" "uscore" "NUMERIC" "usize" "NUMERIC"
    #FT.ADD" "netflix" "15ad80086ccc7f" "1" "FIELDS" "title" "The Vampire Diaries" "rating" "TV-14" "level" "Parents strongly cautioned. May be unsuitable for children ages 14 and under." "description" "90" "year" "2017" "uscore" "91" "usize" "80"
    # NOTE(review): the schema declares 'leve' while the document and the
    # commands quoted above use 'level' -- possibly a typo; left unchanged
    # because it may be deliberate. Confirm before editing.
    env.cmd('FT.CREATE', 'netflix', 'ON', 'HASH',
            'SCHEMA', 'title', 'TEXT', 'rating', 'TEXT', 'leve', 'TEXT', 'description',
            'TEXT', 'year', 'NUMERIC', 'uscore', 'NUMERIC', 'usize', 'NUMERIC')
    waitForIndex(env, 'netflix')
    env.cmd('FT.ADD', "netflix", "15ad80086ccc7f", "1.0", "FIELDS", "title", "The Vampire Diaries", "rating", "TV-14", "level",
            "Parents strongly cautioned. May be unsuitable for children ages 14 and under.",
            "description", "90", "year", "2017", "uscore", "91", "usize", "80")
    res = env.cmd('ft.search', 'netflix', 'vampire', 'highlight')
    # Plain `1` instead of the Python 2-only `1L` (equal under Python 2).
    env.assertEqual(1, res[0])
    env.assertEqual('15ad80086ccc7f', res[1])
    # Membership checks only: the order of fields in the reply is not asserted.
    for term in ['title', 'The <b>Vampire</b> Diaries', 'rating', 'TV-14', 'level',
                 'Parents strongly cautioned. May be unsuitable for children ages 14 and under.',
                 'description', '90', 'year', '2017', 'uscore', '91', 'usize', '80']:
        env.assertIn(term, res[2])
163
def testIssue364(env):
    """SUMMARIZE with RETURN on a field other than the one that matched."""
    # FT.CREATE testset "SCHEMA" "permit_timestamp" "NUMERIC" "SORTABLE" "job_category" "TEXT" "NOSTEM" "address" "TEXT" "NOSTEM"  "neighbourhood" "TAG" "SORTABLE" "description" "TEXT"  "building_type" "TEXT" "WEIGHT" "20" "NOSTEM" "SORTABLE"     "work_type" "TEXT" "NOSTEM" "SORTABLE"     "floor_area" "NUMERIC" "SORTABLE"     "construction_value" "NUMERIC" "SORTABLE"     "zoning" "TAG"     "units_added" "NUMERIC" "SORTABLE"     "location" "GEO"
    # ft.add testset 109056573-002 1 fields building_type "Retail and Shops" description "To change the use from a Restaurant to a Personal Service Shop (Great Clips)"
    # FT.SEARCH testset retail RETURN 1 description SUMMARIZE LIMIT 0 1
    env.cmd('ft.create', 'idx', 'ON', 'HASH', 'SCHEMA', 'building_type', 'TEXT', 'description', 'TEXT')
    waitForIndex(env, 'idx')
    env.cmd('ft.add', 'idx', 'doc1', '1.0', 'FIELDS',
            'building_type', 'Retail and Shops',
            'description', 'To change the use from a Restaurant to a Personal Service Shop (Great Clips)')

    env.cmd('ft.add', 'idx', 'doc2', '1.0', 'FIELDS',
            'building_type', 'Retail and Shops',
            'description', 'To change the use from a Restaurant to a Personal Service Shop (Great Clips) at the end')

    ret = env.cmd('FT.SEARCH', 'idx', 'retail', 'RETURN', 1, 'description', 'SUMMARIZE')
    # Plain `2` instead of the Python 2-only `2L` (equal under Python 2).
    expected = [2, 'doc2', ['description', 'To change the use from a Restaurant to a Personal Service Shop (Great Clips) at the'], 'doc1', ['description', 'To change the use from a Restaurant to a Personal Service Shop (Great Clips)']]
    # Both sides go through toSortedFlatList before comparison, so the order
    # of documents in the reply is not asserted.
    env.assertEqual(toSortedFlatList(expected), toSortedFlatList(ret))
181
def grouper(iterable, n, fillvalue=None):
    """Collect data into fixed-length chunks or blocks.

    grouper('ABCDEFG', 3, 'x') --> ABC DEF Gxx

    iterable  -- any iterable to be chunked
    n         -- chunk length
    fillvalue -- value used to pad the last chunk when the input length is
                 not a multiple of n

    Returns an iterator of n-tuples.
    """
    # The function is named zip_longest on Python 3 and izip_longest on
    # Python 2; support both so the helper works under either interpreter.
    try:
        from itertools import zip_longest
    except ImportError:
        from itertools import izip_longest as zip_longest
    # The same iterator object repeated n times: each output tuple pulls n
    # consecutive items from the input.
    args = [iter(iterable)] * n
    return zip_longest(*args, fillvalue=fillvalue)
188
def testFailedHighlight(env):
    """HIGHLIGHT on a field with nothing to highlight returns it unchanged.

    Three indexes are used, one per scenario: a NOINDEX field, an empty
    string value, and a value labelled by the original comment as a stop
    word list.
    """
    #test NOINDEX
    env.cmd('ft.create', 'idx', 'ON', 'HASH', 'PREFIX', 1, 'doc1',
            'SCHEMA', 'f1', 'TEXT', 'f2', 'TEXT', 'f3', 'TEXT', 'NOINDEX')
    waitForIndex(env, 'idx')
    env.cmd('ft.add', 'idx', 'doc1', '1.0', 'FIELDS', 'f1', 'foo foo foo', 'f2', 'bar bar bar', 'f3', 'baz baz baz')
    # Plain `1` instead of the Python 2-only `1L` throughout (equal under
    # Python 2, valid syntax under Python 3).
    env.assertEqual(toSortedFlatList([1, 'doc1', ['f1', 'foo foo foo', 'f2', 'bar bar bar', 'f3', 'baz baz baz']]),
        toSortedFlatList(env.cmd('ft.search idx foo')))
    env.assertEqual(toSortedFlatList([1, 'doc1', ['f1', '<b>foo</b> <b>foo</b> <b>foo</b>', 'f2', 'bar bar bar', 'f3', 'baz baz baz']]),
        toSortedFlatList(env.cmd('ft.search', 'idx', 'foo', 'highlight', 'fields', '1', 'f1')))
    env.assertEqual(toSortedFlatList([1, 'doc1', ['f2', 'bar bar bar', 'f1', 'foo foo foo', 'f3', 'baz baz baz']]),
        toSortedFlatList(env.cmd('ft.search idx foo highlight fields 1 f2')))
    env.assertEqual(toSortedFlatList([1, 'doc1', ['f3', 'baz baz baz', 'f1', 'foo foo foo', 'f2', 'bar bar bar']]),
        toSortedFlatList(env.cmd('ft.search idx foo highlight fields 1 f3')))

    #test empty string
    env.cmd('ft.create', 'idx2', 'ON', 'HASH', 'PREFIX', 1, 'doc2',
            'SCHEMA', 'f1', 'TEXT', 'f2', 'TEXT', 'f3', 'TEXT')
    # Wait on the index just created (the original waited on 'idx' again).
    waitForIndex(env, 'idx2')
    env.cmd('ft.add', 'idx2', 'doc2', '1.0', 'FIELDS', 'f1', 'foo foo foo', 'f2', '', 'f3', 'baz baz baz')
    env.assertEqual(toSortedFlatList([1, 'doc2', ['f1', '<b>foo</b> <b>foo</b> <b>foo</b>', 'f2', '', 'f3', 'baz baz baz']]),
        toSortedFlatList(env.cmd('ft.search idx2 foo highlight fields 1 f1')))
    env.assertEqual(toSortedFlatList([1, 'doc2', ['f2', '', 'f1', 'foo foo foo', 'f3', 'baz baz baz']]),
        toSortedFlatList(env.cmd('ft.search idx2 foo highlight fields 1 f2')))
    env.assertEqual(toSortedFlatList([1, 'doc2', ['f3', 'baz baz baz', 'f1', 'foo foo foo', 'f2', '']]),
        toSortedFlatList(env.cmd('ft.search idx2 foo highlight fields 1 f3')))

    #test stop word list
    env.cmd('ft.create', 'idx3', 'ON', 'HASH', 'PREFIX', 1, 'doc3',
            'SCHEMA', 'f1', 'TEXT', 'f2', 'TEXT', 'f3', 'TEXT')
    # Wait on the index just created (the original waited on 'idx' again).
    waitForIndex(env, 'idx3')
    env.cmd('ft.add', 'idx3', 'doc3', '1.0', 'FIELDS', 'f1', 'foo foo foo', 'f2', 'not a', 'f3', 'baz baz baz')
    env.assertEqual(toSortedFlatList([1, 'doc3', ['f1', '<b>foo</b> <b>foo</b> <b>foo</b>', 'f2', 'not a', 'f3', 'baz baz baz']]),
        toSortedFlatList(env.cmd('ft.search idx3 foo highlight fields 1 f1')))
    env.assertEqual(toSortedFlatList([1, 'doc3', ['f2', 'not a', 'f1', 'foo foo foo', 'f3', 'baz baz baz']]),
        toSortedFlatList(env.cmd('ft.search idx3 foo highlight fields 1 f2')))
    env.assertEqual(toSortedFlatList([1, 'doc3', ['f3', 'baz baz baz', 'f1', 'foo foo foo', 'f2', 'not a']]),
        toSortedFlatList(env.cmd('ft.search idx3 foo highlight fields 1 f3')))
227