import math
from includes import *
from common import getConnectionByEnv, waitForIndex, server_version_at_least


def testHammingScorer(env):
    """Check the HAMMING scorer against document payloads.

    Sixteen documents are added, each with a payload made of one hex digit
    repeated eight times.  Searching with the identical payload must return
    that document first with score '1'; a payload of a different length, or
    no payload at all, must give the first hit a score of '0'.
    """
    env.expect('ft.create', 'idx', 'ON', 'HASH', 'SCORE_FIELD', '__score',
               'schema', 'title', 'text').ok()
    waitForIndex(env, 'idx')

    for i in range(16):
        env.expect('ft.add', 'idx', 'doc%d' % i, 1,
                   'payload', ('%x' % i) * 8,
                   'fields', 'title', 'hello world').ok()

    for i in range(16):
        res = env.cmd('ft.search', 'idx', '*', 'PAYLOAD', ('%x' % i) * 8,
                      'SCORER', 'HAMMING', 'WITHSCORES', 'WITHPAYLOADS')
        env.assertEqual(res[1], 'doc%d' % i)
        env.assertEqual(res[2], '1')

        # test with payload of different length
        res = env.cmd('ft.search', 'idx', '*', 'PAYLOAD', ('%x' % i) * 7,
                      'SCORER', 'HAMMING', 'WITHSCORES', 'WITHPAYLOADS')
        env.assertEqual(res[2], '0')

        # test with no payload
        res = env.cmd('ft.search', 'idx', '*',
                      'SCORER', 'HAMMING', 'WITHSCORES', 'WITHPAYLOADS')
        env.assertEqual(res[2], '0')


def testScoreTagIndex(env):
    """Compare the top five hits of every listed scorer with recorded values.

    Twenty-five documents are added with a decreasing document score and
    with 'hello world ' / 'lorem ipsum ' repeated ``n`` times (so doc0 has
    empty fields).  For each scorer the reply of a NOCONTENT WITHSCORES
    search, with scores rounded to two decimals, must equal the recorded
    list for the current topology (single instance or cluster).
    """
    env.expect('ft.create', 'idx', 'ON', 'HASH', 'SCORE_FIELD', '__score',
               'schema', 'title', 'text', 'weight', 10, 'body', 'text').ok()
    waitForIndex(env, 'idx')

    N = 25
    for n in range(N):
        sc = math.sqrt(float(N - n + 10) / float(N + 10))
        env.expect('ft.add', 'idx', 'doc%d' % n, sc, 'fields',
                   'title', 'hello world ' * n, 'body', 'lorem ipsum ' * n).ok()

    # Plain int literals (no Python 2 ``L`` suffix): they compare equal to
    # longs on Python 2 and keep the module parseable on Python 3.
    results_single = [
        [24, 'doc1', 1.97, 'doc2', 1.94, 'doc3',
         1.91, 'doc4', 1.88, 'doc5', 1.85],
        [24, 'doc1', 0.9, 'doc2', 0.59, 'doc3',
         0.43, 'doc4', 0.34, 'doc5', 0.28],
        [24, 'doc4', 1.75, 'doc5', 1.75, 'doc3',
         1.74, 'doc6', 1.74, 'doc7', 1.72],
        [24, 'doc24', 480.0, 'doc23', 460.0, 'doc22',
         440.0, 'doc21', 420.0, 'doc20', 400.0],
        [24, 'doc1', 0.99, 'doc2', 0.97, 'doc3',
         0.96, 'doc4', 0.94, 'doc5', 0.93],
    ]
    results_cluster = [
        [24, 'doc1', 1.97, 'doc2', 1.94, 'doc3',
         1.91, 'doc4', 1.88, 'doc5', 1.85],
        [24, 'doc1', 0.9, 'doc2', 0.59, 'doc3',
         0.43, 'doc4', 0.34, 'doc5', 0.28],
        [24, 'doc4', 1.76, 'doc5', 1.75, 'doc3',
         1.74, 'doc6', 1.73, 'doc7', 1.72],
        [24, 'doc24', 480.0, 'doc23', 460.0, 'doc22',
         440.0, 'doc21', 420.0, 'doc20', 400.0],
        [24, 'doc1', 0.99, 'doc2', 0.97, 'doc3',
         0.96, 'doc4', 0.94, 'doc5', 0.93],
    ]

    # Order matters: scorers[i] is checked against expected_results[i].
    scorers = ['TFIDF', 'TFIDF.DOCNORM', 'BM25', 'DISMAX', 'DOCSCORE']
    # NOTE(review): spelled is_cluster() here but isCluster() elsewhere in
    # this file - confirm that env provides both spellings.
    expected_results = results_cluster if env.is_cluster() else results_single

    for _ in env.reloading_iterator():
        waitForIndex(env, 'idx')
        for i, scorer in enumerate(scorers):
            res = env.cmd('ft.search', 'idx', 'hello world', 'scorer',
                          scorer, 'nocontent', 'withscores', 'limit', 0, 5)
            # Reply is [count, id, score, id, score, ...]: round only the
            # entries at even indices >= 2 and leave the rest untouched.
            res = [round(float(x), 2) if j > 0 and j % 2 == 0 else x
                   for j, x in enumerate(res)]
            env.assertListEqual(expected_results[i], res)


def testDocscoreScorerExplanation(env):
    """Check the EXPLAINSCORE text produced by the DOCSCORE scorer.

    Three documents with scores 0.5, 1 and 0.1 are added; the explanations
    read from reply slots 2, 5 and 8 must report 1.00, 0.50 and 0.10 in
    that order.
    """
    env.expect('ft.create', 'idx', 'ON', 'HASH', 'SCORE_FIELD', '__score',
               'schema', 'title', 'text', 'weight', 10, 'body', 'text').ok()
    waitForIndex(env, 'idx')
    env.expect('ft.add', 'idx', 'doc1', 0.5, 'fields',
               'title', 'hello world', ' body', 'lorem ist ipsum').ok()
    env.expect('ft.add', 'idx', 'doc2', 1, 'fields',
               'title', 'hello another world', ' body', 'lorem ist ipsum lorem lorem').ok()
    env.expect('ft.add', 'idx', 'doc3', 0.1, 'fields',
               'title', 'hello yet another world', ' body', 'lorem ist ipsum lorem lorem').ok()

    res = env.cmd('ft.search', 'idx', 'hello world', 'withscores', 'EXPLAINSCORE', 'scorer', 'DOCSCORE')
    env.assertEqual(res[0], 3)
    env.assertEqual(res[2][1], "Document's score is 1.00")
    env.assertEqual(res[5][1], "Document's score is 0.50")
    env.assertEqual(res[8][1], "Document's score is 0.10")


def testTFIDFScorerExplanation(env):
    """Check the EXPLAINSCORE tree of the default scorer, including nesting.

    The first part verifies the explanation of a two-term query for three
    documents.  The second part (skipped on cluster) verifies nested
    queries; for the deepest one the expected tree depends on whether the
    server version is at least 6.2.0.
    """
    conn = getConnectionByEnv(env)
    env.expect('ft.create', 'idx', 'ON', 'HASH', 'SCORE_FIELD', '__score',
               'schema', 'title', 'text', 'weight', 10, 'body', 'text').ok()
    waitForIndex(env, 'idx')

    conn.execute_command('ft.add', 'idx', 'doc1', 0.5, 'fields',
                         'title', 'hello world', ' body', 'lorem ist ipsum')
    conn.execute_command('ft.add', 'idx', 'doc2', 1, 'fields',
                         'title', 'hello another world', ' body', 'lorem ist ipsum lorem lorem')
    conn.execute_command('ft.add', 'idx', 'doc3', 0.1, 'fields',
                         'title', 'hello yet another world', ' body', 'lorem ist ipsum lorem lorem')

    # Building blocks shared by the expected explanation trees below.
    term = '(TFIDF 10.00 = Weight 1.00 * TF 10 * IDF 1.00)'
    two_terms = ['(Weight 1.00 * total children TFIDF 20.00)', [term, term]]

    res = env.cmd('ft.search', 'idx', 'hello world', 'withscores', 'EXPLAINSCORE')
    env.assertEqual(res[0], 3)
    env.assertEqual(res[2][1], ['Final TFIDF : words TFIDF 20.00 * document score 0.50 / norm 10 / slop 1',
                                [two_terms]])
    env.assertEqual(res[5][1], ['Final TFIDF : words TFIDF 20.00 * document score 1.00 / norm 10 / slop 2',
                                [two_terms]])
    env.assertEqual(res[8][1], ['Final TFIDF : words TFIDF 20.00 * document score 0.10 / norm 10 / slop 3',
                                [two_terms]])

    # test depth limit

    # TODO: re-enable this
    env.skipOnCluster()

    res = env.cmd('ft.search', 'idx', 'hello(world(world))', 'withscores', 'EXPLAINSCORE', 'limit', 0, 1)
    env.assertEqual(res[2][1], ['Final TFIDF : words TFIDF 30.00 * document score 0.50 / norm 10 / slop 1',
                                [['(Weight 1.00 * total children TFIDF 30.00)',
                                  [term, two_terms]]]])

    # Full tree, expected from servers at version 6.2.0 and above.
    res1 = ['Final TFIDF : words TFIDF 40.00 * document score 1.00 / norm 10 / slop 1',
            [['(Weight 1.00 * total children TFIDF 40.00)',
              [term,
               ['(Weight 1.00 * total children TFIDF 30.00)',
                [term, two_terms]]]]]]
    # Trimmed tree: the innermost node is reported without its children.
    res2 = ['Final TFIDF : words TFIDF 40.00 * document score 1.00 / norm 10 / slop 1',
            [['(Weight 1.00 * total children TFIDF 40.00)',
              [term,
               ['(Weight 1.00 * total children TFIDF 30.00)',
                [term, '(Weight 1.00 * total children TFIDF 20.00)']]]]]]

    actual_res = env.cmd('ft.search', 'idx', 'hello(world(world(hello)))', 'withscores', 'EXPLAINSCORE', 'limit', 0, 1)
    # on older versions we trim the reply to remain under the 7-layer limitation.
    res = res1 if server_version_at_least(env, "6.2.0") else res2
    env.assertEqual(actual_res[2][1], res)


def testBM25ScorerExplanation(env):
    """Check the EXPLAINSCORE output of the BM25 scorer.

    On cluster only the 'Final BM25' prefix of each explanation is checked;
    otherwise the complete explanation tree of all three hits is compared.
    """
    env.expect('ft.create', 'idx', 'ON', 'HASH', 'SCORE_FIELD', '__score',
               'schema', 'title', 'text', 'weight', 10, 'body', 'text').ok()
    waitForIndex(env, 'idx')
    env.expect('ft.add', 'idx', 'doc1', 0.5, 'fields',
               'title', 'hello world', ' body', 'lorem ist ipsum').ok()
    env.expect('ft.add', 'idx', 'doc2', 1, 'fields',
               'title', 'hello another world', ' body', 'lorem ist ipsum lorem lorem').ok()
    env.expect('ft.add', 'idx', 'doc3', 0.1, 'fields',
               'title', 'hello yet another world', ' body', 'lorem ist ipsum lorem lorem').ok()

    res = env.cmd('ft.search', 'idx', 'hello world', 'withscores', 'EXPLAINSCORE', 'scorer', 'BM25')
    env.assertEqual(res[0], 3)
    if env.isCluster():
        env.assertContains('Final BM25', res[2][1][0])
        env.assertContains('Final BM25', res[5][1][0])
        env.assertContains('Final BM25', res[8][1][0])
    else:
        # The per-term subtree is identical for all three hits.
        term = '(0.78 = IDF 1.00 * F 10 / (F 10 + k1 1.2 * (1 - b 0.5 + b 0.5 * Average Len 3.67)))'
        children = [['(Weight 1.00 * children BM25 1.56)', [term, term]]]
        env.assertEqual(res[2][1], ['Final BM25 : words BM25 1.56 * document score 0.50 / slop 1',
                                    children])
        env.assertEqual(res[5][1], ['Final BM25 : words BM25 1.56 * document score 1.00 / slop 2',
                                    children])
        env.assertEqual(res[8][1], ['Final BM25 : words BM25 1.56 * document score 0.10 / slop 3',
                                    children])


def testDisMaxScorerExplanation(env):
    """Check the EXPLAINSCORE output of the DISMAX scorer.

    All three hits are expected to carry the same explanation tree.
    """
    env.expect('ft.create', 'idx', 'ON', 'HASH', 'SCORE_FIELD', '__score',
               'schema', 'title', 'text', 'weight', 10, 'body', 'text').ok()
    waitForIndex(env, 'idx')
    env.expect('ft.add', 'idx', 'doc1', 0.5, 'fields',
               'title', 'hello world', ' body', 'lorem ist ipsum').ok()
    env.expect('ft.add', 'idx', 'doc2', 1, 'fields',
               'title', 'hello another world', ' body', 'lorem ist ipsum lorem lorem').ok()
    env.expect('ft.add', 'idx', 'doc3', 0.1, 'fields',
               'title', 'hello yet another world', ' body', 'lorem ist ipsum lorem lorem').ok()

    res = env.cmd('ft.search', 'idx', 'hello world', 'withscores', 'EXPLAINSCORE', 'scorer', 'DISMAX')
    env.assertEqual(res[0], 3)
    expected = ['20.00 = Weight 1.00 * children DISMAX 20.00',
                ['DISMAX 10.00 = Weight 1.00 * Frequency 10',
                 'DISMAX 10.00 = Weight 1.00 * Frequency 10']]
    # Same reply slots as the other explanation tests: 2, 5 and 8.
    for slot in (2, 5, 8):
        env.assertEqual(res[slot][1], expected)


def testScoreReplace(env):
    """Check the score reported for a document that is rewritten repeatedly.

    After two identical HSETs the single hit must score '1'; after a third
    it must score '0'.  Outside cluster mode, the GC is then forced (with
    FORK_GC_CLEAN_THRESHOLD set to 0) and the score must be '1' again.
    """
    conn = getConnectionByEnv(env)
    env.expect('ft.create idx ON HASH schema f text').ok()
    waitForIndex(env, 'idx')
    conn.execute_command('HSET', 'doc1', 'f', 'redisearch')
    conn.execute_command('HSET', 'doc1', 'f', 'redisearch')
    env.expect('FT.SEARCH idx redisearch withscores nocontent').equal([1, 'doc1', '1'])
    conn.execute_command('HSET', 'doc1', 'f', 'redisearch')
    env.expect('FT.SEARCH idx redisearch withscores nocontent').equal([1, 'doc1', '0'])
    # isCluster must be *called*: the bare bound method is always truthy,
    # which made this branch unreachable in every environment.
    # NOTE(review): these assertions were never exercised before this fix -
    # confirm they hold on a standalone server.
    if not env.isCluster():
        env.expect('ft.config set FORK_GC_CLEAN_THRESHOLD 0').ok()
        env.expect('ft.debug GC_FORCEINVOKE idx').equal('DONE')
        env.expect('FT.SEARCH idx redisearch withscores nocontent').equal([1, 'doc1', '1'])


def testScoreDecimal(env):
    """A document added with score 0.01 must be returned with a score below 1."""
    env.expect('ft.create idx ON HASH schema title text').ok()
    waitForIndex(env, 'idx')
    env.expect('ft.add idx doc1 0.01 fields title hello').ok()
    res = env.cmd('ft.search idx hello withscores nocontent')
    env.assertLess(float(res[2]), 1)


def testScoreError(env):
    """EXPLAINSCORE without WITHSCORES must be rejected (skipped on cluster)."""
    env.skipOnCluster()
    env.expect('ft.create idx ON HASH schema title text').ok()
    waitForIndex(env, 'idx')
    env.expect('ft.add idx doc1 0.01 fields title hello').ok()
    env.expect('ft.search idx hello EXPLAINSCORE').error().contains('EXPLAINSCORE must be accompanied with WITHSCORES')