1 /////////////////////////////////////////////////////////////////////////////
2 // Copyright (c) 2009-2014 Alan Wright. All rights reserved.
3 // Distributable under the terms of either the Apache License (Version 2.0)
4 // or the GNU Lesser General Public License.
5 /////////////////////////////////////////////////////////////////////////////
6 
7 #include "TestInc.h"
8 #include "LuceneTestFixture.h"
9 #include "Document.h"
10 #include "Field.h"
11 #include "PhraseQuery.h"
12 #include "Term.h"
13 #include "RAMDirectory.h"
14 #include "WhitespaceAnalyzer.h"
15 #include "IndexWriter.h"
16 #include "IndexSearcher.h"
17 #include "TopDocs.h"
18 
19 using namespace Lucene;
20 
21 class SloppyPhraseQueryTest : public LuceneTestFixture {
22 public:
SloppyPhraseQueryTest()23     SloppyPhraseQueryTest() {
24         S_1 = L"A A A";
25         S_2 = L"A 1 2 3 A 4 5 6 A";
26 
27         DOC_1 = makeDocument(L"X " + S_1 + L" Y");
28         DOC_2 = makeDocument(L"X " + S_2 + L" Y");
29         DOC_3 = makeDocument(L"X " + S_1 + L" A Y");
30         DOC_1_B = makeDocument(L"X " + S_1 + L" Y N N N N " + S_1 + L" Z");
31         DOC_2_B = makeDocument(L"X " + S_2 + L" Y N N N N " + S_2 + L" Z");
32         DOC_3_B = makeDocument(L"X " + S_1 + L" A Y N N N N " + S_1 + L" A Y");
33         DOC_4 = makeDocument(L"A A X A X B A X B B A A X B A A");
34 
35         QUERY_1 = makePhraseQuery(S_1);
36         QUERY_2 = makePhraseQuery(S_2);
37         QUERY_4 = makePhraseQuery(L"X A A");
38     }
39 
~SloppyPhraseQueryTest()40     virtual ~SloppyPhraseQueryTest() {
41     }
42 
43 protected:
44     String S_1;
45     String S_2;
46 
47     DocumentPtr DOC_1;
48     DocumentPtr DOC_2;
49     DocumentPtr DOC_3;
50     DocumentPtr DOC_1_B;
51     DocumentPtr DOC_2_B;
52     DocumentPtr DOC_3_B;
53     DocumentPtr DOC_4;
54 
55     PhraseQueryPtr QUERY_1;
56     PhraseQueryPtr QUERY_2;
57     PhraseQueryPtr QUERY_4;
58 
59 public:
makeDocument(const String & docText)60     DocumentPtr makeDocument(const String& docText) {
61         DocumentPtr doc = newLucene<Document>();
62         FieldPtr f = newLucene<Field>(L"f", docText, Field::STORE_NO, Field::INDEX_ANALYZED);
63         f->setOmitNorms(true);
64         doc->add(f);
65         return doc;
66     }
67 
makePhraseQuery(const String & terms)68     PhraseQueryPtr makePhraseQuery(const String& terms) {
69         PhraseQueryPtr query = newLucene<PhraseQuery>();
70         Collection<String> tokens = StringUtils::split(terms, L" +");
71         for (int32_t i = 0; i < tokens.size(); ++i) {
72             query->add(newLucene<Term>(L"f", tokens[i]));
73         }
74         return query;
75     }
76 
checkPhraseQuery(const DocumentPtr & doc,const PhraseQueryPtr & query,int32_t slop,int32_t expectedNumResults)77     double checkPhraseQuery(const DocumentPtr& doc, const PhraseQueryPtr& query, int32_t slop, int32_t expectedNumResults) {
78         query->setSlop(slop);
79 
80         RAMDirectoryPtr ramDir = newLucene<RAMDirectory>();
81         WhitespaceAnalyzerPtr analyzer = newLucene<WhitespaceAnalyzer>();
82         IndexWriterPtr writer = newLucene<IndexWriter>(ramDir, analyzer, IndexWriter::MaxFieldLengthUNLIMITED);
83         writer->addDocument(doc);
84         writer->close();
85 
86         IndexSearcherPtr searcher = newLucene<IndexSearcher>(ramDir, true);
87         TopDocsPtr td = searcher->search(query, FilterPtr(), 10);
88         EXPECT_EQ(expectedNumResults, td->totalHits);
89 
90         searcher->close();
91         ramDir->close();
92 
93         return td->maxScore;
94     }
95 };
96 
97 /// Test DOC_4 and QUERY_4.
98 /// QUERY_4 has a fuzzy (len=1) match to DOC_4, so all slop values > 0 should succeed.
99 /// But only the 3rd sequence of A's in DOC_4 will do.
TEST_F(SloppyPhraseQueryTest,testDoc4Query4AllSlopsShouldMatch)100 TEST_F(SloppyPhraseQueryTest, testDoc4Query4AllSlopsShouldMatch) {
101     for (int32_t slop = 0; slop < 30; ++slop) {
102         int32_t numResultsExpected = slop < 1 ? 0 : 1;
103         checkPhraseQuery(DOC_4, QUERY_4, slop, numResultsExpected);
104     }
105 }
106 
107 /// Test DOC_1 and QUERY_1.
108 /// QUERY_1 has an exact match to DOC_1, so all slop values should succeed.
TEST_F(SloppyPhraseQueryTest,testDoc1Query1AllSlopsShouldMatch)109 TEST_F(SloppyPhraseQueryTest, testDoc1Query1AllSlopsShouldMatch) {
110     for (int32_t slop = 0; slop < 30; ++slop) {
111         double score1 = checkPhraseQuery(DOC_1, QUERY_1, slop, 1);
112         double score2 = checkPhraseQuery(DOC_1_B, QUERY_1, slop, 1);
113         EXPECT_TRUE(score2 > score1);
114     }
115 }
116 
117 /// Test DOC_2 and QUERY_1.
118 /// 6 should be the minimum slop to make QUERY_1 match DOC_2.
TEST_F(SloppyPhraseQueryTest,testDoc2Query1Slop6OrMoreShouldMatch)119 TEST_F(SloppyPhraseQueryTest, testDoc2Query1Slop6OrMoreShouldMatch) {
120     for (int32_t slop = 0; slop < 30; ++slop) {
121         int32_t numResultsExpected = slop < 6 ? 0 : 1;
122         double score1 = checkPhraseQuery(DOC_2, QUERY_1, slop, numResultsExpected);
123         if (numResultsExpected > 0) {
124             double score2 = checkPhraseQuery(DOC_2_B, QUERY_1, slop, 1);
125             EXPECT_TRUE(score2 > score1);
126         }
127     }
128 }
129 
130 /// Test DOC_2 and QUERY_2.
131 /// QUERY_2 has an exact match to DOC_2, so all slop values should succeed.
TEST_F(SloppyPhraseQueryTest,testDoc2Query2AllSlopsShouldMatch)132 TEST_F(SloppyPhraseQueryTest, testDoc2Query2AllSlopsShouldMatch) {
133     for (int32_t slop = 0; slop < 30; ++slop) {
134         double score1 = checkPhraseQuery(DOC_2, QUERY_2, slop, 1);
135         double score2 = checkPhraseQuery(DOC_2_B, QUERY_2, slop, 1);
136         EXPECT_TRUE(score2 > score1);
137     }
138 }
139 
140 /// Test DOC_3 and QUERY_1.
141 /// QUERY_1 has an exact match to DOC_3, so all slop values should succeed.
TEST_F(SloppyPhraseQueryTest,testDoc3Query1AllSlopsShouldMatch)142 TEST_F(SloppyPhraseQueryTest, testDoc3Query1AllSlopsShouldMatch) {
143     for (int32_t slop = 0; slop < 30; ++slop) {
144         double score1 = checkPhraseQuery(DOC_3, QUERY_1, slop, 1);
145         double score2 = checkPhraseQuery(DOC_3_B, QUERY_1, slop, 1);
146         EXPECT_TRUE(score2 > score1);
147     }
148 }
149