1 /////////////////////////////////////////////////////////////////////////////
2 // Copyright (c) 2009-2014 Alan Wright. All rights reserved.
3 // Distributable under the terms of either the Apache License (Version 2.0)
4 // or the GNU Lesser General Public License.
5 /////////////////////////////////////////////////////////////////////////////
6
7 #include "TestInc.h"
8 #include "LuceneTestFixture.h"
9 #include "Document.h"
10 #include "Field.h"
11 #include "PhraseQuery.h"
12 #include "Term.h"
13 #include "RAMDirectory.h"
14 #include "WhitespaceAnalyzer.h"
15 #include "IndexWriter.h"
16 #include "IndexSearcher.h"
17 #include "TopDocs.h"
18
19 using namespace Lucene;
20
21 class SloppyPhraseQueryTest : public LuceneTestFixture {
22 public:
SloppyPhraseQueryTest()23 SloppyPhraseQueryTest() {
24 S_1 = L"A A A";
25 S_2 = L"A 1 2 3 A 4 5 6 A";
26
27 DOC_1 = makeDocument(L"X " + S_1 + L" Y");
28 DOC_2 = makeDocument(L"X " + S_2 + L" Y");
29 DOC_3 = makeDocument(L"X " + S_1 + L" A Y");
30 DOC_1_B = makeDocument(L"X " + S_1 + L" Y N N N N " + S_1 + L" Z");
31 DOC_2_B = makeDocument(L"X " + S_2 + L" Y N N N N " + S_2 + L" Z");
32 DOC_3_B = makeDocument(L"X " + S_1 + L" A Y N N N N " + S_1 + L" A Y");
33 DOC_4 = makeDocument(L"A A X A X B A X B B A A X B A A");
34
35 QUERY_1 = makePhraseQuery(S_1);
36 QUERY_2 = makePhraseQuery(S_2);
37 QUERY_4 = makePhraseQuery(L"X A A");
38 }
39
~SloppyPhraseQueryTest()40 virtual ~SloppyPhraseQueryTest() {
41 }
42
43 protected:
44 String S_1;
45 String S_2;
46
47 DocumentPtr DOC_1;
48 DocumentPtr DOC_2;
49 DocumentPtr DOC_3;
50 DocumentPtr DOC_1_B;
51 DocumentPtr DOC_2_B;
52 DocumentPtr DOC_3_B;
53 DocumentPtr DOC_4;
54
55 PhraseQueryPtr QUERY_1;
56 PhraseQueryPtr QUERY_2;
57 PhraseQueryPtr QUERY_4;
58
59 public:
makeDocument(const String & docText)60 DocumentPtr makeDocument(const String& docText) {
61 DocumentPtr doc = newLucene<Document>();
62 FieldPtr f = newLucene<Field>(L"f", docText, Field::STORE_NO, Field::INDEX_ANALYZED);
63 f->setOmitNorms(true);
64 doc->add(f);
65 return doc;
66 }
67
makePhraseQuery(const String & terms)68 PhraseQueryPtr makePhraseQuery(const String& terms) {
69 PhraseQueryPtr query = newLucene<PhraseQuery>();
70 Collection<String> tokens = StringUtils::split(terms, L" +");
71 for (int32_t i = 0; i < tokens.size(); ++i) {
72 query->add(newLucene<Term>(L"f", tokens[i]));
73 }
74 return query;
75 }
76
checkPhraseQuery(const DocumentPtr & doc,const PhraseQueryPtr & query,int32_t slop,int32_t expectedNumResults)77 double checkPhraseQuery(const DocumentPtr& doc, const PhraseQueryPtr& query, int32_t slop, int32_t expectedNumResults) {
78 query->setSlop(slop);
79
80 RAMDirectoryPtr ramDir = newLucene<RAMDirectory>();
81 WhitespaceAnalyzerPtr analyzer = newLucene<WhitespaceAnalyzer>();
82 IndexWriterPtr writer = newLucene<IndexWriter>(ramDir, analyzer, IndexWriter::MaxFieldLengthUNLIMITED);
83 writer->addDocument(doc);
84 writer->close();
85
86 IndexSearcherPtr searcher = newLucene<IndexSearcher>(ramDir, true);
87 TopDocsPtr td = searcher->search(query, FilterPtr(), 10);
88 EXPECT_EQ(expectedNumResults, td->totalHits);
89
90 searcher->close();
91 ramDir->close();
92
93 return td->maxScore;
94 }
95 };
96
97 /// Test DOC_4 and QUERY_4.
98 /// QUERY_4 has a fuzzy (len=1) match to DOC_4, so all slop values > 0 should succeed.
99 /// But only the 3rd sequence of A's in DOC_4 will do.
TEST_F(SloppyPhraseQueryTest,testDoc4Query4AllSlopsShouldMatch)100 TEST_F(SloppyPhraseQueryTest, testDoc4Query4AllSlopsShouldMatch) {
101 for (int32_t slop = 0; slop < 30; ++slop) {
102 int32_t numResultsExpected = slop < 1 ? 0 : 1;
103 checkPhraseQuery(DOC_4, QUERY_4, slop, numResultsExpected);
104 }
105 }
106
107 /// Test DOC_1 and QUERY_1.
108 /// QUERY_1 has an exact match to DOC_1, so all slop values should succeed.
TEST_F(SloppyPhraseQueryTest,testDoc1Query1AllSlopsShouldMatch)109 TEST_F(SloppyPhraseQueryTest, testDoc1Query1AllSlopsShouldMatch) {
110 for (int32_t slop = 0; slop < 30; ++slop) {
111 double score1 = checkPhraseQuery(DOC_1, QUERY_1, slop, 1);
112 double score2 = checkPhraseQuery(DOC_1_B, QUERY_1, slop, 1);
113 EXPECT_TRUE(score2 > score1);
114 }
115 }
116
117 /// Test DOC_2 and QUERY_1.
118 /// 6 should be the minimum slop to make QUERY_1 match DOC_2.
TEST_F(SloppyPhraseQueryTest,testDoc2Query1Slop6OrMoreShouldMatch)119 TEST_F(SloppyPhraseQueryTest, testDoc2Query1Slop6OrMoreShouldMatch) {
120 for (int32_t slop = 0; slop < 30; ++slop) {
121 int32_t numResultsExpected = slop < 6 ? 0 : 1;
122 double score1 = checkPhraseQuery(DOC_2, QUERY_1, slop, numResultsExpected);
123 if (numResultsExpected > 0) {
124 double score2 = checkPhraseQuery(DOC_2_B, QUERY_1, slop, 1);
125 EXPECT_TRUE(score2 > score1);
126 }
127 }
128 }
129
130 /// Test DOC_2 and QUERY_2.
131 /// QUERY_2 has an exact match to DOC_2, so all slop values should succeed.
TEST_F(SloppyPhraseQueryTest,testDoc2Query2AllSlopsShouldMatch)132 TEST_F(SloppyPhraseQueryTest, testDoc2Query2AllSlopsShouldMatch) {
133 for (int32_t slop = 0; slop < 30; ++slop) {
134 double score1 = checkPhraseQuery(DOC_2, QUERY_2, slop, 1);
135 double score2 = checkPhraseQuery(DOC_2_B, QUERY_2, slop, 1);
136 EXPECT_TRUE(score2 > score1);
137 }
138 }
139
140 /// Test DOC_3 and QUERY_1.
141 /// QUERY_1 has an exact match to DOC_3, so all slop values should succeed.
TEST_F(SloppyPhraseQueryTest,testDoc3Query1AllSlopsShouldMatch)142 TEST_F(SloppyPhraseQueryTest, testDoc3Query1AllSlopsShouldMatch) {
143 for (int32_t slop = 0; slop < 30; ++slop) {
144 double score1 = checkPhraseQuery(DOC_3, QUERY_1, slop, 1);
145 double score2 = checkPhraseQuery(DOC_3_B, QUERY_1, slop, 1);
146 EXPECT_TRUE(score2 > score1);
147 }
148 }
149