1 /////////////////////////////////////////////////////////////////////////////
2 // Copyright (c) 2009-2014 Alan Wright. All rights reserved.
3 // Distributable under the terms of either the Apache License (Version 2.0)
4 // or the GNU Lesser General Public License.
5 /////////////////////////////////////////////////////////////////////////////
6 
7 #include "TestInc.h"
8 #include "LuceneTestFixture.h"
9 #include "IndexSearcher.h"
10 #include "IndexReader.h"
11 #include "DefaultSimilarity.h"
12 #include "PayloadHelper.h"
13 #include "SpanTermQuery.h"
14 #include "SpanFirstQuery.h"
15 #include "Term.h"
16 #include "Spans.h"
17 #include "SpanNearQuery.h"
18 #include "SpanNotQuery.h"
19 #include "RAMDirectory.h"
20 #include "PayloadAttribute.h"
21 #include "TokenFilter.h"
22 #include "TermAttribute.h"
23 #include "PositionIncrementAttribute.h"
24 #include "Payload.h"
25 #include "LowerCaseTokenizer.h"
26 #include "Analyzer.h"
27 #include "IndexWriter.h"
28 #include "Document.h"
29 #include "Field.h"
30 #include "StringReader.h"
31 #include "TopDocs.h"
32 #include "PayloadSpanUtil.h"
33 #include "TermQuery.h"
34 
35 using namespace Lucene;
36 
37 DECLARE_SHARED_PTR(PayloadSpansAnalyzer)
38 
39 class PayloadSpansFilter : public TokenFilter {
40 public:
PayloadSpansFilter(const TokenStreamPtr & input,const String & fieldName)41     PayloadSpansFilter(const TokenStreamPtr& input, const String& fieldName) : TokenFilter(input) {
42         this->fieldName = fieldName;
43         this->pos = 0;
44         this->entities = HashSet<String>::newInstance();
45         this->entities.add(L"xx");
46         this->entities.add(L"one");
47         this->nopayload = HashSet<String>::newInstance();
48         this->nopayload.add(L"nopayload");
49         this->nopayload.add(L"np");
50         this->termAtt = addAttribute<TermAttribute>();
51         this->posIncrAtt = addAttribute<PositionIncrementAttribute>();
52         this->payloadAtt = addAttribute<PayloadAttribute>();
53     }
54 
~PayloadSpansFilter()55     virtual ~PayloadSpansFilter() {
56     }
57 
58     LUCENE_CLASS(PayloadSpansFilter);
59 
60 public:
61     String fieldName;
62     HashSet<String> entities;
63     HashSet<String> nopayload;
64     int32_t pos;
65     PayloadAttributePtr payloadAtt;
66     TermAttributePtr termAtt;
67     PositionIncrementAttributePtr posIncrAtt;
68 
69 public:
incrementToken()70     virtual bool incrementToken() {
71         if (input->incrementToken()) {
72             String token(termAtt->termBuffer().get(), termAtt->termLength());
73 
74             if (!nopayload.contains(token)) {
75                 StringStream buf;
76                 buf << token;
77                 if (entities.contains(token)) {
78                     buf << L":Entity:";
79                 } else {
80                     buf << L":Noise:";
81                 }
82                 buf << pos;
83                 ByteArray data = ByteArray::newInstance(buf.str().length() * sizeof(wchar_t));
84                 std::wcsncpy((wchar_t*)data.get(), buf.str().c_str(), buf.str().length());
85                 payloadAtt->setPayload(newLucene<Payload>(data));
86             }
87             pos += posIncrAtt->getPositionIncrement();
88             return true;
89         } else {
90             return false;
91         }
92     }
93 };
94 
95 class PayloadSpansAnalyzer : public Analyzer {
96 public:
~PayloadSpansAnalyzer()97     virtual ~PayloadSpansAnalyzer() {
98     }
99 
100     LUCENE_CLASS(PayloadSpansAnalyzer);
101 
102 public:
tokenStream(const String & fieldName,const ReaderPtr & reader)103     virtual TokenStreamPtr tokenStream(const String& fieldName, const ReaderPtr& reader) {
104         TokenStreamPtr result = newLucene<LowerCaseTokenizer>(reader);
105         result = newLucene<PayloadSpansFilter>(result, fieldName);
106         return result;
107     }
108 };
109 
110 class PayloadSpansTest : public LuceneTestFixture {
111 public:
PayloadSpansTest()112     PayloadSpansTest() {
113         similarity = newLucene<DefaultSimilarity>();
114         searcher = PayloadHelper::setUp(similarity, 1000);
115         indexReader = searcher->getIndexReader();
116     }
117 
~PayloadSpansTest()118     virtual ~PayloadSpansTest() {
119     }
120 
121 protected:
122     IndexSearcherPtr searcher;
123     SimilarityPtr similarity;
124     IndexReaderPtr indexReader;
125 
126 public:
checkSpans(const SpansPtr & spans,int32_t expectedNumSpans,int32_t expectedNumPayloads,int32_t expectedPayloadLength,int32_t expectedFirstByte)127     void checkSpans(const SpansPtr& spans, int32_t expectedNumSpans, int32_t expectedNumPayloads, int32_t expectedPayloadLength, int32_t expectedFirstByte) {
128         EXPECT_TRUE(spans);
129         int32_t seen = 0;
130         while (spans->next()) {
131             // if we expect payloads, then isPayloadAvailable should be true
132             if (expectedNumPayloads > 0) {
133                 EXPECT_TRUE(spans->isPayloadAvailable());
134             } else {
135                 EXPECT_TRUE(!spans->isPayloadAvailable());
136             }
137             // See payload helper, for the PayloadHelper::FIELD field, there is a single byte payload at every token
138             if (spans->isPayloadAvailable()) {
139                 Collection<ByteArray> payload = spans->getPayload();
140                 EXPECT_EQ(payload.size(), expectedNumPayloads);
141                 for (Collection<ByteArray>::iterator thePayload = payload.begin(); thePayload != payload.end(); ++thePayload) {
142                     EXPECT_EQ(thePayload->size(), expectedPayloadLength);
143                     EXPECT_EQ((*thePayload)[0], expectedFirstByte);
144                 }
145             }
146             ++seen;
147         }
148         EXPECT_EQ(seen, expectedNumSpans);
149     }
150 
checkSpans(const SpansPtr & spans,int32_t numSpans,Collection<int32_t> numPayloads)151     void checkSpans(const SpansPtr& spans, int32_t numSpans, Collection<int32_t> numPayloads) {
152         int32_t cnt = 0;
153         while (spans->next()) {
154             if (spans->isPayloadAvailable()) {
155                 Collection<ByteArray> payload = spans->getPayload();
156                 EXPECT_EQ(numPayloads[cnt], payload.size());
157             } else {
158                 EXPECT_TRUE(numPayloads.size() <= 0 || numPayloads[cnt] <= 0);
159             }
160         }
161         ++cnt;
162     }
163 
getSpanNotSearcher()164     IndexSearcherPtr getSpanNotSearcher() {
165         RAMDirectoryPtr directory = newLucene<RAMDirectory>();
166         PayloadSpansAnalyzerPtr analyzer = newLucene<PayloadSpansAnalyzer>();
167         IndexWriterPtr writer = newLucene<IndexWriter>(directory, analyzer, true, IndexWriter::MaxFieldLengthUNLIMITED);
168         writer->setSimilarity(similarity);
169 
170         DocumentPtr doc = newLucene<Document>();
171         doc->add(newLucene<Field>(PayloadHelper::FIELD, L"one two three one four three", Field::STORE_YES, Field::INDEX_ANALYZED));
172         writer->addDocument(doc);
173         writer->close();
174 
175         IndexSearcherPtr searcher = newLucene<IndexSearcher>(directory, true);
176         searcher->setSimilarity(similarity);
177         return searcher;
178     }
179 
getSearcher()180     IndexSearcherPtr getSearcher() {
181         RAMDirectoryPtr directory = newLucene<RAMDirectory>();
182         PayloadSpansAnalyzerPtr analyzer = newLucene<PayloadSpansAnalyzer>();
183         Collection<String> docs = newCollection<String>(
184                                       L"xx rr yy mm  pp", L"xx yy mm rr pp", L"nopayload qq ss pp np",
185                                       L"one two three four five six seven eight nine ten eleven",
186                                       L"nine one two three four five six seven eight eleven ten"
187                                   );
188         IndexWriterPtr writer = newLucene<IndexWriter>(directory, analyzer, true, IndexWriter::MaxFieldLengthUNLIMITED);
189         writer->setSimilarity(similarity);
190 
191         for (int32_t i = 0; i < docs.size(); ++i) {
192             DocumentPtr doc = newLucene<Document>();
193             doc->add(newLucene<Field>(PayloadHelper::FIELD, docs[i], Field::STORE_YES, Field::INDEX_ANALYZED));
194             writer->addDocument(doc);
195         }
196 
197         writer->close();
198 
199         IndexSearcherPtr searcher = newLucene<IndexSearcher>(directory, true);
200         return searcher;
201     }
202 };
203 
// A span term query on the payload field must report one single-byte payload per span,
// while the same term on the no-payload field must report none.
TEST_F(PayloadSpansTest, testSpanTermQuery) {
    // Field indexed with payloads
    SpanTermQueryPtr payloadQuery = newLucene<SpanTermQuery>(newLucene<Term>(PayloadHelper::FIELD, L"seventy"));
    SpansPtr payloadSpans = payloadQuery->getSpans(indexReader);
    EXPECT_TRUE(payloadSpans);
    checkSpans(payloadSpans, 100, 1, 1, 1);

    // Field indexed without payloads
    SpanTermQueryPtr plainQuery = newLucene<SpanTermQuery>(newLucene<Term>(PayloadHelper::NO_PAYLOAD_FIELD, L"seventy"));
    SpansPtr plainSpans = plainQuery->getSpans(indexReader);
    EXPECT_TRUE(plainSpans);
    checkSpans(plainSpans, 100, 0, 0, 0);
}
215 
// Payloads must be reported through SpanFirstQuery, both for a plain term clause and
// for nested SpanNearQuery clauses.
TEST_F(PayloadSpansTest, testSpanFirst) {
    // Simple term subclause
    SpanQueryPtr oneTerm = newLucene<SpanTermQuery>(newLucene<Term>(PayloadHelper::FIELD, L"one"));
    SpanFirstQueryPtr firstQuery = newLucene<SpanFirstQuery>(oneTerm, 2);
    SpansPtr termSpans = firstQuery->getSpans(indexReader);
    checkSpans(termSpans, 109, 1, 1, 1);

    // Test more complicated subclause
    Collection<SpanQueryPtr> oneHundred = newCollection<SpanQueryPtr>(
            newLucene<SpanTermQuery>(newLucene<Term>(PayloadHelper::FIELD, L"one")),
            newLucene<SpanTermQuery>(newLucene<Term>(PayloadHelper::FIELD, L"hundred"))
                                          );

    // Near query built with the in-order flag set
    SpanQueryPtr orderedNear = newLucene<SpanNearQuery>(oneHundred, 0, true);
    firstQuery = newLucene<SpanFirstQuery>(orderedNear, 2);
    checkSpans(firstQuery->getSpans(indexReader), 100, 2, 1, 1);

    // Same clauses with the in-order flag cleared
    SpanQueryPtr unorderedNear = newLucene<SpanNearQuery>(oneHundred, 0, false);
    firstQuery = newLucene<SpanFirstQuery>(unorderedNear, 2);
    checkSpans(firstQuery->getSpans(indexReader), 100, 2, 1, 1);
}
234 
// "one" near "three" with "two" excluded, run against the single-document index from
// getSpanNotSearcher(): one span with two payloads is expected.
TEST_F(PayloadSpansTest, testSpanNot) {
    SpanQueryPtr one = newLucene<SpanTermQuery>(newLucene<Term>(PayloadHelper::FIELD, L"one"));
    SpanQueryPtr three = newLucene<SpanTermQuery>(newLucene<Term>(PayloadHelper::FIELD, L"three"));
    Collection<SpanQueryPtr> includeClauses = newCollection<SpanQueryPtr>(one, three);
    SpanQueryPtr include = newLucene<SpanNearQuery>(includeClauses, 5, true);

    SpanNotQueryPtr notQuery = newLucene<SpanNotQuery>(include, newLucene<SpanTermQuery>(newLucene<Term>(PayloadHelper::FIELD, L"two")));

    IndexSearcherPtr spanNotSearcher = getSpanNotSearcher();
    SpansPtr spans = notQuery->getSpans(spanNotSearcher->getIndexReader());
    checkSpans(spans, 1, newCollection<int32_t>(2));
}
244 
// Payload counts for plain and nested SpanNearQuery combinations over the index built by
// getSearcher().
TEST_F(PayloadSpansTest, testNestedSpans) {
    IndexSearcherPtr docsSearcher = getSearcher();
    IndexReaderPtr reader = docsSearcher->getIndexReader();

    // "mark" is not part of any document added by getSearcher(): no spans expected
    SpanTermQueryPtr missingTerm = newLucene<SpanTermQuery>(newLucene<Term>(PayloadHelper::FIELD, L"mark"));
    SpansPtr spans = missingTerm->getSpans(reader);
    EXPECT_TRUE(spans);
    checkSpans(spans, 0, Collection<int32_t>());

    // rr, yy and xx with the in-order flag cleared
    Collection<SpanQueryPtr> anyOrder = newCollection<SpanQueryPtr>(
                                            newLucene<SpanTermQuery>(newLucene<Term>(PayloadHelper::FIELD, L"rr")),
                                            newLucene<SpanTermQuery>(newLucene<Term>(PayloadHelper::FIELD, L"yy")),
                                            newLucene<SpanTermQuery>(newLucene<Term>(PayloadHelper::FIELD, L"xx"))
                                        );
    SpanNearQueryPtr nearQuery = newLucene<SpanNearQuery>(anyOrder, 12, false);
    spans = nearQuery->getSpans(reader);
    EXPECT_TRUE(spans);
    checkSpans(spans, 2, newCollection<int32_t>(3, 3));

    // xx, rr, yy with the in-order flag set
    Collection<SpanQueryPtr> inOrder = newCollection<SpanQueryPtr>(
                                           newLucene<SpanTermQuery>(newLucene<Term>(PayloadHelper::FIELD, L"xx")),
                                           newLucene<SpanTermQuery>(newLucene<Term>(PayloadHelper::FIELD, L"rr")),
                                           newLucene<SpanTermQuery>(newLucene<Term>(PayloadHelper::FIELD, L"yy"))
                                       );
    nearQuery = newLucene<SpanNearQuery>(inOrder, 6, true);
    spans = nearQuery->getSpans(reader);
    EXPECT_TRUE(spans);
    checkSpans(spans, 1, newCollection<int32_t>(3));

    // xx within 6 of rr
    Collection<SpanQueryPtr> xxThenRr = newCollection<SpanQueryPtr>(
                                            newLucene<SpanTermQuery>(newLucene<Term>(PayloadHelper::FIELD, L"xx")),
                                            newLucene<SpanTermQuery>(newLucene<Term>(PayloadHelper::FIELD, L"rr"))
                                        );
    nearQuery = newLucene<SpanNearQuery>(xxThenRr, 6, true);

    // yy within 6 of xx within 6 of rr
    Collection<SpanQueryPtr> outerClauses = newCollection<SpanQueryPtr>(
            newLucene<SpanTermQuery>(newLucene<Term>(PayloadHelper::FIELD, L"yy")),
            nearQuery
                                            );
    SpanNearQueryPtr outerQuery = newLucene<SpanNearQuery>(outerClauses, 6, false);
    spans = outerQuery->getSpans(reader);
    EXPECT_TRUE(spans);
    checkSpans(spans, 2, newCollection<int32_t>(3, 3));
}
293 
// Nested near queries whose leading clauses ("nopayload", "np") are terms that
// PayloadSpansFilter indexes without a payload: one span with three payloads is expected.
TEST_F(PayloadSpansTest, testFirstClauseWithoutPayload) {
    IndexSearcherPtr docsSearcher = getSearcher();

    // Innermost: nopayload, qq, ss (in-order flag set)
    Collection<SpanQueryPtr> innerClauses = newCollection<SpanQueryPtr>(
            newLucene<SpanTermQuery>(newLucene<Term>(PayloadHelper::FIELD, L"nopayload")),
            newLucene<SpanTermQuery>(newLucene<Term>(PayloadHelper::FIELD, L"qq")),
            newLucene<SpanTermQuery>(newLucene<Term>(PayloadHelper::FIELD, L"ss"))
                                            );
    SpanNearQueryPtr innerQuery = newLucene<SpanNearQuery>(innerClauses, 6, true);

    // Middle: pp combined with the innermost query
    Collection<SpanQueryPtr> middleClauses = newCollection<SpanQueryPtr>(
                newLucene<SpanTermQuery>(newLucene<Term>(PayloadHelper::FIELD, L"pp")),
                innerQuery
            );
    SpanNearQueryPtr middleQuery = newLucene<SpanNearQuery>(middleClauses, 6, false);

    // Outermost: np combined with the middle query
    Collection<SpanQueryPtr> outerClauses = newCollection<SpanQueryPtr>(
            newLucene<SpanTermQuery>(newLucene<Term>(PayloadHelper::FIELD, L"np")),
            middleQuery
                                            );
    SpanNearQueryPtr outerQuery = newLucene<SpanNearQuery>(outerClauses, 6, false);

    SpansPtr outerSpans = outerQuery->getSpans(docsSearcher->getIndexReader());
    EXPECT_TRUE(outerSpans);
    checkSpans(outerSpans, 1, newCollection<int32_t>(3));
}
321 
// Three levels of nested near queries over the "one two three ..." documents from
// getSearcher(): two spans with eight payloads each are expected.
TEST_F(PayloadSpansTest, testHeavilyNestedSpanQuery) {
    IndexSearcherPtr docsSearcher = getSearcher();

    // one, two, three
    Collection<SpanQueryPtr> oneTwoThree = newCollection<SpanQueryPtr>(
            newLucene<SpanTermQuery>(newLucene<Term>(PayloadHelper::FIELD, L"one")),
            newLucene<SpanTermQuery>(newLucene<Term>(PayloadHelper::FIELD, L"two")),
            newLucene<SpanTermQuery>(newLucene<Term>(PayloadHelper::FIELD, L"three"))
                                           );
    SpanNearQueryPtr oneToThree = newLucene<SpanNearQuery>(oneTwoThree, 5, true);

    // (one two three), five, six
    Collection<SpanQueryPtr> throughSix = newCollection<SpanQueryPtr>(
            oneToThree,
            newLucene<SpanTermQuery>(newLucene<Term>(PayloadHelper::FIELD, L"five")),
            newLucene<SpanTermQuery>(newLucene<Term>(PayloadHelper::FIELD, L"six"))
                                          );
    SpanNearQueryPtr oneToSix = newLucene<SpanNearQuery>(throughSix, 6, true);

    // eleven and ten, in-order flag cleared
    Collection<SpanQueryPtr> tenEleven = newCollection<SpanQueryPtr>(
            newLucene<SpanTermQuery>(newLucene<Term>(PayloadHelper::FIELD, L"eleven")),
            newLucene<SpanTermQuery>(newLucene<Term>(PayloadHelper::FIELD, L"ten"))
                                         );
    SpanNearQueryPtr elevenTen = newLucene<SpanNearQuery>(tenEleven, 2, false);

    // nine combined with both nested queries
    Collection<SpanQueryPtr> outerClauses = newCollection<SpanQueryPtr>(
            newLucene<SpanTermQuery>(newLucene<Term>(PayloadHelper::FIELD, L"nine")),
            oneToSix,
            elevenTen
                                            );
    SpanNearQueryPtr outerQuery = newLucene<SpanNearQuery>(outerClauses, 6, false);

    SpansPtr outerSpans = outerQuery->getSpans(docsSearcher->getIndexReader());
    EXPECT_TRUE(outerSpans);
    checkSpans(outerSpans, 2, newCollection<int32_t>(8, 8));
}
354 
// Indexes "a b c d e f g h i j a k" and checks that the payloads collected for the
// near query (a, k) belong to the adjacent pair at positions 10 and 11.
TEST_F(PayloadSpansTest, testShrinkToAfterShortestMatch) {
    RAMDirectoryPtr dir = newLucene<RAMDirectory>();
    IndexWriterPtr indexWriter = newLucene<IndexWriter>(dir, newLucene<PayloadSpansAnalyzer>(), IndexWriter::MaxFieldLengthLIMITED);
    DocumentPtr document = newLucene<Document>();
    document->add(newLucene<Field>(L"content", newLucene<StringReader>(L"a b c d e f g h i j a k")));
    indexWriter->addDocument(document);
    indexWriter->close();

    IndexSearcherPtr contentSearcher = newLucene<IndexSearcher>(dir, true);

    SpanTermQueryPtr termA = newLucene<SpanTermQuery>(newLucene<Term>(L"content", L"a"));
    SpanTermQueryPtr termK = newLucene<SpanTermQuery>(newLucene<Term>(L"content", L"k"));
    Collection<SpanQueryPtr> nearClauses = newCollection<SpanQueryPtr>(termA, termK);
    SpanNearQueryPtr nearQuery = newLucene<SpanNearQuery>(nearClauses, 1, true);
    SpansPtr spans = nearQuery->getSpans(contentSearcher->getIndexReader());

    TopDocsPtr hits = contentSearcher->search(nearQuery, 1);

    // Collect the decoded payload text of every span, once per returned hit
    HashSet<String> seenPayloads = HashSet<String>::newInstance();
    const int32_t hitCount = hits->scoreDocs.size();
    for (int32_t hit = 0; hit < hitCount; ++hit) {
        while (spans->next()) {
            Collection<ByteArray> spanPayloads = spans->getPayload();
            for (Collection<ByteArray>::iterator bytes = spanPayloads.begin(); bytes != spanPayloads.end(); ++bytes) {
                // Payload bytes are raw wchar_t characters written by PayloadSpansFilter
                String decoded(reinterpret_cast<wchar_t*>(bytes->get()), bytes->size() / sizeof(wchar_t));
                seenPayloads.add(decoded);
            }
        }
    }

    EXPECT_EQ(2, seenPayloads.size());
    EXPECT_TRUE(seenPayloads.contains(L"a:Noise:10"));
    EXPECT_TRUE(seenPayloads.contains(L"k:Noise:11"));
}
385 
// Indexes "a b a d k f a h i k a k" and checks that the payloads collected for the
// near query (a, k) with slop 0 belong to the adjacent pair at positions 10 and 11.
TEST_F(PayloadSpansTest, testShrinkToAfterShortestMatch2) {
    RAMDirectoryPtr dir = newLucene<RAMDirectory>();
    IndexWriterPtr indexWriter = newLucene<IndexWriter>(dir, newLucene<PayloadSpansAnalyzer>(), IndexWriter::MaxFieldLengthLIMITED);
    DocumentPtr document = newLucene<Document>();
    document->add(newLucene<Field>(L"content", newLucene<StringReader>(L"a b a d k f a h i k a k")));
    indexWriter->addDocument(document);
    indexWriter->close();

    IndexSearcherPtr contentSearcher = newLucene<IndexSearcher>(dir, true);

    SpanTermQueryPtr termA = newLucene<SpanTermQuery>(newLucene<Term>(L"content", L"a"));
    SpanTermQueryPtr termK = newLucene<SpanTermQuery>(newLucene<Term>(L"content", L"k"));
    Collection<SpanQueryPtr> nearClauses = newCollection<SpanQueryPtr>(termA, termK);
    SpanNearQueryPtr nearQuery = newLucene<SpanNearQuery>(nearClauses, 0, true);
    SpansPtr spans = nearQuery->getSpans(contentSearcher->getIndexReader());

    TopDocsPtr hits = contentSearcher->search(nearQuery, 1);

    // Collect the decoded payload text of every span, once per returned hit
    HashSet<String> seenPayloads = HashSet<String>::newInstance();
    const int32_t hitCount = hits->scoreDocs.size();
    for (int32_t hit = 0; hit < hitCount; ++hit) {
        while (spans->next()) {
            Collection<ByteArray> spanPayloads = spans->getPayload();
            for (Collection<ByteArray>::iterator bytes = spanPayloads.begin(); bytes != spanPayloads.end(); ++bytes) {
                // Payload bytes are raw wchar_t characters written by PayloadSpansFilter
                String decoded(reinterpret_cast<wchar_t*>(bytes->get()), bytes->size() / sizeof(wchar_t));
                seenPayloads.add(decoded);
            }
        }
    }

    EXPECT_EQ(2, seenPayloads.size());
    EXPECT_TRUE(seenPayloads.contains(L"a:Noise:10"));
    EXPECT_TRUE(seenPayloads.contains(L"k:Noise:11"));
}
416 
// Indexes "j k a l f k k p a t a k l k t a" and checks that the payloads collected for
// the near query (a, k) with slop 0 belong to the adjacent pair at positions 10 and 11.
TEST_F(PayloadSpansTest, testShrinkToAfterShortestMatch3) {
    RAMDirectoryPtr dir = newLucene<RAMDirectory>();
    IndexWriterPtr indexWriter = newLucene<IndexWriter>(dir, newLucene<PayloadSpansAnalyzer>(), IndexWriter::MaxFieldLengthLIMITED);
    DocumentPtr document = newLucene<Document>();
    document->add(newLucene<Field>(L"content", newLucene<StringReader>(L"j k a l f k k p a t a k l k t a")));
    indexWriter->addDocument(document);
    indexWriter->close();

    IndexSearcherPtr contentSearcher = newLucene<IndexSearcher>(dir, true);

    SpanTermQueryPtr termA = newLucene<SpanTermQuery>(newLucene<Term>(L"content", L"a"));
    SpanTermQueryPtr termK = newLucene<SpanTermQuery>(newLucene<Term>(L"content", L"k"));
    Collection<SpanQueryPtr> nearClauses = newCollection<SpanQueryPtr>(termA, termK);
    SpanNearQueryPtr nearQuery = newLucene<SpanNearQuery>(nearClauses, 0, true);
    SpansPtr spans = nearQuery->getSpans(contentSearcher->getIndexReader());

    TopDocsPtr hits = contentSearcher->search(nearQuery, 1);

    // Collect the decoded payload text of every span, once per returned hit
    HashSet<String> seenPayloads = HashSet<String>::newInstance();
    const int32_t hitCount = hits->scoreDocs.size();
    for (int32_t hit = 0; hit < hitCount; ++hit) {
        while (spans->next()) {
            Collection<ByteArray> spanPayloads = spans->getPayload();
            for (Collection<ByteArray>::iterator bytes = spanPayloads.begin(); bytes != spanPayloads.end(); ++bytes) {
                // Payload bytes are raw wchar_t characters written by PayloadSpansFilter
                String decoded(reinterpret_cast<wchar_t*>(bytes->get()), bytes->size() / sizeof(wchar_t));
                seenPayloads.add(decoded);
            }
        }
    }

    EXPECT_EQ(2, seenPayloads.size());
    EXPECT_TRUE(seenPayloads.contains(L"a:Noise:10"));
    EXPECT_TRUE(seenPayloads.contains(L"k:Noise:11"));
}
447 
// Indexes "xx rr yy mm  pp" and checks that PayloadSpanUtil returns exactly one payload
// for a TermQuery on "rr", namely the text "rr:Noise:1" written by PayloadSpansFilter.
TEST_F(PayloadSpansTest, testPayloadSpanUtil) {
    RAMDirectoryPtr directory = newLucene<RAMDirectory>();
    PayloadSpansAnalyzerPtr analyzer = newLucene<PayloadSpansAnalyzer>();
    IndexWriterPtr writer = newLucene<IndexWriter>(directory, analyzer, true, IndexWriter::MaxFieldLengthUNLIMITED);
    writer->setSimilarity(similarity);
    DocumentPtr doc = newLucene<Document>();
    doc->add(newLucene<Field>(PayloadHelper::FIELD, L"xx rr yy mm  pp", Field::STORE_YES, Field::INDEX_ANALYZED));
    writer->addDocument(doc);
    writer->close();

    IndexSearcherPtr searcher = newLucene<IndexSearcher>(directory, true);

    IndexReaderPtr reader = searcher->getIndexReader();
    PayloadSpanUtilPtr psu = newLucene<PayloadSpanUtil>(reader);

    Collection<ByteArray> payloads = psu->getPayloadsForQuery(newLucene<TermQuery>(newLucene<Term>(PayloadHelper::FIELD, L"rr")));
    // Fatal on mismatch: payloads[0] below must not be read from an empty collection
    ASSERT_EQ(1, payloads.size());
    // Payload bytes are raw wchar_t characters, so the character count is size / sizeof(wchar_t)
    EXPECT_EQ(String((wchar_t*)(payloads[0].get()), payloads[0].size() / sizeof(wchar_t)), L"rr:Noise:1");
}
467