1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * See LICENSE.txt included in this distribution for the specific
9  * language governing permissions and limitations under the License.
10  *
11  * When distributing Covered Code, include this CDDL HEADER in each
12  * file and include the License file at LICENSE.txt.
13  * If applicable, add the following below this CDDL HEADER, with the
14  * fields enclosed by brackets "[]" replaced with your own identifying
15  * information: Portions Copyright [yyyy] [name of copyright owner]
16  *
17  * CDDL HEADER END
18  */
19 
20 /*
21  * Copyright (c) 2018, Chris Fraire <cfraire@me.com>.
22  */
23 
24 package org.opengrok.indexer.search.context;
25 
26 import java.util.Collections;
27 import org.apache.lucene.search.uhighlight.Passage;
28 import org.apache.lucene.util.BytesRef;
29 import org.junit.Test;
30 import static org.junit.Assert.assertEquals;
31 import static org.junit.Assert.assertNotNull;
32 import static org.junit.Assert.assertTrue;
33 import static org.opengrok.indexer.util.CustomAssertions.assertLinesEqual;
34 
35 /**
36  * Represents a container for tests of {@link ContextFormatter}.
37  */
38 public class ContextFormatterTest {
39 
40     private static final String DOC =
41         "    Lorem ipsum dolor sit amet, consectetur adipiscing elit.\n" +
42         "Mauris vel tortor vel nisl efficitur fermentum nec vel erat.\n" +
43         "Mauris diam nisl, tincidunt nec gravida sit amet, efficitur vitae\n" +
44         "est. Sed aliquam non mi vel mattis:\n" +
45         "\n" +
46         "----Maecenas vitae lacus velit varius vulputate ipsum sed laoreet. Nam maximus libero non ornare egestas. Aenean dignissim ipsum eu rhoncus ultricies.\n" +
47         "\n" +
48         "    Fusce pretium hendrerit dictum. Pellentesque habitant\n" +
49         "morbi tristique senectus et netus.";
50 
51     private static final String DOC2 =
52         "abc\n" +
53         "def\n" +
54         "ghi";
55 
56     @Test
testLineMatchFormatted()57     public void testLineMatchFormatted() {
58         final String WORD = "gravida";
59         int woff = DOC.indexOf(WORD);
60         assertTrue(WORD, woff >= 0);
61 
62         Passage p = new Passage();
63         p.setStartOffset(woff);
64         p.setEndOffset(woff + WORD.length());
65         p.addMatch(woff, p.getEndOffset(), new BytesRef(WORD),1);
66         assertEquals("getNumMatches()", 1, p.getNumMatches());
67 
68         // First, test with contextCount==0
69         ContextArgs args = new ContextArgs((short)0, (short)10);
70         ContextFormatter fmt = new ContextFormatter(args);
71         fmt.setUrl("http://example.com");
72         Object res = fmt.format(new Passage[] {p}, DOC);
73         assertNotNull("format() result", res);
74 
75         final String DOCCTX_0 =
76             "<a class=\"s\" href=\"http://example.com#3\"><span class=\"l\">" +
77             "3</span> Mauris diam nisl, tincidunt nec <b>gravida</b> sit" +
78             " amet, efficitur vitae</a><br/>\n";
79         String ctx = res.toString();
80         assertLinesEqual("format().toString()", DOCCTX_0, ctx);
81 
82         // Second, test with contextCount==1
83         args = new ContextArgs((short)1, (short)10);
84         fmt = new ContextFormatter(args);
85         fmt.setUrl("http://example.com");
86         res = fmt.format(new Passage[] {p}, DOC);
87         assertNotNull("format() result", res);
88 
89         final String DOCCTX_1 =
90             "<a class=\"s\" href=\"http://example.com#2\"><span class=\"l\">" +
91             "2</span> Mauris vel tortor vel nisl efficitur fermentum nec vel" +
92             " erat.</a><br/>" +
93             "<a class=\"s\" href=\"http://example.com#3\"><span class=\"l\">" +
94             "3</span> Mauris diam nisl, tincidunt nec <b>gravida</b> sit" +
95             " amet, efficitur vitae</a><br/>" +
96             "<a class=\"s\" href=\"http://example.com#4\"><span class=\"l\">" +
97             "4</span> est. Sed aliquam non mi vel mattis:</a><br/>";
98         ctx = res.toString();
99         assertLinesEqual("format().toString()", DOCCTX_1, ctx);
100     }
101 
102     @Test
testLinesSpanningMatchFormatted()103     public void testLinesSpanningMatchFormatted() {
104         Passage p = new Passage();
105         p.setStartOffset(0);
106         p.setEndOffset(DOC2.length());
107         p.addMatch(0, p.getEndOffset(), new BytesRef(DOC2),1);
108         assertEquals("getNumMatches()", 1, p.getNumMatches());
109 
110         /**
111          * We're using the entire document, but see how it behaves with
112          * contextCount==1
113          */
114         ContextArgs args = new ContextArgs((short)1, (short)10);
115         ContextFormatter fmt = new ContextFormatter(args);
116         fmt.setUrl("http://example.com");
117         Object res = fmt.format(new Passage[] {p}, DOC2);
118         assertNotNull("format() result", res);
119 
120         final String DOC2CTX =
121             "<a class=\"s\" href=\"http://example.com#1\"><span class=\"l\">" +
122             "1</span> <b>abc</b></a><br/>" +
123             "<a class=\"s\" href=\"http://example.com#2\"><span class=\"l\">" +
124             "2</span> <b>def</b></a><br/>" +
125             "<a class=\"s\" href=\"http://example.com#3\"><span class=\"l\">" +
126             "3</span> <b>ghi</b></a><br/>";
127         String ctx = res.toString();
128         assertLinesEqual("format().toString()", DOC2CTX, ctx);
129     }
130 
131     @Test
testDualElidedMatchFormatted()132     public void testDualElidedMatchFormatted() {
133         final String WORD = "dignissim";
134         int woff = DOC.indexOf(WORD);
135         assertTrue(WORD, woff >= 0);
136 
137         Passage p = new Passage();
138         p.setStartOffset(woff);
139         p.setEndOffset(woff + WORD.length());
140         p.addMatch(woff, p.getEndOffset(), new BytesRef(WORD),1);
141         assertEquals("getNumMatches()", 1, p.getNumMatches());
142 
143         // First, test with contextCount==0
144         ContextArgs args = new ContextArgs((short)0, (short)10);
145         ContextFormatter fmt = new ContextFormatter(args);
146         fmt.setUrl("http://example.com");
147         Object res = fmt.format(new Passage[] {p}, DOC);
148         assertNotNull("format() result", res);
149 
150         final String DOCCTX_0 =
151             "<a class=\"s\" href=\"http://example.com#6\"><span class=\"l\">" +
152             "6</span> &hellip;putate ipsum sed laoreet. Nam maximus libero" +
153             " non ornare egestas. Aenean <b>dignissim</b> ipsum eu" +
154             " rhoncus&hellip;</a><br/>\n";
155         String ctx = res.toString();
156         assertLinesEqual("format().toString()", DOCCTX_0, ctx);
157 
158         // Second, test with contextCount==1
159         args = new ContextArgs((short)1, (short)10);
160         fmt = new ContextFormatter(args);
161         fmt.setUrl("http://example.com");
162         res = fmt.format(new Passage[] {p}, DOC);
163         assertNotNull("format() result", res);
164 
165         final String DOCCTX_1 =
166             "<a class=\"s\" href=\"http://example.com#5\"><span class=\"l\">" +
167             "5</span> </a><br/>" +
168             "<a class=\"s\" href=\"http://example.com#6\"><span class=\"l\">" +
169             "6</span> &hellip;putate ipsum sed laoreet. Nam maximus libero" +
170             " non ornare egestas. Aenean <b>dignissim</b> ipsum eu" +
171             " rhoncus&hellip;</a><br/>" +
172             "<a class=\"s\" href=\"http://example.com#7\"><span class=\"l\">" +
173             "7</span> </a><br/>";
174         ctx = res.toString();
175         assertLinesEqual("format().toString()", DOCCTX_1, ctx);
176 
177         // Third, test with contextCount==1 and a line limit
178         args = new ContextArgs((short)1, (short)10);
179         fmt = new ContextFormatter(args);
180         fmt.setUrl("http://example.com");
181         fmt.setMoreLimit(2);
182         fmt.setMoreUrl("http://example.com/more");
183         res = fmt.format(new Passage[] {p}, DOC);
184         assertNotNull("format() result", res);
185 
186         final String DOCCTX_2M =
187             "<a class=\"s\" href=\"http://example.com#5\"><span class=\"l\">" +
188             "5</span> </a><br/>" +
189             "<a class=\"s\" href=\"http://example.com#6\"><span class=\"l\">" +
190             "6</span> &hellip;putate ipsum sed laoreet. Nam maximus libero" +
191             " non ornare egestas. Aenean <b>dignissim</b> ipsum eu" +
192             " rhoncus&hellip;</a><br/>" +
193             "<a href=\"http://example.com/more\">[all &hellip;]</a><br/>";
194         ctx = res.toString();
195         assertLinesEqual("format().toString()", DOCCTX_2M, ctx);
196     }
197 
198     @Test
testLeftElidedMatchFormatted()199     public void testLeftElidedMatchFormatted() {
200         final String WORD = "ultricies";
201         int woff = DOC.indexOf(WORD);
202         assertTrue(WORD, woff >= 0);
203 
204         Passage p = new Passage();
205         p.setStartOffset(woff);
206         p.setEndOffset(woff + WORD.length());
207         p.addMatch(woff, p.getEndOffset(), new BytesRef(WORD),1);
208         assertEquals("getNumMatches()", 1, p.getNumMatches());
209 
210         // First, test with contextCount==0
211         ContextArgs args = new ContextArgs((short)0, (short)10);
212         ContextFormatter fmt = new ContextFormatter(args);
213         fmt.setUrl("http://example.com");
214         Object res = fmt.format(new Passage[] {p}, DOC);
215         assertNotNull("format() result", res);
216 
217         final String DOCCTX_0 =
218                 "<a class=\"s\" href=\"http://example.com#6\"><span " +
219                         "class=\"l\">6</span> &hellip;um sed laoreet. Nam " +
220                         "maximus libero non ornare egestas. Aenean " +
221                         "dignissim ipsum eu rhoncus <b>ultricies</b>.</a>" +
222                         "<br/>";
223         String ctx = res.toString();
224         assertLinesEqual("format().toString()", DOCCTX_0, ctx);
225 
226         // Second, test with contextCount==1
227         args = new ContextArgs((short)1, (short)10);
228         fmt = new ContextFormatter(args);
229         fmt.setUrl("http://example.com");
230         res = fmt.format(new Passage[] {p}, DOC);
231         assertNotNull("format() result", res);
232 
233         final String DOCCTX_1 =
234                 "<a class=\"s\" href=\"http://example.com#5\"><span " +
235                         "class=\"l\">5</span> </a><br/>" +
236                         "<a class=\"s\" href=\"http://example.com#6\"><span " +
237                         "class=\"l\">6</span> &hellip;um sed laoreet. Nam " +
238                         "maximus libero non ornare egestas. Aenean " +
239                         "dignissim ipsum eu rhoncus <b>ultricies</b>.</a>" +
240                         "<br/>" +
241                         "<a class=\"s\" href=\"http://example.com#7\"><span " +
242                         "class=\"l\">7</span> </a><br/>";
243         ctx = res.toString();
244         assertLinesEqual("format().toString()", DOCCTX_1, ctx);
245 
246         // Third, test with contextCount==1 and a line limit
247         args = new ContextArgs((short)1, (short)10);
248         fmt = new ContextFormatter(args);
249         fmt.setUrl("http://example.com");
250         fmt.setMoreLimit(2);
251         fmt.setMoreUrl("http://example.com/more");
252         res = fmt.format(new Passage[] {p}, DOC);
253         assertNotNull("format() result", res);
254 
255         final String DOCCTX_2M =
256                 "<a class=\"s\" href=\"http://example.com#5\">" +
257                         "<span class=\"l\">5</span> </a><br/>" +
258                         "<a class=\"s\" href=\"http://example.com#6\"><span " +
259                         "class=\"l\">6</span> &hellip;um sed laoreet. Nam " +
260                         "maximus libero non ornare egestas. Aenean " +
261                         "dignissim ipsum eu rhoncus <b>ultricies</b>.</a>" +
262                         "<br/><a href=\"http://example.com/more\">[all " +
263                         "&hellip;]</a><br/>";
264         ctx = res.toString();
265         assertLinesEqual("format().toString()", DOCCTX_2M, ctx);
266     }
267 
268     @Test
testRightElidedMatchFormatted()269     public void testRightElidedMatchFormatted() {
270         final String WORD = "Maecenas";
271         int woff = DOC.indexOf(WORD);
272         assertTrue(WORD, woff >= 0);
273 
274         Passage p = new Passage();
275         p.setStartOffset(woff);
276         p.setEndOffset(woff + WORD.length());
277         p.addMatch(woff, p.getEndOffset(), new BytesRef(WORD),1);
278         assertEquals("getNumMatches()", 1, p.getNumMatches());
279 
280         // First, test with contextCount==0
281         ContextArgs args = new ContextArgs((short)0, (short)10);
282         ContextFormatter fmt = new ContextFormatter(args);
283         fmt.setUrl("http://example.com");
284         Object res = fmt.format(new Passage[] {p}, DOC);
285         assertNotNull("format() result", res);
286 
287         final String DOCCTX_0 =
288                 "<a class=\"s\" href=\"http://example.com#6\"><span " +
289                         "class=\"l\">6</span> ----<b>Maecenas</b> vitae " +
290                         "lacus velit varius vulputate ipsum sed laoreet. " +
291                         "Nam maximus libero non ornare eg&hellip;</a><br/>";
292         String ctx = res.toString();
293         assertLinesEqual("format().toString()", DOCCTX_0, ctx);
294 
295         // Second, test with contextCount==1
296         args = new ContextArgs((short)1, (short)10);
297         fmt = new ContextFormatter(args);
298         fmt.setUrl("http://example.com");
299         res = fmt.format(new Passage[] {p}, DOC);
300         assertNotNull("format() result", res);
301 
302         final String DOCCTX_1 =
303                 "<a class=\"s\" href=\"http://example.com#5\"><span " +
304                         "class=\"l\">5</span> </a><br/>" +
305                         "<a class=\"s\" href=\"http://example.com#6\"><span " +
306                         "class=\"l\">6</span> ----<b>Maecenas</b> vitae " +
307                         "lacus velit varius vulputate ipsum sed laoreet. " +
308                         "Nam maximus libero non ornare eg&hellip;</a><br/>" +
309                         "<a class=\"s\" href=\"http://example.com#7\"><span " +
310                         "class=\"l\">7</span> </a><br/>";
311         ctx = res.toString();
312         assertLinesEqual("format().toString()", DOCCTX_1, ctx);
313 
314         // Third, test with contextCount==1 and a line limit
315         args = new ContextArgs((short)1, (short)10);
316         fmt = new ContextFormatter(args);
317         fmt.setUrl("http://example.com");
318         fmt.setMoreLimit(2);
319         fmt.setMoreUrl("http://example.com/more");
320         res = fmt.format(new Passage[] {p}, DOC);
321         assertNotNull("format() result", res);
322 
323         final String DOCCTX_2M =
324                 "<a class=\"s\" href=\"http://example.com#5\"><span " +
325                         "class=\"l\">5</span> </a><br/>" +
326                         "<a class=\"s\" href=\"http://example.com#6\"><span " +
327                         "class=\"l\">6</span> ----<b>Maecenas</b> vitae " +
328                         "lacus velit varius vulputate ipsum sed laoreet. " +
329                         "Nam maximus libero non ornare eg&hellip;</a><br/>" +
330                         "<a href=\"http://example.com/more\">[all " +
331                         "&hellip;]</a><br/>\n";
332         ctx = res.toString();
333         assertLinesEqual("format().toString()", DOCCTX_2M, ctx);
334     }
335 
336     @Test
testBoundsProblemFormatted()337     public void testBoundsProblemFormatted() {
338         final String PHRASE = "efficitur vitae";
339         int phOff = DOC.indexOf(PHRASE);
340         assertTrue(PHRASE, phOff >= 0);
341 
342         // Create a slightly-longer word of all '*'.
343         final int LF_CHAR_COUNT = 1;
344         final String STARS = String.join("", Collections.nCopies(
345                 PHRASE.length() + LF_CHAR_COUNT, "*"));
346 
347         Passage p = new Passage();
348         p.setStartOffset(phOff);
349         p.setEndOffset(phOff + STARS.length());
350         p.addMatch(phOff, p.getEndOffset(), new BytesRef(STARS),1);
351         assertEquals("getNumMatches()", 1, p.getNumMatches());
352 
353         // Test with contextCount==0
354         ContextArgs args = new ContextArgs((short)0, (short)10);
355         ContextFormatter fmt = new ContextFormatter(args);
356         fmt.setUrl("http://example.com");
357         Object res = fmt.format(new Passage[] {p}, DOC);
358         assertNotNull("format() result", res);
359 
360         final String DOC_CTX_0 =
361                 "<a class=\"s\" href=\"http://example.com#3\"><span class=\"l\">" +
362                         "3</span> Mauris diam nisl, tincidunt nec gravida sit" +
363                         " amet, <b>efficitur vitae</b></a><br/>\n";
364         String ctx = res.toString();
365         assertLinesEqual("format().toString()", DOC_CTX_0, ctx);
366     }
367 }
368