1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * See LICENSE.txt included in this distribution for the specific
9  * language governing permissions and limitations under the License.
10  *
11  * When distributing Covered Code, include this CDDL HEADER in each
12  * file and include the License file at LICENSE.txt.
13  * If applicable, add the following below this CDDL HEADER, with the
14  * fields enclosed by brackets "[]" replaced with your own identifying
15  * information: Portions Copyright [yyyy] [name of copyright owner]
16  *
17  * CDDL HEADER END
18  */
19 
20 /*
21  * Copyright (c) 2018, 2020, Chris Fraire <cfraire@me.com>.
22  */
23 
24 package org.opengrok.indexer.util;
25 
26 import static org.junit.Assert.assertEquals;
27 
28 import java.io.IOException;
29 import org.junit.Test;
30 import org.opengrok.indexer.analysis.StreamSource;
31 
32 /**
33  * Represents a container for tests of {@link SourceSplitter}.
34  */
35 public class SourceSplitterTest {
36 
37     @Test
shouldSplitEmptyStringIntoOneLine()38     public void shouldSplitEmptyStringIntoOneLine() {
39         SourceSplitter splitter = new SourceSplitter();
40         splitter.reset("");
41         assertEquals("split count", 1, splitter.count());
42         assertEquals("split offset", 0, splitter.getOffset(0));
43         assertEquals("split offset", 0, splitter.getOffset(1));
44 
45         assertEquals("split find-index", 0, splitter.findLineIndex(0));
46         assertEquals("split find-index", -1, splitter.findLineIndex(1));
47     }
48 
49     @Test
shouldSplitEndingLFsIntoOneMoreLine()50     public void shouldSplitEndingLFsIntoOneMoreLine() {
51         SourceSplitter splitter = new SourceSplitter();
52         splitter.reset("abc\ndef\n");
53         assertEquals("split count", 3, splitter.count());
54         assertEquals("split offset", 0, splitter.getOffset(0));
55         assertEquals("split offset", 4, splitter.getOffset(1));
56         assertEquals("split offset", 8, splitter.getOffset(2));
57         assertEquals("split offset", 8, splitter.getOffset(3));
58     }
59 
60     @Test
shouldSplitDocsWithNoLastLF()61     public void shouldSplitDocsWithNoLastLF() {
62         SourceSplitter splitter = new SourceSplitter();
63         splitter.reset("abc\r\ndef");
64         assertEquals("split count", 2, splitter.count());
65         assertEquals("split offset", 0, splitter.getOffset(0));
66         assertEquals("split offset", 5, splitter.getOffset(1));
67         assertEquals("split offset", 8, splitter.getOffset(2));
68 
69         assertEquals("split find-index", 0, splitter.findLineIndex(0));
70         assertEquals("split find-index", 0, splitter.findLineIndex(1));
71         assertEquals("split find-index", 0, splitter.findLineIndex(4));
72         assertEquals("split find-index", 1, splitter.findLineIndex(5));
73         assertEquals("split find-index", 1, splitter.findLineIndex(6));
74     }
75 
76     @Test
shouldHandleDocsOfLongerLength()77     public void shouldHandleDocsOfLongerLength() {
78         //                                  0             0
79         //                    0-- -  5-- - -1--- - 5--- - 2-
80         final String INPUT = "ab\r\ncde\r\nefgh\r\nijk\r\nlm";
81 
82         SourceSplitter splitter = new SourceSplitter();
83         splitter.reset(INPUT);
84         assertEquals("split count", 5, splitter.count());
85         assertEquals("split offset", 0, splitter.getOffset(0));
86         assertEquals("split offset", 4, splitter.getOffset(1));
87         assertEquals("split offset", 9, splitter.getOffset(2));
88         assertEquals("split offset", 15, splitter.getOffset(3));
89         assertEquals("split offset", 20, splitter.getOffset(4));
90         assertEquals("split offset", 22, splitter.getOffset(5));
91 
92         /*
93          * Test findLineIndex() for every character with an alternate
94          * computation that counts every LF.
95          */
96         for (int i = 0; i < splitter.originalLength(); ++i) {
97             char c = INPUT.charAt(i);
98             int li = splitter.findLineIndex(i);
99             long numLF = INPUT.substring(0, i + 1).chars().filter(ch ->
100                 ch == '\n').count();
101             long exp = numLF - (c == '\n' ? 1 : 0);
102             assertEquals("split find-index of " + i, exp, li);
103         }
104     }
105 
106     @Test
shouldHandleStreamedDocsOfLongerLength()107     public void shouldHandleStreamedDocsOfLongerLength() throws IOException {
108         //                                  0             0
109         //                    0-- -  5-- - -1--- - 5--- - 2-
110         final String INPUT = "ab\r\ncde\r\nefgh\r\nijk\r\nlm";
111         StreamSource src = StreamSource.fromString(INPUT);
112 
113         SourceSplitter splitter = new SourceSplitter();
114         splitter.reset(src);
115         assertEquals("split count", 5, splitter.count());
116         assertEquals("split offset", 0, splitter.getOffset(0));
117         assertEquals("split offset", 4, splitter.getOffset(1));
118         assertEquals("split offset", 9, splitter.getOffset(2));
119         assertEquals("split offset", 15, splitter.getOffset(3));
120         assertEquals("split offset", 20, splitter.getOffset(4));
121         assertEquals("split offset", 22, splitter.getOffset(5));
122 
123         /*
124          * Test findLineIndex() for every character with an alternate
125          * computation that counts every LF.
126          */
127         for (int i = 0; i < splitter.originalLength(); ++i) {
128             char c = INPUT.charAt(i);
129             int li = splitter.findLineIndex(i);
130             long numLF = INPUT.substring(0, i + 1).chars().filter(ch ->
131                 ch == '\n').count();
132             long exp = numLF - (c == '\n' ? 1 : 0);
133             assertEquals("split find-index of " + i, exp, li);
134         }
135     }
136 
137     @Test
shouldHandleInterspersedLineEndings()138     public void shouldHandleInterspersedLineEndings() throws IOException {
139         //                                    0                0
140         //                    0- -- -5 - -- - 1 - - - -5 -- - -2--
141         //                    0  1  2    3  4 5   6 7  8 9    0
142         //                                                    1
143         final String INPUT = "a\rb\nc\r\nd\r\r\r\n\re\n\rf\r\nghij";
144         StreamSource src = StreamSource.fromString(INPUT);
145 
146         SourceSplitter splitter = new SourceSplitter();
147         splitter.reset(src);
148         assertEquals("split count", 11, splitter.count());
149         assertEquals("split offset", 0, splitter.getOffset(0));
150         assertEquals("split offset", 2, splitter.getOffset(1));
151         assertEquals("split offset", 4, splitter.getOffset(2));
152         assertEquals("split offset", 7, splitter.getOffset(3));
153         assertEquals("split offset", 9, splitter.getOffset(4));
154         assertEquals("split offset", 10, splitter.getOffset(5));
155         assertEquals("split offset", 12, splitter.getOffset(6));
156         assertEquals("split offset", 13, splitter.getOffset(7));
157         assertEquals("split offset", 15, splitter.getOffset(8));
158         assertEquals("split offset", 16, splitter.getOffset(9));
159         assertEquals("split offset", 19, splitter.getOffset(10));
160         assertEquals("split offset", 23, splitter.getOffset(11));
161     }
162 }
163