1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * See LICENSE.txt included in this distribution for the specific 9 * language governing permissions and limitations under the License. 10 * 11 * When distributing Covered Code, include this CDDL HEADER in each 12 * file and include the License file at LICENSE.txt. 13 * If applicable, add the following below this CDDL HEADER, with the 14 * fields enclosed by brackets "[]" replaced with your own identifying 15 * information: Portions Copyright [yyyy] [name of copyright owner] 16 * 17 * CDDL HEADER END 18 */ 19 20 /* 21 * Copyright (c) 2018, 2020, Chris Fraire <cfraire@me.com>. 22 */ 23 24 package org.opengrok.indexer.util; 25 26 import static org.junit.Assert.assertEquals; 27 28 import java.io.IOException; 29 import org.junit.Test; 30 import org.opengrok.indexer.analysis.StreamSource; 31 32 /** 33 * Represents a container for tests of {@link SourceSplitter}. 34 */ 35 public class SourceSplitterTest { 36 37 @Test shouldSplitEmptyStringIntoOneLine()38 public void shouldSplitEmptyStringIntoOneLine() { 39 SourceSplitter splitter = new SourceSplitter(); 40 splitter.reset(""); 41 assertEquals("split count", 1, splitter.count()); 42 assertEquals("split offset", 0, splitter.getOffset(0)); 43 assertEquals("split offset", 0, splitter.getOffset(1)); 44 45 assertEquals("split find-index", 0, splitter.findLineIndex(0)); 46 assertEquals("split find-index", -1, splitter.findLineIndex(1)); 47 } 48 49 @Test shouldSplitEndingLFsIntoOneMoreLine()50 public void shouldSplitEndingLFsIntoOneMoreLine() { 51 SourceSplitter splitter = new SourceSplitter(); 52 splitter.reset("abc\ndef\n"); 53 assertEquals("split count", 3, splitter.count()); 54 assertEquals("split offset", 0, splitter.getOffset(0)); 55 assertEquals("split offset", 4, splitter.getOffset(1)); 56 assertEquals("split offset", 8, splitter.getOffset(2)); 57 assertEquals("split offset", 8, splitter.getOffset(3)); 58 } 59 60 @Test shouldSplitDocsWithNoLastLF()61 public void shouldSplitDocsWithNoLastLF() { 62 SourceSplitter splitter = new SourceSplitter(); 63 splitter.reset("abc\r\ndef"); 64 assertEquals("split count", 2, splitter.count()); 65 assertEquals("split offset", 0, splitter.getOffset(0)); 66 assertEquals("split offset", 5, splitter.getOffset(1)); 67 assertEquals("split offset", 8, splitter.getOffset(2)); 68 69 assertEquals("split find-index", 0, splitter.findLineIndex(0)); 70 assertEquals("split find-index", 0, splitter.findLineIndex(1)); 71 assertEquals("split find-index", 0, splitter.findLineIndex(4)); 72 assertEquals("split find-index", 1, splitter.findLineIndex(5)); 73 assertEquals("split find-index", 1, splitter.findLineIndex(6)); 74 } 75 76 @Test shouldHandleDocsOfLongerLength()77 public void shouldHandleDocsOfLongerLength() { 78 // 0 0 79 // 0-- - 5-- - -1--- - 5--- - 2- 80 final String INPUT = "ab\r\ncde\r\nefgh\r\nijk\r\nlm"; 81 82 SourceSplitter splitter = new SourceSplitter(); 83 splitter.reset(INPUT); 84 assertEquals("split count", 5, splitter.count()); 85 assertEquals("split offset", 0, splitter.getOffset(0)); 86 assertEquals("split offset", 4, splitter.getOffset(1)); 87 assertEquals("split offset", 9, splitter.getOffset(2)); 88 assertEquals("split offset", 15, splitter.getOffset(3)); 89 assertEquals("split offset", 20, splitter.getOffset(4)); 90 assertEquals("split offset", 22, splitter.getOffset(5)); 91 92 /* 93 * Test findLineIndex() for every character with an alternate 94 * computation that counts every LF. 95 */ 96 for (int i = 0; i < splitter.originalLength(); ++i) { 97 char c = INPUT.charAt(i); 98 int li = splitter.findLineIndex(i); 99 long numLF = INPUT.substring(0, i + 1).chars().filter(ch -> 100 ch == '\n').count(); 101 long exp = numLF - (c == '\n' ? 1 : 0); 102 assertEquals("split find-index of " + i, exp, li); 103 } 104 } 105 106 @Test shouldHandleStreamedDocsOfLongerLength()107 public void shouldHandleStreamedDocsOfLongerLength() throws IOException { 108 // 0 0 109 // 0-- - 5-- - -1--- - 5--- - 2- 110 final String INPUT = "ab\r\ncde\r\nefgh\r\nijk\r\nlm"; 111 StreamSource src = StreamSource.fromString(INPUT); 112 113 SourceSplitter splitter = new SourceSplitter(); 114 splitter.reset(src); 115 assertEquals("split count", 5, splitter.count()); 116 assertEquals("split offset", 0, splitter.getOffset(0)); 117 assertEquals("split offset", 4, splitter.getOffset(1)); 118 assertEquals("split offset", 9, splitter.getOffset(2)); 119 assertEquals("split offset", 15, splitter.getOffset(3)); 120 assertEquals("split offset", 20, splitter.getOffset(4)); 121 assertEquals("split offset", 22, splitter.getOffset(5)); 122 123 /* 124 * Test findLineIndex() for every character with an alternate 125 * computation that counts every LF. 126 */ 127 for (int i = 0; i < splitter.originalLength(); ++i) { 128 char c = INPUT.charAt(i); 129 int li = splitter.findLineIndex(i); 130 long numLF = INPUT.substring(0, i + 1).chars().filter(ch -> 131 ch == '\n').count(); 132 long exp = numLF - (c == '\n' ? 1 : 0); 133 assertEquals("split find-index of " + i, exp, li); 134 } 135 } 136 137 @Test shouldHandleInterspersedLineEndings()138 public void shouldHandleInterspersedLineEndings() throws IOException { 139 // 0 0 140 // 0- -- -5 - -- - 1 - - - -5 -- - -2-- 141 // 0 1 2 3 4 5 6 7 8 9 0 142 // 1 143 final String INPUT = "a\rb\nc\r\nd\r\r\r\n\re\n\rf\r\nghij"; 144 StreamSource src = StreamSource.fromString(INPUT); 145 146 SourceSplitter splitter = new SourceSplitter(); 147 splitter.reset(src); 148 assertEquals("split count", 11, splitter.count()); 149 assertEquals("split offset", 0, splitter.getOffset(0)); 150 assertEquals("split offset", 2, splitter.getOffset(1)); 151 assertEquals("split offset", 4, splitter.getOffset(2)); 152 assertEquals("split offset", 7, splitter.getOffset(3)); 153 assertEquals("split offset", 9, splitter.getOffset(4)); 154 assertEquals("split offset", 10, splitter.getOffset(5)); 155 assertEquals("split offset", 12, splitter.getOffset(6)); 156 assertEquals("split offset", 13, splitter.getOffset(7)); 157 assertEquals("split offset", 15, splitter.getOffset(8)); 158 assertEquals("split offset", 16, splitter.getOffset(9)); 159 assertEquals("split offset", 19, splitter.getOffset(10)); 160 assertEquals("split offset", 23, splitter.getOffset(11)); 161 } 162 } 163