1 /*******************************************************************************
2  * Copyright (c) 2000, 2018 IBM Corporation and others.
3  *
4  * This program and the accompanying materials
5  * are made available under the terms of the Eclipse Public License 2.0
6  * which accompanies this distribution, and is available at
7  * https://www.eclipse.org/legal/epl-2.0/
8  *
9  * SPDX-License-Identifier: EPL-2.0
10  *
11  * Contributors:
12  *     IBM Corporation - initial API and implementation
13  *******************************************************************************/
14 package org.eclipse.jdt.internal.core.index;
15 
16 import java.io.*;
17 import java.util.regex.Pattern;
18 
19 import org.eclipse.jdt.core.compiler.CharOperation;
20 import org.eclipse.jdt.core.search.*;
21 import org.eclipse.jdt.internal.core.util.*;
22 import org.eclipse.jdt.internal.compiler.util.HashtableOfIntValues;
23 import org.eclipse.jdt.internal.compiler.util.HashtableOfObject;
24 import org.eclipse.jdt.internal.compiler.util.SimpleLookupTable;
25 import org.eclipse.jdt.internal.compiler.util.SimpleSet;
26 import org.eclipse.jdt.internal.compiler.util.SimpleSetOfCharArray;
27 
28 public class DiskIndex {
29 
30 IndexLocation indexLocation;
31 
32 private int headerInfoOffset;
33 private int numberOfChunks;
34 private int sizeOfLastChunk;
35 private int[] chunkOffsets;
36 private int documentReferenceSize; // 1, 2 or more bytes... depends on # of document names
37 private int startOfCategoryTables;
38 private HashtableOfIntValues categoryOffsets, categoryEnds;
39 
40 private int cacheUserCount;
41 private String[][] cachedChunks; // decompressed chunks of document names
42 private HashtableOfObject categoryTables; // category name -> HashtableOfObject(words -> int[] of document #'s) or offset if not read yet
43 private char[] cachedCategoryName;
44 
45 private static final int DEFAULT_BUFFER_SIZE = 2048;
46 private static int BUFFER_READ_SIZE = DEFAULT_BUFFER_SIZE;
47 private static final int BUFFER_WRITE_SIZE = DEFAULT_BUFFER_SIZE;
48 private byte[] streamBuffer;
49 private int bufferIndex, bufferEnd; // used when reading from the file into the streamBuffer
50 private int streamEnd; // used when writing data from the streamBuffer to the file
51 char separator = Index.DEFAULT_SEPARATOR;
52 
53 public static final String SIGNATURE= "INDEX VERSION 1.131"; //$NON-NLS-1$
54 private static final char[] SIGNATURE_CHARS = SIGNATURE.toCharArray();
55 public static boolean DEBUG = false;
56 
57 private static final int RE_INDEXED = -1;
58 private static final int DELETED = -2;
59 
60 private static final int CHUNK_SIZE = 100;
61 
62 private static final SimpleSetOfCharArray INTERNED_CATEGORY_NAMES = new SimpleSetOfCharArray(20);
63 private static final String TMP_EXT = ".tmp"; //$NON-NLS-1$
64 
65 static class IntList {
66 
67 int size;
68 int[] elements;
69 
IntList(int[] elements)70 IntList(int[] elements) {
71 	this.elements = elements;
72 	this.size = elements.length;
73 }
add(int newElement)74 void add(int newElement) {
75 	if (this.size == this.elements.length) {
76 		int newSize = this.size * 3;
77 		if (newSize < 7) newSize = 7;
78 		System.arraycopy(this.elements, 0, this.elements = new int[newSize], 0, this.size);
79 	}
80 	this.elements[this.size++] = newElement;
81 }
asArray()82 int[] asArray() {
83 	int[] result = new int[this.size];
84 	System.arraycopy(this.elements, 0, result, 0, this.size);
85 	return result;
86 }
87 }
88 
89 
DiskIndex()90 DiskIndex() {
91 	this.headerInfoOffset = -1;
92 	this.numberOfChunks = -1;
93 	this.sizeOfLastChunk = -1;
94 	this.chunkOffsets = null;
95 	this.documentReferenceSize = -1;
96 	this.cacheUserCount = -1;
97 	this.cachedChunks = null;
98 	this.categoryTables = null;
99 	this.cachedCategoryName = null;
100 	this.categoryOffsets = null;
101 	this.categoryEnds = null;
102 }
DiskIndex(IndexLocation location)103 DiskIndex(IndexLocation location) throws IOException {
104 	this();
105 	if (location == null) {
106 		throw new IllegalArgumentException();
107 	}
108 	this.indexLocation = location;
109 }
addDocumentNames(String substring, MemoryIndex memoryIndex)110 SimpleSet addDocumentNames(String substring, MemoryIndex memoryIndex) throws IOException {
111 	// must skip over documents which have been added/changed/deleted in the memory index
112 	String[] docNames = readAllDocumentNames();
113 	SimpleSet results = new SimpleSet(docNames.length);
114 	if (substring == null) {
115 		if (memoryIndex == null) {
116 			for (int i = 0, l = docNames.length; i < l; i++)
117 				results.add(docNames[i]);
118 		} else {
119 			SimpleLookupTable docsToRefs = memoryIndex.docsToReferences;
120 			for (int i = 0, l = docNames.length; i < l; i++) {
121 				String docName = docNames[i];
122 				if (!docsToRefs.containsKey(docName))
123 					results.add(docName);
124 			}
125 		}
126 	} else {
127 		if (memoryIndex == null) {
128 			for (int i = 0, l = docNames.length; i < l; i++)
129 				if (docNames[i].startsWith(substring, 0))
130 					results.add(docNames[i]);
131 		} else {
132 			SimpleLookupTable docsToRefs = memoryIndex.docsToReferences;
133 			for (int i = 0, l = docNames.length; i < l; i++) {
134 				String docName = docNames[i];
135 				if (docName.startsWith(substring, 0) && !docsToRefs.containsKey(docName))
136 					results.add(docName);
137 			}
138 		}
139 	}
140 	return results;
141 }
addQueryResult(HashtableOfObject results, char[] word, Object docs, MemoryIndex memoryIndex, boolean prevResults)142 private HashtableOfObject addQueryResult(HashtableOfObject results, char[] word, Object docs, MemoryIndex memoryIndex, boolean prevResults) throws IOException {
143 	// must skip over documents which have been added/changed/deleted in the memory index
144 	if (results == null)
145 		results = new HashtableOfObject(13);
146 	EntryResult result = prevResults ? (EntryResult) results.get(word) : null;
147 	if (memoryIndex == null) {
148 		if (result == null)
149 			results.putUnsafely(word, new EntryResult(word, docs));
150 		else
151 			result.addDocumentTable(docs);
152 	} else {
153 		SimpleLookupTable docsToRefs = memoryIndex.docsToReferences;
154 		if (result == null) result = new EntryResult(word, null);
155 		int[] docNumbers = readDocumentNumbers(docs);
156 		for (int i = 0, l = docNumbers.length; i < l; i++) {
157 			String docName = readDocumentName(docNumbers[i]);
158 			if (!docsToRefs.containsKey(docName))
159 				result.addDocumentName(docName);
160 		}
161 		if (!result.isEmpty())
162 			results.put(word, result);
163 	}
164 	return results;
165 }
addQueryResults(char[][] categories, char[] key, int matchRule, MemoryIndex memoryIndex)166 HashtableOfObject addQueryResults(char[][] categories, char[] key, int matchRule, MemoryIndex memoryIndex) throws IOException {
167 	// assumes sender has called startQuery() & will call stopQuery() when finished
168 	if (this.categoryOffsets == null) return null; // file is empty
169 
170 	HashtableOfObject results = null; // initialized if needed
171 
172 	// No need to check the results table for duplicates while processing the
173 	// first category table or if the first category tables doesn't have any results.
174 	boolean prevResults = false;
175 	if (key == null) {
176 		for (int i = 0, l = categories.length; i < l; i++) {
177 			HashtableOfObject wordsToDocNumbers = readCategoryTable(categories[i], true); // cache if key is null since its a definite match
178 			if (wordsToDocNumbers != null) {
179 				char[][] words = wordsToDocNumbers.keyTable;
180 				Object[] values = wordsToDocNumbers.valueTable;
181 				if (results == null)
182 					results = new HashtableOfObject(wordsToDocNumbers.elementSize);
183 				for (int j = 0, m = words.length; j < m; j++)
184 					if (words[j] != null)
185 						results = addQueryResult(results, words[j], values[j], memoryIndex, prevResults);
186 			}
187 			prevResults = results != null;
188 		}
189 		if (results != null && this.cachedChunks == null)
190 			cacheDocumentNames();
191 	} else {
192 		switch (matchRule) {
193 			case SearchPattern.R_EXACT_MATCH | SearchPattern.R_CASE_SENSITIVE:
194 				for (int i = 0, l = categories.length; i < l; i++) {
195 					HashtableOfObject wordsToDocNumbers = readCategoryTable(categories[i], false);
196 					Object value;
197 					if (wordsToDocNumbers != null && (value = wordsToDocNumbers.get(key)) != null)
198 						results = addQueryResult(results, key, value, memoryIndex, prevResults);
199 					prevResults = results != null;
200 				}
201 				break;
202 			case SearchPattern.R_PREFIX_MATCH | SearchPattern.R_CASE_SENSITIVE:
203 				for (int i = 0, l = categories.length; i < l; i++) {
204 					HashtableOfObject wordsToDocNumbers = readCategoryTable(categories[i], false);
205 					if (wordsToDocNumbers != null) {
206 						char[][] words = wordsToDocNumbers.keyTable;
207 						Object[] values = wordsToDocNumbers.valueTable;
208 						for (int j = 0, m = words.length; j < m; j++) {
209 							char[] word = words[j];
210 							if (word != null && key[0] == word[0] && CharOperation.prefixEquals(key, word))
211 								results = addQueryResult(results, word, values[j], memoryIndex, prevResults);
212 						}
213 					}
214 					prevResults = results != null;
215 				}
216 				break;
217 			case SearchPattern.R_REGEXP_MATCH:
218 				Pattern pattern = Pattern.compile(new String(key));
219 				for (int i = 0, l = categories.length; i < l; i++) {
220 					HashtableOfObject wordsToDocNumbers = readCategoryTable(categories[i], false);
221 					if (wordsToDocNumbers != null) {
222 						char[][] words = wordsToDocNumbers.keyTable;
223 						Object[] values = wordsToDocNumbers.valueTable;
224 						for (int j = 0, m = words.length; j < m; j++) {
225 							char[] word = words[j];
226 							if (word != null && pattern.matcher(new String(word)).matches())
227 								results = addQueryResult(results, word, values[j], memoryIndex, prevResults);
228 						}
229 					}
230 					prevResults = results != null;
231 				}
232 				break;
233 			default:
234 				for (int i = 0, l = categories.length; i < l; i++) {
235 					HashtableOfObject wordsToDocNumbers = readCategoryTable(categories[i], false);
236 					if (wordsToDocNumbers != null) {
237 						char[][] words = wordsToDocNumbers.keyTable;
238 						Object[] values = wordsToDocNumbers.valueTable;
239 						for (int j = 0, m = words.length; j < m; j++) {
240 							char[] word = words[j];
241 							if (word != null && Index.isMatch(key, word, matchRule))
242 								results = addQueryResult(results, word, values[j], memoryIndex, prevResults);
243 						}
244 					}
245 					prevResults = results != null;
246 				}
247 		}
248 	}
249 
250 	return results;
251 }
cacheDocumentNames()252 private void cacheDocumentNames() throws IOException {
253 	// will need all document names so get them now
254 	this.cachedChunks = new String[this.numberOfChunks][];
255 	InputStream stream = this.indexLocation.getInputStream();
256 	try {
257 		if (this.numberOfChunks > 5) BUFFER_READ_SIZE <<= 1;
258 		int offset = this.chunkOffsets[0];
259 		stream.skip(offset);
260 		this.streamBuffer = new byte[BUFFER_READ_SIZE];
261 		this.bufferIndex = 0;
262 		this.bufferEnd = stream.read(this.streamBuffer, 0, this.streamBuffer.length);
263 		for (int i = 0; i < this.numberOfChunks; i++) {
264 			int size = i == this.numberOfChunks - 1 ? this.sizeOfLastChunk : CHUNK_SIZE;
265 			readChunk(this.cachedChunks[i] = new String[size], stream, 0, size);
266 		}
267 	} catch (IOException e) {
268 		this.cachedChunks = null;
269 		throw e;
270 	} finally {
271 		stream.close();
272 		this.indexLocation.close();
273 		this.streamBuffer = null;
274 		BUFFER_READ_SIZE = DEFAULT_BUFFER_SIZE;
275 	}
276 }
computeDocumentNames(String[] onDiskNames, int[] positions, SimpleLookupTable indexedDocuments, MemoryIndex memoryIndex)277 private String[] computeDocumentNames(String[] onDiskNames, int[] positions, SimpleLookupTable indexedDocuments, MemoryIndex memoryIndex) {
278 	int onDiskLength = onDiskNames.length;
279 	Object[] docNames = memoryIndex.docsToReferences.keyTable;
280 	Object[] referenceTables = memoryIndex.docsToReferences.valueTable;
281 	if (onDiskLength == 0) {
282 		// disk index was empty, so add every indexed document
283 		for (int i = 0, l = referenceTables.length; i < l; i++)
284 			if (referenceTables[i] != null)
285 				indexedDocuments.put(docNames[i], null); // remember each new document
286 
287 		String[] newDocNames = new String[indexedDocuments.elementSize];
288 		int count = 0;
289 		Object[] added = indexedDocuments.keyTable;
290 		for (int i = 0, l = added.length; i < l; i++)
291 			if (added[i] != null)
292 				newDocNames[count++] = (String) added[i];
293 		Util.sort(newDocNames);
294 		for (int i = 0, l = newDocNames.length; i < l; i++)
295 			indexedDocuments.put(newDocNames[i], Integer.valueOf(i));
296 		return newDocNames;
297 	}
298 
299 	// initialize positions as if each document will remain in the same position
300 	for (int i = 0; i < onDiskLength; i++)
301 		positions[i] = i;
302 
303 	// find out if the memory index has any new or deleted documents, if not then the names & positions are the same
304 	int numDeletedDocNames = 0;
305 	nextPath : for (int i = 0, l = docNames.length; i < l; i++) {
306 		String docName = (String) docNames[i];
307 		if (docName != null) {
308 			for (int j = 0; j < onDiskLength; j++) {
309 				if (docName.equals(onDiskNames[j])) {
310 					if (referenceTables[i] == null) {
311 						positions[j] = DELETED;
312 						numDeletedDocNames++;
313 					} else {
314 						positions[j] = RE_INDEXED;
315 					}
316 					continue nextPath;
317 				}
318 			}
319 			if (referenceTables[i] != null)
320 				indexedDocuments.put(docName, null); // remember each new document, skip deleted documents which were never saved
321 		}
322 	}
323 
324 	String[] newDocNames = onDiskNames;
325 	if (numDeletedDocNames > 0 || indexedDocuments.elementSize > 0) {
326 		// some new documents have been added or some old ones deleted
327 		newDocNames = new String[onDiskLength + indexedDocuments.elementSize - numDeletedDocNames];
328 		int count = 0;
329 		for (int i = 0; i < onDiskLength; i++)
330 			if (positions[i] >= RE_INDEXED)
331 				newDocNames[count++] = onDiskNames[i]; // keep each unchanged document
332 		Object[] added = indexedDocuments.keyTable;
333 		for (int i = 0, l = added.length; i < l; i++)
334 			if (added[i] != null)
335 				newDocNames[count++] = (String) added[i]; // add each new document
336 		Util.sort(newDocNames);
337 		for (int i = 0, l = newDocNames.length; i < l; i++)
338 			if (indexedDocuments.containsKey(newDocNames[i]))
339 				indexedDocuments.put(newDocNames[i], Integer.valueOf(i)); // remember the position for each new document
340 	}
341 
342 	// need to be able to look up an old position (ref# from a ref[]) and map it to its new position
343 	// if its old position == DELETED then its forgotton
344 	// if its old position == ReINDEXED then its also forgotten but its new position is needed to map references
345 	int count = -1;
346 	for (int i = 0; i < onDiskLength;) {
347 		switch(positions[i]) {
348 			case DELETED :
349 				i++; // skip over deleted... references are forgotten
350 				break;
351 			case RE_INDEXED :
352 				String newName = newDocNames[++count];
353 				if (newName.equals(onDiskNames[i])) {
354 					indexedDocuments.put(newName, Integer.valueOf(count)); // the reindexed docName that was at position i is now at position count
355 					i++;
356 				}
357 				break;
358 			default :
359 				if (newDocNames[++count].equals(onDiskNames[i]))
360 					positions[i++] = count; // the unchanged docName that was at position i is now at position count
361 		}
362 	}
363 	return newDocNames;
364 }
copyQueryResults(HashtableOfObject categoryToWords, int newPosition)365 private void copyQueryResults(HashtableOfObject categoryToWords, int newPosition) {
366 	char[][] categoryNames = categoryToWords.keyTable;
367 	Object[] wordSets = categoryToWords.valueTable;
368 	for (int i = 0, l = categoryNames.length; i < l; i++) {
369 		char[] categoryName = categoryNames[i];
370 		if (categoryName != null) {
371 			SimpleWordSet wordSet = (SimpleWordSet) wordSets[i];
372 			HashtableOfObject wordsToDocs = (HashtableOfObject) this.categoryTables.get(categoryName);
373 			if (wordsToDocs == null)
374 				this.categoryTables.put(categoryName, wordsToDocs = new HashtableOfObject(wordSet.elementSize));
375 
376 			char[][] words = wordSet.words;
377 			for (int j = 0, m = words.length; j < m; j++) {
378 				char[] word = words[j];
379 				if (word != null) {
380 					Object o = wordsToDocs.get(word);
381 					if (o == null) {
382 						wordsToDocs.putUnsafely(word, new int[] {newPosition});
383 					} else if (o instanceof IntList) {
384 						((IntList) o).add(newPosition);
385 					} else {
386 						IntList list = new IntList((int[]) o);
387 						list.add(newPosition);
388 						wordsToDocs.put(word, list);
389 					}
390 				}
391 			}
392 		}
393 	}
394 }
initialize(boolean reuseExistingFile)395 void initialize(boolean reuseExistingFile) throws IOException {
396 	if (this.indexLocation.exists()) {
397 		if (reuseExistingFile) {
398 			InputStream stream = this.indexLocation.getInputStream();
399 			if (stream == null) {
400 				throw new IOException("Failed to use the index file"); //$NON-NLS-1$
401 			}
402 			this.streamBuffer = new byte[BUFFER_READ_SIZE];
403 			this.bufferIndex = 0;
404 			this.bufferEnd = stream.read(this.streamBuffer, 0, 128);
405 			try {
406 				char[] signature = readStreamChars(stream);
407 				if (!CharOperation.equals(signature, SIGNATURE_CHARS)) {
408 					throw new IOException(Messages.exception_wrongFormat);
409 				}
410 				this.headerInfoOffset = readStreamInt(stream);
411 				if (this.headerInfoOffset > 0) { // file is empty if its not set
412 					stream.skip(this.headerInfoOffset - this.bufferEnd); // assume that the header info offset is over current buffer end
413 					this.bufferIndex = 0;
414 					this.bufferEnd = stream.read(this.streamBuffer, 0, this.streamBuffer.length);
415 					readHeaderInfo(stream);
416 				}
417 			} finally {
418 				stream.close();
419 				this.indexLocation.close();
420 			}
421 			return;
422 		}
423 		if (!this.indexLocation.delete()) {
424 			if (DEBUG)
425 				System.out.println("initialize - Failed to delete index " + this.indexLocation); //$NON-NLS-1$
426 			throw new IOException("Failed to delete index " + this.indexLocation); //$NON-NLS-1$
427 		}
428 	}
429 	if (this.indexLocation.createNewFile()) {
430 		FileOutputStream stream = new FileOutputStream(this.indexLocation.getIndexFile(), false);
431 		try {
432 			this.streamBuffer = new byte[BUFFER_READ_SIZE];
433 			this.bufferIndex = 0;
434 			writeStreamChars(stream, SIGNATURE_CHARS);
435 			writeStreamInt(stream, -1); // file is empty
436 			// write the buffer to the stream
437 			if (this.bufferIndex > 0) {
438 				stream.write(this.streamBuffer, 0, this.bufferIndex);
439 				this.bufferIndex = 0;
440 			}
441 		} finally {
442 			stream.close();
443 		}
444 	} else {
445 		if (DEBUG)
446 			System.out.println("initialize - Failed to create new index " + this.indexLocation); //$NON-NLS-1$
447 		throw new IOException("Failed to create new index " + this.indexLocation); //$NON-NLS-1$
448 	}
449 }
initializeFrom(DiskIndex diskIndex, File newIndexFile)450 private void initializeFrom(DiskIndex diskIndex, File newIndexFile) throws IOException {
451 	if (newIndexFile.exists() && !newIndexFile.delete()) { // delete the temporary index file
452 		if (DEBUG)
453 			System.out.println("initializeFrom - Failed to delete temp index " + this.indexLocation); //$NON-NLS-1$
454 	} else if (!newIndexFile.createNewFile()) {
455 		if (DEBUG)
456 			System.out.println("initializeFrom - Failed to create temp index " + this.indexLocation); //$NON-NLS-1$
457 		throw new IOException("Failed to create temp index " + this.indexLocation); //$NON-NLS-1$
458 	}
459 
460 	int size = diskIndex.categoryOffsets == null ? 8 : diskIndex.categoryOffsets.elementSize;
461 	this.categoryOffsets = new HashtableOfIntValues(size);
462 	this.categoryEnds = new HashtableOfIntValues(size);
463 	this.categoryTables = new HashtableOfObject(size);
464 	this.separator = diskIndex.separator;
465 }
mergeCategories(DiskIndex onDisk, int[] positions, FileOutputStream stream)466 private void mergeCategories(DiskIndex onDisk, int[] positions, FileOutputStream stream) throws IOException {
467 	// at this point, this.categoryTables contains the names -> wordsToDocs added in copyQueryResults()
468 	char[][] oldNames = onDisk.categoryOffsets.keyTable;
469 	for (int i = 0, l = oldNames.length; i < l; i++) {
470 		char[] oldName = oldNames[i];
471 		if (oldName != null && !this.categoryTables.containsKey(oldName))
472 			this.categoryTables.put(oldName, null);
473 	}
474 
475 	char[][] categoryNames = this.categoryTables.keyTable;
476 	for (int i = 0, l = categoryNames.length; i < l; i++)
477 		if (categoryNames[i] != null)
478 			mergeCategory(categoryNames[i], onDisk, positions, stream);
479 	this.categoryTables = null;
480 }
mergeCategory(char[] categoryName, DiskIndex onDisk, int[] positions, FileOutputStream stream)481 private void mergeCategory(char[] categoryName, DiskIndex onDisk, int[] positions, FileOutputStream stream) throws IOException {
482 	HashtableOfObject wordsToDocs = (HashtableOfObject) this.categoryTables.get(categoryName);
483 	if (wordsToDocs == null)
484 		wordsToDocs = new HashtableOfObject(3);
485 
486 	HashtableOfObject oldWordsToDocs = onDisk.readCategoryTable(categoryName, true);
487 	if (oldWordsToDocs != null) {
488 		char[][] oldWords = oldWordsToDocs.keyTable;
489 		Object[] oldArrayOffsets = oldWordsToDocs.valueTable;
490 		nextWord: for (int i = 0, l = oldWords.length; i < l; i++) {
491 			char[] oldWord = oldWords[i];
492 			if (oldWord != null) {
493 				int[] oldDocNumbers = (int[]) oldArrayOffsets[i];
494 				int length = oldDocNumbers.length;
495 				int[] mappedNumbers = new int[length];
496 				int count = 0;
497 				for (int j = 0; j < length; j++) {
498 					int pos = positions[oldDocNumbers[j]];
499 					if (pos > RE_INDEXED) // forget any reference to a document which was deleted or re_indexed
500 						mappedNumbers[count++] = pos;
501 				}
502 				if (count < length) {
503 					if (count == 0) continue nextWord; // skip words which no longer have any references
504 					System.arraycopy(mappedNumbers, 0, mappedNumbers = new int[count], 0, count);
505 				}
506 
507 				Object o = wordsToDocs.get(oldWord);
508 				if (o == null) {
509 					wordsToDocs.putUnsafely(oldWord, mappedNumbers);
510 				} else {
511 					IntList list = null;
512 					if (o instanceof IntList) {
513 						list = (IntList) o;
514 					} else {
515 						list = new IntList((int[]) o);
516 						wordsToDocs.put(oldWord, list);
517 					}
518 					for (int j = 0; j < count; j++)
519 						list.add(mappedNumbers[j]);
520 				}
521 			}
522 		}
523 		onDisk.categoryTables.put(categoryName, null); // flush cached table
524 	}
525 	writeCategoryTable(categoryName, wordsToDocs, stream);
526 }
mergeWith(MemoryIndex memoryIndex)527 DiskIndex mergeWith(MemoryIndex memoryIndex) throws IOException {
528  	// assume write lock is held
529 	// compute & write out new docNames
530 	if (this.indexLocation == null) {
531 		throw new IOException("Pre-built index file not writeable");  //$NON-NLS-1$
532 	}
533 	String[] docNames = readAllDocumentNames();
534 	int previousLength = docNames.length;
535 	int[] positions = new int[previousLength]; // keeps track of the position of each document in the new sorted docNames
536 	SimpleLookupTable indexedDocuments = new SimpleLookupTable(3); // for each new/changed document in the memoryIndex
537 	docNames = computeDocumentNames(docNames, positions, indexedDocuments, memoryIndex);
538 	if (docNames.length == 0) {
539 		if (previousLength == 0) return this; // nothing to do... memory index contained deleted documents that had never been saved
540 
541 		// index is now empty since all the saved documents were removed
542 		DiskIndex newDiskIndex = new DiskIndex(this.indexLocation);
543 		newDiskIndex.initialize(false);
544 		return newDiskIndex;
545 	}
546 	boolean usingTmp = false;
547 	File oldIndexFile = this.indexLocation.getIndexFile();
548 	String indexFilePath = oldIndexFile.getPath();
549 	if (indexFilePath.endsWith(TMP_EXT)) { // the tmp file could not be renamed last time
550 		indexFilePath = indexFilePath.substring(0, indexFilePath.length()-TMP_EXT.length());
551 		usingTmp = true;
552 	} else {
553 		indexFilePath += TMP_EXT;
554 	}
555 	DiskIndex newDiskIndex = new DiskIndex(new FileIndexLocation(new File(indexFilePath)));
556 	File newIndexFile = newDiskIndex.indexLocation.getIndexFile();
557 	try {
558 		newDiskIndex.initializeFrom(this, newIndexFile);
559 		FileOutputStream stream = new FileOutputStream(newIndexFile, false);
560 		int offsetToHeader = -1;
561 		try {
562 			newDiskIndex.writeAllDocumentNames(docNames, stream);
563 			docNames = null; // free up the space
564 
565 			// add each new/changed doc to empty category tables using its new position #
566 			if (indexedDocuments.elementSize > 0) {
567 				Object[] names = indexedDocuments.keyTable;
568 				Object[] integerPositions = indexedDocuments.valueTable;
569 				for (int i = 0, l = names.length; i < l; i++)
570 					if (names[i] != null)
571 						newDiskIndex.copyQueryResults(
572 							(HashtableOfObject) memoryIndex.docsToReferences.get(names[i]), ((Integer) integerPositions[i]).intValue());
573 			}
574 			indexedDocuments = null; // free up the space
575 
576 			// merge each category table with the new ones & write them out
577 			if (previousLength == 0)
578 				newDiskIndex.writeCategories(stream);
579 			else
580 				newDiskIndex.mergeCategories(this, positions, stream);
581 			offsetToHeader = newDiskIndex.streamEnd;
582 			newDiskIndex.writeHeaderInfo(stream);
583 			positions = null; // free up the space
584 		} finally {
585 			stream.close();
586 			this.streamBuffer = null;
587 		}
588 		newDiskIndex.writeOffsetToHeader(offsetToHeader);
589 
590 		// rename file by deleting previous index file & renaming temp one
591 		if (oldIndexFile.exists() && !oldIndexFile.delete()) {
592 			if (DEBUG)
593 				System.out.println("mergeWith - Failed to delete " + this.indexLocation); //$NON-NLS-1$
594 			throw new IOException("Failed to delete index file " + this.indexLocation); //$NON-NLS-1$
595 		}
596 		if (!usingTmp && !newIndexFile.renameTo(oldIndexFile)) {
597 			// try again after waiting for two milli secs
598 			try {
599 				Thread.sleep(2);
600 			} catch (InterruptedException e) {
601 				//ignore
602 			}
603 			if (!newIndexFile.renameTo(oldIndexFile)) {
604 				if (DEBUG)
605 					System.out.println("mergeWith - Failed to rename " + this.indexLocation); //$NON-NLS-1$
606 				usingTmp = true;
607 			}
608 		}
609 	} catch (IOException e) {
610 		if (newIndexFile.exists() && !newIndexFile.delete())
611 			if (DEBUG)
612 				System.out.println("mergeWith - Failed to delete temp index " + newDiskIndex.indexLocation); //$NON-NLS-1$
613 		throw e;
614 	}
615 
616 	if (!usingTmp) // rename done, use the new file
617 		newDiskIndex.indexLocation = this.indexLocation;
618 	return newDiskIndex;
619 }
readAllDocumentNames()620 private synchronized String[] readAllDocumentNames() throws IOException {
621 	if (this.numberOfChunks <= 0)
622 		return CharOperation.NO_STRINGS;
623 
624 	InputStream stream = this.indexLocation.getInputStream();
625 	try {
626 		int offset = this.chunkOffsets[0];
627 		stream.skip(offset);
628 		this.streamBuffer = new byte[BUFFER_READ_SIZE];
629 		this.bufferIndex = 0;
630 		this.bufferEnd = stream.read(this.streamBuffer, 0, this.streamBuffer.length);
631 		int lastIndex = this.numberOfChunks - 1;
632 		String[] docNames = new String[lastIndex * CHUNK_SIZE + this.sizeOfLastChunk];
633 		for (int i = 0; i < this.numberOfChunks; i++)
634 			readChunk(docNames, stream, i * CHUNK_SIZE, i < lastIndex ? CHUNK_SIZE : this.sizeOfLastChunk);
635 		return docNames;
636 	} finally {
637 		stream.close();
638 		this.indexLocation.close();
639 		this.streamBuffer = null;
640 	}
641 }
readCategoryTable(char[] categoryName, boolean readDocNumbers)642 private synchronized HashtableOfObject readCategoryTable(char[] categoryName, boolean readDocNumbers) throws IOException {
643 	// result will be null if categoryName is unknown
644 	int offset = this.categoryOffsets.get(categoryName);
645 	if (offset == HashtableOfIntValues.NO_VALUE) {
646 		return null;
647 	}
648 
649 	if (this.categoryTables == null) {
650 		this.categoryTables = new HashtableOfObject(3);
651 	} else {
652 		HashtableOfObject cachedTable = (HashtableOfObject) this.categoryTables.get(categoryName);
653 		if (cachedTable != null) {
654 			if (readDocNumbers) { // must cache remaining document number arrays
655 				Object[] arrayOffsets = cachedTable.valueTable;
656 				for (int i = 0, l = arrayOffsets.length; i < l; i++)
657 					if (arrayOffsets[i] instanceof Integer)
658 						arrayOffsets[i] = readDocumentNumbers(arrayOffsets[i]);
659 			}
660 			return cachedTable;
661 		}
662 	}
663 
664 	InputStream stream = this.indexLocation.getInputStream();
665 	HashtableOfObject categoryTable = null;
666 	char[][] matchingWords = null;
667 	int count = 0;
668 	int firstOffset = -1;
669 	this.streamBuffer = new byte[BUFFER_READ_SIZE];
670 	try {
671 		stream.skip(offset);
672 		this.bufferIndex = 0;
673 		this.bufferEnd = stream.read(this.streamBuffer, 0, this.streamBuffer.length);
674 		int size = readStreamInt(stream);
675 		try {
676 			if (size < 0) { // DEBUG
677 				System.err.println("-------------------- DEBUG --------------------"); //$NON-NLS-1$
678 				System.err.println("file = "+this.indexLocation); //$NON-NLS-1$
679 				System.err.println("offset = "+offset); //$NON-NLS-1$
680 				System.err.println("size = "+size); //$NON-NLS-1$
681 				System.err.println("--------------------   END   --------------------"); //$NON-NLS-1$
682 			}
683 			categoryTable = new HashtableOfObject(size);
684 		} catch (OutOfMemoryError oom) {
685 			// DEBUG
686 			oom.printStackTrace();
687 			System.err.println("-------------------- DEBUG --------------------"); //$NON-NLS-1$
688 			System.err.println("file = "+this.indexLocation); //$NON-NLS-1$
689 			System.err.println("offset = "+offset); //$NON-NLS-1$
690 			System.err.println("size = "+size); //$NON-NLS-1$
691 			System.err.println("--------------------   END   --------------------"); //$NON-NLS-1$
692 			throw oom;
693 		}
694 		int largeArraySize = 256;
695 		for (int i = 0; i < size; i++) {
696 			char[] word = readStreamChars(stream);
697 			int arrayOffset = readStreamInt(stream);
698 			// if arrayOffset is:
699 			//		<= 0 then the array size == 1 with the value -> -arrayOffset
700 			//		> 1 & < 256 then the size of the array is > 1 & < 256, the document array follows immediately
701 			//		256 if the array size >= 256 followed by another int which is the offset to the array (written prior to the table)
702 			if (arrayOffset <= 0) {
703 				categoryTable.putUnsafely(word, new int[] {-arrayOffset}); // store 1 element array by negating documentNumber
704 			} else if (arrayOffset < largeArraySize) {
705 				categoryTable.putUnsafely(word, readStreamDocumentArray(stream, arrayOffset)); // read in-lined array providing size
706 			} else {
707 				arrayOffset = readStreamInt(stream); // read actual offset
708 				if (readDocNumbers) {
709 					if (matchingWords == null)
710 						matchingWords = new char[size][];
711 					if (count == 0)
712 						firstOffset = arrayOffset;
713 					matchingWords[count++] = word;
714 				}
715 				categoryTable.putUnsafely(word, Integer.valueOf(arrayOffset)); // offset to array in the file
716 			}
717 		}
718 		this.categoryTables.put(INTERNED_CATEGORY_NAMES.get(categoryName), categoryTable);
719 		// cache the table as long as its not too big
720 		// in practice, some tables can be greater than 500K when they contain more than 10K elements
721 		this.cachedCategoryName = categoryTable.elementSize < 20000 ? categoryName : null;
722 	} catch (IOException ioe) {
723 		this.streamBuffer = null;
724 		throw ioe;
725 	} finally {
726 		stream.close();
727 		this.indexLocation.close();
728 	}
729 
730 	if (matchingWords != null && count > 0) {
731 		stream = this.indexLocation.getInputStream();
732 		try {
733 			stream.skip(firstOffset);
734 			this.bufferIndex = 0;
735 			this.bufferEnd = stream.read(this.streamBuffer, 0, this.streamBuffer.length);
736 			for (int i = 0; i < count; i++) { // each array follows the previous one
737 				categoryTable.put(matchingWords[i], readStreamDocumentArray(stream, readStreamInt(stream)));
738 			}
739 		} catch (IOException ioe) {
740 			this.streamBuffer = null;
741 			throw ioe;
742 		} finally {
743 			stream.close();
744 			this.indexLocation.close();
745 		}
746 	}
747 	this.streamBuffer = null;
748 	return categoryTable;
749 }
readChunk(String[] docNames, InputStream stream, int index, int size)750 private void readChunk(String[] docNames, InputStream stream, int index, int size) throws IOException {
751 	String current = new String(readStreamChars(stream));
752 	docNames[index++] = current;
753 	for (int i = 1; i < size; i++) {
754 		if (stream != null && this.bufferIndex + 2 >= this.bufferEnd)
755 			readStreamBuffer(stream);
756 		int start = this.streamBuffer[this.bufferIndex++] & 0xFF;
757 		int end = this.streamBuffer[this.bufferIndex++] & 0xFF;
758 		String next  = new String(readStreamChars(stream));
759 		if (start > 0) {
760 			if (end > 0) {
761 				int length = current.length();
762 				next = current.substring(0, start) + next + current.substring(length - end, length);
763 			} else {
764 				next = current.substring(0, start) + next;
765 			}
766 		} else if (end > 0) {
767 			int length = current.length();
768 			next = next + current.substring(length - end, length);
769 		}
770 		docNames[index++] = next;
771 		current = next;
772 	}
773 }
readDocumentName(int docNumber)774 synchronized String readDocumentName(int docNumber) throws IOException {
775 	if (this.cachedChunks == null)
776 		this.cachedChunks = new String[this.numberOfChunks][];
777 
778 	int chunkNumber = docNumber / CHUNK_SIZE;
779 	String[] chunk = this.cachedChunks[chunkNumber];
780 	if (chunk == null) {
781 		boolean isLastChunk = chunkNumber == this.numberOfChunks - 1;
782 		int start = this.chunkOffsets[chunkNumber];
783 		int numberOfBytes = (isLastChunk ? this.startOfCategoryTables : this.chunkOffsets[chunkNumber + 1]) - start;
784 		if (numberOfBytes < 0)
785 			throw new IllegalArgumentException();
786 		this.streamBuffer = new byte[numberOfBytes];
787 		this.bufferIndex = 0;
788 		InputStream file = this.indexLocation.getInputStream();
789 		try {
790 			file.skip(start);
791 			if (file.read(this.streamBuffer, 0, numberOfBytes) != numberOfBytes)
792 				throw new IOException();
793 		} catch (IOException ioe) {
794 			this.streamBuffer = null;
795 			throw ioe;
796 		} finally {
797 			file.close();
798 			this.indexLocation.close();
799 		}
800 		int numberOfNames = isLastChunk ? this.sizeOfLastChunk : CHUNK_SIZE;
801 		chunk = new String[numberOfNames];
802 		try {
803 			readChunk(chunk, null, 0, numberOfNames);
804 		} catch (IOException ioe) {
805 			this.streamBuffer = null;
806 			throw ioe;
807 		}
808 		this.cachedChunks[chunkNumber] = chunk;
809 	}
810 	this.streamBuffer = null;
811 	return chunk[docNumber - (chunkNumber * CHUNK_SIZE)];
812 }
readDocumentNumbers(Object arrayOffset)813 synchronized int[] readDocumentNumbers(Object arrayOffset) throws IOException {
814 	// arrayOffset is either a cached array of docNumbers or an Integer offset in the file
815 	if (arrayOffset instanceof int[])
816 		return (int[]) arrayOffset;
817 
818 	InputStream stream = this.indexLocation.getInputStream();
819 	try {
820 		int offset = ((Integer) arrayOffset).intValue();
821 		stream.skip(offset);
822 		this.streamBuffer = new byte[BUFFER_READ_SIZE];
823 		this.bufferIndex = 0;
824 		this.bufferEnd = stream.read(this.streamBuffer, 0, this.streamBuffer.length);
825 		return readStreamDocumentArray(stream, readStreamInt(stream));
826 	} finally {
827 		stream.close();
828 		this.indexLocation.close();
829 		this.streamBuffer = null;
830 	}
831 }
readHeaderInfo(InputStream stream)832 private void readHeaderInfo(InputStream stream) throws IOException {
833 
834 	// must be same order as writeHeaderInfo()
835 	this.numberOfChunks = readStreamInt(stream);
836 	this.sizeOfLastChunk = this.streamBuffer[this.bufferIndex++] & 0xFF;
837 	this.documentReferenceSize = this.streamBuffer[this.bufferIndex++] & 0xFF;
838 	this.separator = (char) (this.streamBuffer[this.bufferIndex++] & 0xFF);
839 	long length = this.indexLocation.length();
840 	if (length != -1 && this.numberOfChunks > length) {
841 		// not an accurate check, but good enough https://bugs.eclipse.org/bugs/show_bug.cgi?id=350612
842 		if (DEBUG)
843 			System.out.println("Index file is corrupted " + this.indexLocation); //$NON-NLS-1$
844 		throw new IOException("Index file is corrupted " + this.indexLocation); //$NON-NLS-1$
845 	}
846 	this.chunkOffsets = new int[this.numberOfChunks];
847 	for (int i = 0; i < this.numberOfChunks; i++)
848 		this.chunkOffsets[i] = readStreamInt(stream);
849 
850 	this.startOfCategoryTables = readStreamInt(stream);
851 
852 	int size = readStreamInt(stream);
853 	this.categoryOffsets = new HashtableOfIntValues(size);
854 	this.categoryEnds = new HashtableOfIntValues(size);
855 	if (length != -1 && size > length) {
856 		//  not an accurate check, but good enough  https://bugs.eclipse.org/bugs/show_bug.cgi?id=350612
857 		if (DEBUG)
858 			System.out.println("Index file is corrupted " + this.indexLocation); //$NON-NLS-1$
859 		throw new IOException("Index file is corrupted " + this.indexLocation); //$NON-NLS-1$
860 	}
861 	char[] previousCategory = null;
862 	int offset = -1;
863 	for (int i = 0; i < size; i++) {
864 		char[] categoryName = INTERNED_CATEGORY_NAMES.get(readStreamChars(stream));
865 		offset = readStreamInt(stream);
866 		this.categoryOffsets.put(categoryName, offset); // cache offset to category table
867 		if (previousCategory != null) {
868 			this.categoryEnds.put(previousCategory, offset); // cache end of the category table
869 		}
870 		previousCategory = categoryName;
871 	}
872 	if (previousCategory != null) {
873 		this.categoryEnds.put(previousCategory, this.headerInfoOffset); // cache end of the category table
874 	}
875 	this.categoryTables = new HashtableOfObject(3);
876 }
startQuery()877 synchronized void startQuery() {
878 	this.cacheUserCount++;
879 }
stopQuery()880 synchronized void stopQuery() {
881 	if (--this.cacheUserCount < 0) {
882 		// clear cached items
883 		this.cacheUserCount = -1;
884 		this.cachedChunks = null;
885 		if (this.categoryTables != null) {
886 			if (this.cachedCategoryName == null) {
887 				this.categoryTables = null;
888 			} else if (this.categoryTables.elementSize > 1) {
889 				HashtableOfObject newTables = new HashtableOfObject(3);
890 				newTables.put(this.cachedCategoryName, this.categoryTables.get(this.cachedCategoryName));
891 				this.categoryTables = newTables;
892 			}
893 		}
894 	}
895 }
readStreamBuffer(InputStream stream)896 private void readStreamBuffer(InputStream stream) throws IOException {
897 	// if we're about to read a known amount at the end of the existing buffer, but it does not completely fit
898 	// so we need to shift the remaining bytes to be read, and fill the buffer from the stream
899 	if (this.bufferEnd < this.streamBuffer.length) {
900 		if (stream.available() == 0)
901 			return; // we're at the end of the stream - nothing left to read
902 	}
903 
904 	int bytesInBuffer = this.bufferEnd - this.bufferIndex;
905 	if (bytesInBuffer > 0)
906 		System.arraycopy(this.streamBuffer, this.bufferIndex, this.streamBuffer, 0, bytesInBuffer);
907 	this.bufferEnd = bytesInBuffer + stream.read(this.streamBuffer, bytesInBuffer, this.bufferIndex);
908 	this.bufferIndex = 0;
909 }
910 /**
911  * Reads in a string from the specified data input stream. The
912  * string has been encoded using a modified UTF-8 format.
913  * <p>
914  * The first two bytes are read as an unsigned short.
915  * This value gives the number of following bytes that are in the encoded string,
916  * not the length of the resulting string. The following bytes are then
917  * interpreted as bytes encoding characters in the UTF-8 format
918  * and are converted into characters.
919  * <p>
920  * This method blocks until all the bytes are read, the end of the
921  * stream is detected, or an exception is thrown.
922  *
923  * @param      stream   a data input stream.
924  * @return     UTF decoded string as a char array
925  * @exception  EOFException if this end of data input is reached while reading it.
926  * @exception  IOException if an I/O error occurs while reading data input.
927  * @exception  UTFDataFormatException  if the bytes do not represent a
928  *               valid UTF-8 encoding of a Unicode string.
929  */
readStreamChars(InputStream stream)930 private char[] readStreamChars(InputStream stream) throws IOException {
931 	// read chars array length
932 	if (stream != null && this.bufferIndex + 2 >= this.bufferEnd)
933 		readStreamBuffer(stream);
934 	int length = (this.streamBuffer[this.bufferIndex++] & 0xFF) << 8;
935 	length += this.streamBuffer[this.bufferIndex++] & 0xFF;
936 
937 	// fill the chars from bytes buffer
938 	char[] word = new char[length];
939 	int i = 0;
940 	while (i < length) {
941 		// how many characters can be decoded without refilling the buffer?
942 		int charsInBuffer = i + ((this.bufferEnd - this.bufferIndex) / 3);
943 		// all the characters must already be in the buffer if we're at the end of the stream
944 		if (charsInBuffer > length || stream == null  || (this.bufferEnd != this.streamBuffer.length && stream.available() == 0))
945 			charsInBuffer = length;
946 		while (i < charsInBuffer) {
947 			byte b = this.streamBuffer[this.bufferIndex++];
948 			switch (b & 0xF0) {
949 				case 0x00 :
950 				case 0x10 :
951 				case 0x20 :
952 				case 0x30 :
953 				case 0x40 :
954 				case 0x50 :
955 				case 0x60 :
956 				case 0x70 :
957 					word[i++]= (char) b;
958 					break;
959 				case 0xC0 :
960 				case 0xD0 :
961 					char next = (char) this.streamBuffer[this.bufferIndex++];
962 					if ((next & 0xC0) != 0x80) {
963 						throw new UTFDataFormatException();
964 					}
965 					char ch = (char) ((b & 0x1F) << 6);
966 					ch |= next & 0x3F;
967 					word[i++] = ch;
968 					break;
969 				case 0xE0 :
970 					char first = (char) this.streamBuffer[this.bufferIndex++];
971 					char second = (char) this.streamBuffer[this.bufferIndex++];
972 					if ((first & second & 0xC0) != 0x80) {
973 						throw new UTFDataFormatException();
974 					}
975 					ch = (char) ((b & 0x0F) << 12);
976 					ch |= ((first& 0x3F) << 6);
977 					ch |= second & 0x3F;
978 					word[i++] = ch;
979 					break;
980 				default:
981 					throw new UTFDataFormatException();
982 			}
983 		}
984 		if (i < length && stream != null)
985 			readStreamBuffer(stream);
986 	}
987 	return word;
988 }
readStreamDocumentArray(InputStream stream, int arraySize)989 private int[] readStreamDocumentArray(InputStream stream, int arraySize) throws IOException {
990 	int[] indexes = new int[arraySize];
991 	if (arraySize == 0) return indexes;
992 
993 	int i = 0;
994 	switch (this.documentReferenceSize) {
995 		case 1 :
996 			while (i < arraySize) {
997 				// how many bytes without refilling the buffer?
998 				int bytesInBuffer = i + this.bufferEnd - this.bufferIndex;
999 				if (bytesInBuffer > arraySize)
1000 					bytesInBuffer = arraySize;
1001 				while (i < bytesInBuffer) {
1002 					indexes[i++] = this.streamBuffer[this.bufferIndex++] & 0xFF;
1003 				}
1004 				if (i < arraySize && stream != null)
1005 					readStreamBuffer(stream);
1006 			}
1007 			break;
1008 		case 2 :
1009 			while (i < arraySize) {
1010 				// how many shorts without refilling the buffer?
1011 				int shortsInBuffer = i + ((this.bufferEnd - this.bufferIndex) / 2);
1012 				if (shortsInBuffer > arraySize)
1013 					shortsInBuffer = arraySize;
1014 				while (i < shortsInBuffer) {
1015 					int val = (this.streamBuffer[this.bufferIndex++] & 0xFF) << 8;
1016 					indexes[i++] = val + (this.streamBuffer[this.bufferIndex++] & 0xFF);
1017 				}
1018 				if (i < arraySize && stream != null)
1019 					readStreamBuffer(stream);
1020 			}
1021 			break;
1022 		default :
1023 			while (i < arraySize) {
1024 				indexes[i++] = readStreamInt(stream);
1025 			}
1026 			break;
1027 	}
1028 	return indexes;
1029 }
readStreamInt(InputStream stream)1030 private int readStreamInt(InputStream stream) throws IOException {
1031 	if (this.bufferIndex + 4 >= this.bufferEnd) {
1032 		readStreamBuffer(stream);
1033 	}
1034 	int val = (this.streamBuffer[this.bufferIndex++] & 0xFF) << 24;
1035 	val += (this.streamBuffer[this.bufferIndex++] & 0xFF) << 16;
1036 	val += (this.streamBuffer[this.bufferIndex++] & 0xFF) << 8;
1037 	return val + (this.streamBuffer[this.bufferIndex++] & 0xFF);
1038 }
writeAllDocumentNames(String[] sortedDocNames, FileOutputStream stream)1039 private void writeAllDocumentNames(String[] sortedDocNames, FileOutputStream stream) throws IOException {
1040 	if (sortedDocNames.length == 0)
1041 		throw new IllegalArgumentException();
1042 
1043 	// assume the file was just created by initializeFrom()
1044 	this.streamBuffer = new byte[BUFFER_WRITE_SIZE];
1045 	this.bufferIndex = 0;
1046 	this.streamEnd = 0;
1047 
1048 	// in order, write: SIGNATURE & headerInfoOffset place holder, then each compressed chunk of document names
1049 	writeStreamChars(stream, SIGNATURE_CHARS);
1050 	this.headerInfoOffset = this.streamEnd;
1051 	writeStreamInt(stream, -1); // will overwrite with correct value later
1052 
1053 	int size = sortedDocNames.length;
1054 	this.numberOfChunks = (size / CHUNK_SIZE) + 1;
1055 	this.sizeOfLastChunk = size % CHUNK_SIZE;
1056 	if (this.sizeOfLastChunk == 0) {
1057 		this.numberOfChunks--;
1058 		this.sizeOfLastChunk = CHUNK_SIZE;
1059 	}
1060 	this.documentReferenceSize = size <= 0x7F ? 1 : (size <= 0x7FFF ? 2 : 4); // number of bytes used to encode a reference
1061 
1062 	this.chunkOffsets = new int[this.numberOfChunks];
1063 	int lastIndex = this.numberOfChunks - 1;
1064 	for (int i = 0; i < this.numberOfChunks; i++) {
1065 		this.chunkOffsets[i] = this.streamEnd;
1066 
1067 		int chunkSize = i == lastIndex ? this.sizeOfLastChunk : CHUNK_SIZE;
1068 		int chunkIndex = i * CHUNK_SIZE;
1069 		String current = sortedDocNames[chunkIndex];
1070 		writeStreamChars(stream, current.toCharArray());
1071 		for (int j = 1; j < chunkSize; j++) {
1072 			String next = sortedDocNames[chunkIndex + j];
1073 			int len1 = current.length();
1074 			int len2 = next.length();
1075 			int max = len1 < len2 ? len1 : len2;
1076 			int start = 0; // number of identical characters at the beginning (also the index of first character that is different)
1077 			while (current.charAt(start) == next.charAt(start)) {
1078 				start++;
1079 				if (max == start) break; // current is 'abba', next is 'abbab'
1080 			}
1081 			if (start > 255) start = 255;
1082 
1083 			int end = 0; // number of identical characters at the end
1084 			while (current.charAt(--len1) == next.charAt(--len2)) {
1085 				end++;
1086 				if (len2 == start) break; // current is 'abbba', next is 'abba'
1087 				if (len1 == 0) break; // current is 'xabc', next is 'xyabc'
1088 			}
1089 			if (end > 255) end = 255;
1090 			if ((this.bufferIndex + 2) >= BUFFER_WRITE_SIZE)  {
1091 				stream.write(this.streamBuffer, 0, this.bufferIndex);
1092 				this.bufferIndex = 0;
1093 			}
1094 			this.streamBuffer[this.bufferIndex++] = (byte) start;
1095 			this.streamBuffer[this.bufferIndex++] = (byte) end;
1096 			this.streamEnd += 2;
1097 
1098 			int last = next.length() - end;
1099 			writeStreamChars(stream, (start < last ? CharOperation.subarray(next.toCharArray(), start, last) : CharOperation.NO_CHAR));
1100 			current = next;
1101 		}
1102 	}
1103 	this.startOfCategoryTables = this.streamEnd + 1;
1104 }
writeCategories(FileOutputStream stream)1105 private void writeCategories(FileOutputStream stream) throws IOException {
1106 	char[][] categoryNames = this.categoryTables.keyTable;
1107 	Object[] tables = this.categoryTables.valueTable;
1108 	for (int i = 0, l = categoryNames.length; i < l; i++)
1109 		if (categoryNames[i] != null)
1110 			writeCategoryTable(categoryNames[i], (HashtableOfObject) tables[i], stream);
1111 	this.categoryTables = null;
1112 }
writeCategoryTable(char[] categoryName, HashtableOfObject wordsToDocs, FileOutputStream stream)1113 private void writeCategoryTable(char[] categoryName, HashtableOfObject wordsToDocs, FileOutputStream stream) throws IOException {
1114 	// the format of a category table is as follows:
1115 	// any document number arrays with >= 256 elements are written before the table (the offset to each array is remembered)
1116 	// then the number of word->int[] pairs in the table is written
1117 	// for each word -> int[] pair, the word is written followed by:
1118 	//		an int <= 0 if the array size == 1
1119 	//		an int > 1 & < 256 for the size of the array if its > 1 & < 256, the document array follows immediately
1120 	//		256 if the array size >= 256 followed by another int which is the offset to the array (written prior to the table)
1121 
1122 	int largeArraySize = 256;
1123 	Object[] values = wordsToDocs.valueTable;
1124 	for (int i = 0, l = values.length; i < l; i++) {
1125 		Object o = values[i];
1126 		if (o != null) {
1127 			if (o instanceof IntList)
1128 				o = values[i] = ((IntList) values[i]).asArray();
1129 			int[] documentNumbers = (int[]) o;
1130 			if (documentNumbers.length >= largeArraySize) {
1131 				values[i] = Integer.valueOf(this.streamEnd);
1132 				writeDocumentNumbers(documentNumbers, stream);
1133 			}
1134 		}
1135 	}
1136 
1137 	this.categoryOffsets.put(categoryName, this.streamEnd); // remember the offset to the start of the table
1138 	this.categoryTables.put(categoryName, null); // flush cached table
1139 	writeStreamInt(stream, wordsToDocs.elementSize);
1140 	char[][] words = wordsToDocs.keyTable;
1141 	for (int i = 0, l = words.length; i < l; i++) {
1142 		Object o = values[i];
1143 		if (o != null) {
1144 			writeStreamChars(stream, words[i]);
1145 			if (o instanceof int[]) {
1146 				int[] documentNumbers = (int[]) o;
1147 				if (documentNumbers.length == 1)
1148 					writeStreamInt(stream, -documentNumbers[0]); // store an array of 1 element by negating the documentNumber (can be zero)
1149 				else
1150 					writeDocumentNumbers(documentNumbers, stream);
1151 			} else {
1152 				writeStreamInt(stream, largeArraySize); // mark to identify that an offset follows
1153 				writeStreamInt(stream, ((Integer) o).intValue()); // offset in the file of the array of document numbers
1154 			}
1155 		}
1156 	}
1157 }
writeDocumentNumbers(int[] documentNumbers, FileOutputStream stream)1158 private void writeDocumentNumbers(int[] documentNumbers, FileOutputStream stream) throws IOException {
1159 	// must store length as a positive int to detect in-lined array of 1 element
1160 	int length = documentNumbers.length;
1161 	writeStreamInt(stream, length);
1162 	Util.sort(documentNumbers);
1163 	int start = 0;
1164 	switch (this.documentReferenceSize) {
1165 		case 1 :
1166 			while ((this.bufferIndex + length - start) >= BUFFER_WRITE_SIZE) {
1167 				// when documentNumbers is large, write BUFFER_WRITE_SIZE parts & fall thru to write the last part
1168 				int bytesLeft = BUFFER_WRITE_SIZE - this.bufferIndex;
1169 				for (int i=0; i < bytesLeft; i++) {
1170 					this.streamBuffer[this.bufferIndex++] = (byte) documentNumbers[start++];
1171 				}
1172 				stream.write(this.streamBuffer, 0, this.bufferIndex);
1173 				this.bufferIndex = 0;
1174 			}
1175 			while (start < length) {
1176 				this.streamBuffer[this.bufferIndex++] = (byte) documentNumbers[start++];
1177 			}
1178 			this.streamEnd += length;
1179 			break;
1180 		case 2 :
1181 			while ((this.bufferIndex + ((length - start) * 2)) >= BUFFER_WRITE_SIZE) {
1182 				// when documentNumbers is large, write BUFFER_WRITE_SIZE parts & fall thru to write the last part
1183 				int shortsLeft = (BUFFER_WRITE_SIZE - this.bufferIndex) / 2;
1184 				for (int i=0; i < shortsLeft; i++) {
1185 					this.streamBuffer[this.bufferIndex++] = (byte) (documentNumbers[start] >> 8);
1186 					this.streamBuffer[this.bufferIndex++] = (byte) documentNumbers[start++];
1187 				}
1188 				stream.write(this.streamBuffer, 0, this.bufferIndex);
1189 				this.bufferIndex = 0;
1190 			}
1191 			while (start < length) {
1192 				this.streamBuffer[this.bufferIndex++] = (byte) (documentNumbers[start] >> 8);
1193 				this.streamBuffer[this.bufferIndex++] = (byte) documentNumbers[start++];
1194 			}
1195 			this.streamEnd += length * 2;
1196 			break;
1197 		default :
1198 			while (start < length) {
1199 				writeStreamInt(stream, documentNumbers[start++]);
1200 			}
1201 			break;
1202 	}
1203 }
writeHeaderInfo(FileOutputStream stream)1204 private void writeHeaderInfo(FileOutputStream stream) throws IOException {
1205 	writeStreamInt(stream, this.numberOfChunks);
1206 	if ((this.bufferIndex + 3) >= BUFFER_WRITE_SIZE)  {
1207 		stream.write(this.streamBuffer, 0, this.bufferIndex);
1208 		this.bufferIndex = 0;
1209 	}
1210 	this.streamBuffer[this.bufferIndex++] = (byte) this.sizeOfLastChunk;
1211 	this.streamBuffer[this.bufferIndex++] = (byte) this.documentReferenceSize;
1212 	this.streamBuffer[this.bufferIndex++] = (byte) this.separator;
1213 	this.streamEnd += 3;
1214 
1215 	// apend the file with chunk offsets
1216 	for (int i = 0; i < this.numberOfChunks; i++) {
1217 		writeStreamInt(stream, this.chunkOffsets[i]);
1218 	}
1219 
1220 	writeStreamInt(stream, this.startOfCategoryTables);
1221 
1222 	// append the file with the category offsets... # of name -> offset pairs, followed by each name & an offset to its word->doc# table
1223 	writeStreamInt(stream, this.categoryOffsets.elementSize);
1224 	char[][] categoryNames = this.categoryOffsets.keyTable;
1225 	int[] offsets = this.categoryOffsets.valueTable;
1226 	for (int i = 0, l = categoryNames.length; i < l; i++) {
1227 		if (categoryNames[i] != null) {
1228 			writeStreamChars(stream, categoryNames[i]);
1229 			writeStreamInt(stream, offsets[i]);
1230 		}
1231 	}
1232 	// ensure buffer is written to the stream
1233 	if (this.bufferIndex > 0) {
1234 		stream.write(this.streamBuffer, 0, this.bufferIndex);
1235 		this.bufferIndex = 0;
1236 	}
1237 }
writeOffsetToHeader(int offsetToHeader)1238 private void writeOffsetToHeader(int offsetToHeader) throws IOException {
1239 	if (offsetToHeader > 0) {
1240 		RandomAccessFile file = new RandomAccessFile(this.indexLocation.getIndexFile(), "rw"); //$NON-NLS-1$
1241 		try {
1242 			file.seek(this.headerInfoOffset); // offset to position in header
1243 			file.writeInt(offsetToHeader);
1244 			this.headerInfoOffset = offsetToHeader; // update to reflect the correct offset
1245 		} finally {
1246 			file.close();
1247 		}
1248 	}
1249 }
1250 /**
1251  * Writes a string to the given output stream using UTF-8
1252  * encoding in a machine-independent manner.
1253  * <p>
1254  * First, two bytes of the array are giving the number of bytes to
1255  * follow. This value is the number of bytes actually written out,
1256  * not the length of the string. Following the length, each character
1257  * of the string is put in the bytes array, in sequence, using the UTF-8
1258  * encoding for the character.
1259  * </p>
1260  * <p>
1261  * Then the entire byte array is written to the output stream
1262  * using {@link OutputStream#write(byte[], int, int)} method.
1263  * </p>
1264  *
1265  * @param array char array to be written.
1266  * @exception  IOException  if an I/O error occurs while writting
1267  * 	the bytes array to the stream.
1268  */
writeStreamChars(FileOutputStream stream, char[] array)1269 private void writeStreamChars(FileOutputStream stream, char[] array) throws IOException {
1270 	if ((this.bufferIndex + 2) >= BUFFER_WRITE_SIZE)  {
1271 		stream.write(this.streamBuffer, 0, this.bufferIndex);
1272 		this.bufferIndex = 0;
1273 	}
1274 	int length = array.length;
1275 	this.streamBuffer[this.bufferIndex++] = (byte) ((length >>> 8) & 0xFF); // store chars array length instead of bytes
1276 	this.streamBuffer[this.bufferIndex++] = (byte) (length & 0xFF); // this will allow to read it faster
1277 	this.streamEnd += 2;
1278 
1279 	// we're assuming that very few char[] are so large that we need to flush the buffer more than once, if at all
1280 	int totalBytesNeeded = length * 3;
1281 	if (totalBytesNeeded <= BUFFER_WRITE_SIZE) {
1282 		if (this.bufferIndex + totalBytesNeeded > BUFFER_WRITE_SIZE) {
1283 			// flush the buffer now to make sure there is room for the array
1284 			stream.write(this.streamBuffer, 0, this.bufferIndex);
1285 			this.bufferIndex = 0;
1286 		}
1287 		writeStreamChars(stream, array, 0, length);
1288 	} else {
1289 		int charsPerWrite = BUFFER_WRITE_SIZE / 3;
1290 		int start = 0;
1291 		while (start < length) {
1292 			stream.write(this.streamBuffer, 0, this.bufferIndex);
1293 			this.bufferIndex = 0;
1294 			int charsLeftToWrite = length - start;
1295 			int end = start + (charsPerWrite < charsLeftToWrite ? charsPerWrite : charsLeftToWrite);
1296 			writeStreamChars(stream, array, start, end);
1297 			start = end;
1298 		}
1299 	}
1300 }
writeStreamChars(FileOutputStream stream, char[] array, int start, int end)1301 private void writeStreamChars(FileOutputStream stream, char[] array, int start, int end) throws IOException {
1302 	// start can NOT be == end
1303 	// must have checked that there is enough room for end - start * 3 bytes in the buffer
1304 
1305 	int oldIndex = this.bufferIndex;
1306 	while (start < end) {
1307 		int ch = array[start++];
1308 		if ((ch & 0x007F) == ch) {
1309 			this.streamBuffer[this.bufferIndex++] = (byte) ch;
1310 		} else if ((ch & 0x07FF) == ch) {
1311 			// first two bits are stored in first byte
1312 			byte b = (byte) (ch >> 6);
1313 			b &= 0x1F;
1314 			b |= 0xC0;
1315 			this.streamBuffer[this.bufferIndex++] = b;
1316 			// last six bits are stored in second byte
1317 			b = (byte) (ch & 0x3F);
1318 			b |= 0x80;
1319 			this.streamBuffer[this.bufferIndex++] = b;
1320 		} else {
1321 			// first four bits are stored in first byte
1322 			byte b = (byte) (ch >> 12);
1323 			b &= 0x0F;
1324 			b |= 0xE0;
1325 			this.streamBuffer[this.bufferIndex++] = b;
1326 			// six following bits are stored in second byte
1327 			b = (byte) (ch >> 6);
1328 			b &= 0x3F;
1329 			b |= 0x80;
1330 			this.streamBuffer[this.bufferIndex++] = b;
1331 			// last six bits are stored in third byte
1332 			b = (byte) (ch & 0x3F);
1333 			b |= 0x80;
1334 			this.streamBuffer[this.bufferIndex++] = b;
1335 		}
1336 	}
1337 	this.streamEnd += this.bufferIndex - oldIndex;
1338 }
writeStreamInt(FileOutputStream stream, int val)1339 private void writeStreamInt(FileOutputStream stream, int val) throws IOException {
1340 	if ((this.bufferIndex + 4) >= BUFFER_WRITE_SIZE)  {
1341 		stream.write(this.streamBuffer, 0, this.bufferIndex);
1342 		this.bufferIndex = 0;
1343 	}
1344 	this.streamBuffer[this.bufferIndex++] = (byte) (val >> 24);
1345 	this.streamBuffer[this.bufferIndex++] = (byte) (val >> 16);
1346 	this.streamBuffer[this.bufferIndex++] = (byte) (val >> 8);
1347 	this.streamBuffer[this.bufferIndex++] = (byte) val;
1348 	this.streamEnd += 4;
1349 }
1350 }
1351