1 /*******************************************************************************
2 * Copyright (c) 2000, 2018 IBM Corporation and others.
3 *
4 * This program and the accompanying materials
5 * are made available under the terms of the Eclipse Public License 2.0
6 * which accompanies this distribution, and is available at
7 * https://www.eclipse.org/legal/epl-2.0/
8 *
9 * SPDX-License-Identifier: EPL-2.0
10 *
11 * Contributors:
12 * IBM Corporation - initial API and implementation
13 *******************************************************************************/
14 package org.eclipse.jdt.internal.core.index;
15
16 import java.io.*;
17 import java.util.regex.Pattern;
18
19 import org.eclipse.jdt.core.compiler.CharOperation;
20 import org.eclipse.jdt.core.search.*;
21 import org.eclipse.jdt.internal.core.util.*;
22 import org.eclipse.jdt.internal.compiler.util.HashtableOfIntValues;
23 import org.eclipse.jdt.internal.compiler.util.HashtableOfObject;
24 import org.eclipse.jdt.internal.compiler.util.SimpleLookupTable;
25 import org.eclipse.jdt.internal.compiler.util.SimpleSet;
26 import org.eclipse.jdt.internal.compiler.util.SimpleSetOfCharArray;
27
28 public class DiskIndex {
29
30 IndexLocation indexLocation;
31
32 private int headerInfoOffset;
33 private int numberOfChunks;
34 private int sizeOfLastChunk;
35 private int[] chunkOffsets;
36 private int documentReferenceSize; // 1, 2 or more bytes... depends on # of document names
37 private int startOfCategoryTables;
38 private HashtableOfIntValues categoryOffsets, categoryEnds;
39
40 private int cacheUserCount;
41 private String[][] cachedChunks; // decompressed chunks of document names
42 private HashtableOfObject categoryTables; // category name -> HashtableOfObject(words -> int[] of document #'s) or offset if not read yet
43 private char[] cachedCategoryName;
44
45 private static final int DEFAULT_BUFFER_SIZE = 2048;
46 private static int BUFFER_READ_SIZE = DEFAULT_BUFFER_SIZE;
47 private static final int BUFFER_WRITE_SIZE = DEFAULT_BUFFER_SIZE;
48 private byte[] streamBuffer;
49 private int bufferIndex, bufferEnd; // used when reading from the file into the streamBuffer
50 private int streamEnd; // used when writing data from the streamBuffer to the file
51 char separator = Index.DEFAULT_SEPARATOR;
52
53 public static final String SIGNATURE= "INDEX VERSION 1.131"; //$NON-NLS-1$
54 private static final char[] SIGNATURE_CHARS = SIGNATURE.toCharArray();
55 public static boolean DEBUG = false;
56
57 private static final int RE_INDEXED = -1;
58 private static final int DELETED = -2;
59
60 private static final int CHUNK_SIZE = 100;
61
62 private static final SimpleSetOfCharArray INTERNED_CATEGORY_NAMES = new SimpleSetOfCharArray(20);
63 private static final String TMP_EXT = ".tmp"; //$NON-NLS-1$
64
65 static class IntList {
66
67 int size;
68 int[] elements;
69
IntList(int[] elements)70 IntList(int[] elements) {
71 this.elements = elements;
72 this.size = elements.length;
73 }
add(int newElement)74 void add(int newElement) {
75 if (this.size == this.elements.length) {
76 int newSize = this.size * 3;
77 if (newSize < 7) newSize = 7;
78 System.arraycopy(this.elements, 0, this.elements = new int[newSize], 0, this.size);
79 }
80 this.elements[this.size++] = newElement;
81 }
asArray()82 int[] asArray() {
83 int[] result = new int[this.size];
84 System.arraycopy(this.elements, 0, result, 0, this.size);
85 return result;
86 }
87 }
88
89
DiskIndex()90 DiskIndex() {
91 this.headerInfoOffset = -1;
92 this.numberOfChunks = -1;
93 this.sizeOfLastChunk = -1;
94 this.chunkOffsets = null;
95 this.documentReferenceSize = -1;
96 this.cacheUserCount = -1;
97 this.cachedChunks = null;
98 this.categoryTables = null;
99 this.cachedCategoryName = null;
100 this.categoryOffsets = null;
101 this.categoryEnds = null;
102 }
DiskIndex(IndexLocation location)103 DiskIndex(IndexLocation location) throws IOException {
104 this();
105 if (location == null) {
106 throw new IllegalArgumentException();
107 }
108 this.indexLocation = location;
109 }
addDocumentNames(String substring, MemoryIndex memoryIndex)110 SimpleSet addDocumentNames(String substring, MemoryIndex memoryIndex) throws IOException {
111 // must skip over documents which have been added/changed/deleted in the memory index
112 String[] docNames = readAllDocumentNames();
113 SimpleSet results = new SimpleSet(docNames.length);
114 if (substring == null) {
115 if (memoryIndex == null) {
116 for (int i = 0, l = docNames.length; i < l; i++)
117 results.add(docNames[i]);
118 } else {
119 SimpleLookupTable docsToRefs = memoryIndex.docsToReferences;
120 for (int i = 0, l = docNames.length; i < l; i++) {
121 String docName = docNames[i];
122 if (!docsToRefs.containsKey(docName))
123 results.add(docName);
124 }
125 }
126 } else {
127 if (memoryIndex == null) {
128 for (int i = 0, l = docNames.length; i < l; i++)
129 if (docNames[i].startsWith(substring, 0))
130 results.add(docNames[i]);
131 } else {
132 SimpleLookupTable docsToRefs = memoryIndex.docsToReferences;
133 for (int i = 0, l = docNames.length; i < l; i++) {
134 String docName = docNames[i];
135 if (docName.startsWith(substring, 0) && !docsToRefs.containsKey(docName))
136 results.add(docName);
137 }
138 }
139 }
140 return results;
141 }
addQueryResult(HashtableOfObject results, char[] word, Object docs, MemoryIndex memoryIndex, boolean prevResults)142 private HashtableOfObject addQueryResult(HashtableOfObject results, char[] word, Object docs, MemoryIndex memoryIndex, boolean prevResults) throws IOException {
143 // must skip over documents which have been added/changed/deleted in the memory index
144 if (results == null)
145 results = new HashtableOfObject(13);
146 EntryResult result = prevResults ? (EntryResult) results.get(word) : null;
147 if (memoryIndex == null) {
148 if (result == null)
149 results.putUnsafely(word, new EntryResult(word, docs));
150 else
151 result.addDocumentTable(docs);
152 } else {
153 SimpleLookupTable docsToRefs = memoryIndex.docsToReferences;
154 if (result == null) result = new EntryResult(word, null);
155 int[] docNumbers = readDocumentNumbers(docs);
156 for (int i = 0, l = docNumbers.length; i < l; i++) {
157 String docName = readDocumentName(docNumbers[i]);
158 if (!docsToRefs.containsKey(docName))
159 result.addDocumentName(docName);
160 }
161 if (!result.isEmpty())
162 results.put(word, result);
163 }
164 return results;
165 }
addQueryResults(char[][] categories, char[] key, int matchRule, MemoryIndex memoryIndex)166 HashtableOfObject addQueryResults(char[][] categories, char[] key, int matchRule, MemoryIndex memoryIndex) throws IOException {
167 // assumes sender has called startQuery() & will call stopQuery() when finished
168 if (this.categoryOffsets == null) return null; // file is empty
169
170 HashtableOfObject results = null; // initialized if needed
171
172 // No need to check the results table for duplicates while processing the
173 // first category table or if the first category tables doesn't have any results.
174 boolean prevResults = false;
175 if (key == null) {
176 for (int i = 0, l = categories.length; i < l; i++) {
177 HashtableOfObject wordsToDocNumbers = readCategoryTable(categories[i], true); // cache if key is null since its a definite match
178 if (wordsToDocNumbers != null) {
179 char[][] words = wordsToDocNumbers.keyTable;
180 Object[] values = wordsToDocNumbers.valueTable;
181 if (results == null)
182 results = new HashtableOfObject(wordsToDocNumbers.elementSize);
183 for (int j = 0, m = words.length; j < m; j++)
184 if (words[j] != null)
185 results = addQueryResult(results, words[j], values[j], memoryIndex, prevResults);
186 }
187 prevResults = results != null;
188 }
189 if (results != null && this.cachedChunks == null)
190 cacheDocumentNames();
191 } else {
192 switch (matchRule) {
193 case SearchPattern.R_EXACT_MATCH | SearchPattern.R_CASE_SENSITIVE:
194 for (int i = 0, l = categories.length; i < l; i++) {
195 HashtableOfObject wordsToDocNumbers = readCategoryTable(categories[i], false);
196 Object value;
197 if (wordsToDocNumbers != null && (value = wordsToDocNumbers.get(key)) != null)
198 results = addQueryResult(results, key, value, memoryIndex, prevResults);
199 prevResults = results != null;
200 }
201 break;
202 case SearchPattern.R_PREFIX_MATCH | SearchPattern.R_CASE_SENSITIVE:
203 for (int i = 0, l = categories.length; i < l; i++) {
204 HashtableOfObject wordsToDocNumbers = readCategoryTable(categories[i], false);
205 if (wordsToDocNumbers != null) {
206 char[][] words = wordsToDocNumbers.keyTable;
207 Object[] values = wordsToDocNumbers.valueTable;
208 for (int j = 0, m = words.length; j < m; j++) {
209 char[] word = words[j];
210 if (word != null && key[0] == word[0] && CharOperation.prefixEquals(key, word))
211 results = addQueryResult(results, word, values[j], memoryIndex, prevResults);
212 }
213 }
214 prevResults = results != null;
215 }
216 break;
217 case SearchPattern.R_REGEXP_MATCH:
218 Pattern pattern = Pattern.compile(new String(key));
219 for (int i = 0, l = categories.length; i < l; i++) {
220 HashtableOfObject wordsToDocNumbers = readCategoryTable(categories[i], false);
221 if (wordsToDocNumbers != null) {
222 char[][] words = wordsToDocNumbers.keyTable;
223 Object[] values = wordsToDocNumbers.valueTable;
224 for (int j = 0, m = words.length; j < m; j++) {
225 char[] word = words[j];
226 if (word != null && pattern.matcher(new String(word)).matches())
227 results = addQueryResult(results, word, values[j], memoryIndex, prevResults);
228 }
229 }
230 prevResults = results != null;
231 }
232 break;
233 default:
234 for (int i = 0, l = categories.length; i < l; i++) {
235 HashtableOfObject wordsToDocNumbers = readCategoryTable(categories[i], false);
236 if (wordsToDocNumbers != null) {
237 char[][] words = wordsToDocNumbers.keyTable;
238 Object[] values = wordsToDocNumbers.valueTable;
239 for (int j = 0, m = words.length; j < m; j++) {
240 char[] word = words[j];
241 if (word != null && Index.isMatch(key, word, matchRule))
242 results = addQueryResult(results, word, values[j], memoryIndex, prevResults);
243 }
244 }
245 prevResults = results != null;
246 }
247 }
248 }
249
250 return results;
251 }
cacheDocumentNames()252 private void cacheDocumentNames() throws IOException {
253 // will need all document names so get them now
254 this.cachedChunks = new String[this.numberOfChunks][];
255 InputStream stream = this.indexLocation.getInputStream();
256 try {
257 if (this.numberOfChunks > 5) BUFFER_READ_SIZE <<= 1;
258 int offset = this.chunkOffsets[0];
259 stream.skip(offset);
260 this.streamBuffer = new byte[BUFFER_READ_SIZE];
261 this.bufferIndex = 0;
262 this.bufferEnd = stream.read(this.streamBuffer, 0, this.streamBuffer.length);
263 for (int i = 0; i < this.numberOfChunks; i++) {
264 int size = i == this.numberOfChunks - 1 ? this.sizeOfLastChunk : CHUNK_SIZE;
265 readChunk(this.cachedChunks[i] = new String[size], stream, 0, size);
266 }
267 } catch (IOException e) {
268 this.cachedChunks = null;
269 throw e;
270 } finally {
271 stream.close();
272 this.indexLocation.close();
273 this.streamBuffer = null;
274 BUFFER_READ_SIZE = DEFAULT_BUFFER_SIZE;
275 }
276 }
computeDocumentNames(String[] onDiskNames, int[] positions, SimpleLookupTable indexedDocuments, MemoryIndex memoryIndex)277 private String[] computeDocumentNames(String[] onDiskNames, int[] positions, SimpleLookupTable indexedDocuments, MemoryIndex memoryIndex) {
278 int onDiskLength = onDiskNames.length;
279 Object[] docNames = memoryIndex.docsToReferences.keyTable;
280 Object[] referenceTables = memoryIndex.docsToReferences.valueTable;
281 if (onDiskLength == 0) {
282 // disk index was empty, so add every indexed document
283 for (int i = 0, l = referenceTables.length; i < l; i++)
284 if (referenceTables[i] != null)
285 indexedDocuments.put(docNames[i], null); // remember each new document
286
287 String[] newDocNames = new String[indexedDocuments.elementSize];
288 int count = 0;
289 Object[] added = indexedDocuments.keyTable;
290 for (int i = 0, l = added.length; i < l; i++)
291 if (added[i] != null)
292 newDocNames[count++] = (String) added[i];
293 Util.sort(newDocNames);
294 for (int i = 0, l = newDocNames.length; i < l; i++)
295 indexedDocuments.put(newDocNames[i], Integer.valueOf(i));
296 return newDocNames;
297 }
298
299 // initialize positions as if each document will remain in the same position
300 for (int i = 0; i < onDiskLength; i++)
301 positions[i] = i;
302
303 // find out if the memory index has any new or deleted documents, if not then the names & positions are the same
304 int numDeletedDocNames = 0;
305 nextPath : for (int i = 0, l = docNames.length; i < l; i++) {
306 String docName = (String) docNames[i];
307 if (docName != null) {
308 for (int j = 0; j < onDiskLength; j++) {
309 if (docName.equals(onDiskNames[j])) {
310 if (referenceTables[i] == null) {
311 positions[j] = DELETED;
312 numDeletedDocNames++;
313 } else {
314 positions[j] = RE_INDEXED;
315 }
316 continue nextPath;
317 }
318 }
319 if (referenceTables[i] != null)
320 indexedDocuments.put(docName, null); // remember each new document, skip deleted documents which were never saved
321 }
322 }
323
324 String[] newDocNames = onDiskNames;
325 if (numDeletedDocNames > 0 || indexedDocuments.elementSize > 0) {
326 // some new documents have been added or some old ones deleted
327 newDocNames = new String[onDiskLength + indexedDocuments.elementSize - numDeletedDocNames];
328 int count = 0;
329 for (int i = 0; i < onDiskLength; i++)
330 if (positions[i] >= RE_INDEXED)
331 newDocNames[count++] = onDiskNames[i]; // keep each unchanged document
332 Object[] added = indexedDocuments.keyTable;
333 for (int i = 0, l = added.length; i < l; i++)
334 if (added[i] != null)
335 newDocNames[count++] = (String) added[i]; // add each new document
336 Util.sort(newDocNames);
337 for (int i = 0, l = newDocNames.length; i < l; i++)
338 if (indexedDocuments.containsKey(newDocNames[i]))
339 indexedDocuments.put(newDocNames[i], Integer.valueOf(i)); // remember the position for each new document
340 }
341
342 // need to be able to look up an old position (ref# from a ref[]) and map it to its new position
343 // if its old position == DELETED then its forgotton
344 // if its old position == ReINDEXED then its also forgotten but its new position is needed to map references
345 int count = -1;
346 for (int i = 0; i < onDiskLength;) {
347 switch(positions[i]) {
348 case DELETED :
349 i++; // skip over deleted... references are forgotten
350 break;
351 case RE_INDEXED :
352 String newName = newDocNames[++count];
353 if (newName.equals(onDiskNames[i])) {
354 indexedDocuments.put(newName, Integer.valueOf(count)); // the reindexed docName that was at position i is now at position count
355 i++;
356 }
357 break;
358 default :
359 if (newDocNames[++count].equals(onDiskNames[i]))
360 positions[i++] = count; // the unchanged docName that was at position i is now at position count
361 }
362 }
363 return newDocNames;
364 }
copyQueryResults(HashtableOfObject categoryToWords, int newPosition)365 private void copyQueryResults(HashtableOfObject categoryToWords, int newPosition) {
366 char[][] categoryNames = categoryToWords.keyTable;
367 Object[] wordSets = categoryToWords.valueTable;
368 for (int i = 0, l = categoryNames.length; i < l; i++) {
369 char[] categoryName = categoryNames[i];
370 if (categoryName != null) {
371 SimpleWordSet wordSet = (SimpleWordSet) wordSets[i];
372 HashtableOfObject wordsToDocs = (HashtableOfObject) this.categoryTables.get(categoryName);
373 if (wordsToDocs == null)
374 this.categoryTables.put(categoryName, wordsToDocs = new HashtableOfObject(wordSet.elementSize));
375
376 char[][] words = wordSet.words;
377 for (int j = 0, m = words.length; j < m; j++) {
378 char[] word = words[j];
379 if (word != null) {
380 Object o = wordsToDocs.get(word);
381 if (o == null) {
382 wordsToDocs.putUnsafely(word, new int[] {newPosition});
383 } else if (o instanceof IntList) {
384 ((IntList) o).add(newPosition);
385 } else {
386 IntList list = new IntList((int[]) o);
387 list.add(newPosition);
388 wordsToDocs.put(word, list);
389 }
390 }
391 }
392 }
393 }
394 }
initialize(boolean reuseExistingFile)395 void initialize(boolean reuseExistingFile) throws IOException {
396 if (this.indexLocation.exists()) {
397 if (reuseExistingFile) {
398 InputStream stream = this.indexLocation.getInputStream();
399 if (stream == null) {
400 throw new IOException("Failed to use the index file"); //$NON-NLS-1$
401 }
402 this.streamBuffer = new byte[BUFFER_READ_SIZE];
403 this.bufferIndex = 0;
404 this.bufferEnd = stream.read(this.streamBuffer, 0, 128);
405 try {
406 char[] signature = readStreamChars(stream);
407 if (!CharOperation.equals(signature, SIGNATURE_CHARS)) {
408 throw new IOException(Messages.exception_wrongFormat);
409 }
410 this.headerInfoOffset = readStreamInt(stream);
411 if (this.headerInfoOffset > 0) { // file is empty if its not set
412 stream.skip(this.headerInfoOffset - this.bufferEnd); // assume that the header info offset is over current buffer end
413 this.bufferIndex = 0;
414 this.bufferEnd = stream.read(this.streamBuffer, 0, this.streamBuffer.length);
415 readHeaderInfo(stream);
416 }
417 } finally {
418 stream.close();
419 this.indexLocation.close();
420 }
421 return;
422 }
423 if (!this.indexLocation.delete()) {
424 if (DEBUG)
425 System.out.println("initialize - Failed to delete index " + this.indexLocation); //$NON-NLS-1$
426 throw new IOException("Failed to delete index " + this.indexLocation); //$NON-NLS-1$
427 }
428 }
429 if (this.indexLocation.createNewFile()) {
430 FileOutputStream stream = new FileOutputStream(this.indexLocation.getIndexFile(), false);
431 try {
432 this.streamBuffer = new byte[BUFFER_READ_SIZE];
433 this.bufferIndex = 0;
434 writeStreamChars(stream, SIGNATURE_CHARS);
435 writeStreamInt(stream, -1); // file is empty
436 // write the buffer to the stream
437 if (this.bufferIndex > 0) {
438 stream.write(this.streamBuffer, 0, this.bufferIndex);
439 this.bufferIndex = 0;
440 }
441 } finally {
442 stream.close();
443 }
444 } else {
445 if (DEBUG)
446 System.out.println("initialize - Failed to create new index " + this.indexLocation); //$NON-NLS-1$
447 throw new IOException("Failed to create new index " + this.indexLocation); //$NON-NLS-1$
448 }
449 }
initializeFrom(DiskIndex diskIndex, File newIndexFile)450 private void initializeFrom(DiskIndex diskIndex, File newIndexFile) throws IOException {
451 if (newIndexFile.exists() && !newIndexFile.delete()) { // delete the temporary index file
452 if (DEBUG)
453 System.out.println("initializeFrom - Failed to delete temp index " + this.indexLocation); //$NON-NLS-1$
454 } else if (!newIndexFile.createNewFile()) {
455 if (DEBUG)
456 System.out.println("initializeFrom - Failed to create temp index " + this.indexLocation); //$NON-NLS-1$
457 throw new IOException("Failed to create temp index " + this.indexLocation); //$NON-NLS-1$
458 }
459
460 int size = diskIndex.categoryOffsets == null ? 8 : diskIndex.categoryOffsets.elementSize;
461 this.categoryOffsets = new HashtableOfIntValues(size);
462 this.categoryEnds = new HashtableOfIntValues(size);
463 this.categoryTables = new HashtableOfObject(size);
464 this.separator = diskIndex.separator;
465 }
mergeCategories(DiskIndex onDisk, int[] positions, FileOutputStream stream)466 private void mergeCategories(DiskIndex onDisk, int[] positions, FileOutputStream stream) throws IOException {
467 // at this point, this.categoryTables contains the names -> wordsToDocs added in copyQueryResults()
468 char[][] oldNames = onDisk.categoryOffsets.keyTable;
469 for (int i = 0, l = oldNames.length; i < l; i++) {
470 char[] oldName = oldNames[i];
471 if (oldName != null && !this.categoryTables.containsKey(oldName))
472 this.categoryTables.put(oldName, null);
473 }
474
475 char[][] categoryNames = this.categoryTables.keyTable;
476 for (int i = 0, l = categoryNames.length; i < l; i++)
477 if (categoryNames[i] != null)
478 mergeCategory(categoryNames[i], onDisk, positions, stream);
479 this.categoryTables = null;
480 }
mergeCategory(char[] categoryName, DiskIndex onDisk, int[] positions, FileOutputStream stream)481 private void mergeCategory(char[] categoryName, DiskIndex onDisk, int[] positions, FileOutputStream stream) throws IOException {
482 HashtableOfObject wordsToDocs = (HashtableOfObject) this.categoryTables.get(categoryName);
483 if (wordsToDocs == null)
484 wordsToDocs = new HashtableOfObject(3);
485
486 HashtableOfObject oldWordsToDocs = onDisk.readCategoryTable(categoryName, true);
487 if (oldWordsToDocs != null) {
488 char[][] oldWords = oldWordsToDocs.keyTable;
489 Object[] oldArrayOffsets = oldWordsToDocs.valueTable;
490 nextWord: for (int i = 0, l = oldWords.length; i < l; i++) {
491 char[] oldWord = oldWords[i];
492 if (oldWord != null) {
493 int[] oldDocNumbers = (int[]) oldArrayOffsets[i];
494 int length = oldDocNumbers.length;
495 int[] mappedNumbers = new int[length];
496 int count = 0;
497 for (int j = 0; j < length; j++) {
498 int pos = positions[oldDocNumbers[j]];
499 if (pos > RE_INDEXED) // forget any reference to a document which was deleted or re_indexed
500 mappedNumbers[count++] = pos;
501 }
502 if (count < length) {
503 if (count == 0) continue nextWord; // skip words which no longer have any references
504 System.arraycopy(mappedNumbers, 0, mappedNumbers = new int[count], 0, count);
505 }
506
507 Object o = wordsToDocs.get(oldWord);
508 if (o == null) {
509 wordsToDocs.putUnsafely(oldWord, mappedNumbers);
510 } else {
511 IntList list = null;
512 if (o instanceof IntList) {
513 list = (IntList) o;
514 } else {
515 list = new IntList((int[]) o);
516 wordsToDocs.put(oldWord, list);
517 }
518 for (int j = 0; j < count; j++)
519 list.add(mappedNumbers[j]);
520 }
521 }
522 }
523 onDisk.categoryTables.put(categoryName, null); // flush cached table
524 }
525 writeCategoryTable(categoryName, wordsToDocs, stream);
526 }
mergeWith(MemoryIndex memoryIndex)527 DiskIndex mergeWith(MemoryIndex memoryIndex) throws IOException {
528 // assume write lock is held
529 // compute & write out new docNames
530 if (this.indexLocation == null) {
531 throw new IOException("Pre-built index file not writeable"); //$NON-NLS-1$
532 }
533 String[] docNames = readAllDocumentNames();
534 int previousLength = docNames.length;
535 int[] positions = new int[previousLength]; // keeps track of the position of each document in the new sorted docNames
536 SimpleLookupTable indexedDocuments = new SimpleLookupTable(3); // for each new/changed document in the memoryIndex
537 docNames = computeDocumentNames(docNames, positions, indexedDocuments, memoryIndex);
538 if (docNames.length == 0) {
539 if (previousLength == 0) return this; // nothing to do... memory index contained deleted documents that had never been saved
540
541 // index is now empty since all the saved documents were removed
542 DiskIndex newDiskIndex = new DiskIndex(this.indexLocation);
543 newDiskIndex.initialize(false);
544 return newDiskIndex;
545 }
546 boolean usingTmp = false;
547 File oldIndexFile = this.indexLocation.getIndexFile();
548 String indexFilePath = oldIndexFile.getPath();
549 if (indexFilePath.endsWith(TMP_EXT)) { // the tmp file could not be renamed last time
550 indexFilePath = indexFilePath.substring(0, indexFilePath.length()-TMP_EXT.length());
551 usingTmp = true;
552 } else {
553 indexFilePath += TMP_EXT;
554 }
555 DiskIndex newDiskIndex = new DiskIndex(new FileIndexLocation(new File(indexFilePath)));
556 File newIndexFile = newDiskIndex.indexLocation.getIndexFile();
557 try {
558 newDiskIndex.initializeFrom(this, newIndexFile);
559 FileOutputStream stream = new FileOutputStream(newIndexFile, false);
560 int offsetToHeader = -1;
561 try {
562 newDiskIndex.writeAllDocumentNames(docNames, stream);
563 docNames = null; // free up the space
564
565 // add each new/changed doc to empty category tables using its new position #
566 if (indexedDocuments.elementSize > 0) {
567 Object[] names = indexedDocuments.keyTable;
568 Object[] integerPositions = indexedDocuments.valueTable;
569 for (int i = 0, l = names.length; i < l; i++)
570 if (names[i] != null)
571 newDiskIndex.copyQueryResults(
572 (HashtableOfObject) memoryIndex.docsToReferences.get(names[i]), ((Integer) integerPositions[i]).intValue());
573 }
574 indexedDocuments = null; // free up the space
575
576 // merge each category table with the new ones & write them out
577 if (previousLength == 0)
578 newDiskIndex.writeCategories(stream);
579 else
580 newDiskIndex.mergeCategories(this, positions, stream);
581 offsetToHeader = newDiskIndex.streamEnd;
582 newDiskIndex.writeHeaderInfo(stream);
583 positions = null; // free up the space
584 } finally {
585 stream.close();
586 this.streamBuffer = null;
587 }
588 newDiskIndex.writeOffsetToHeader(offsetToHeader);
589
590 // rename file by deleting previous index file & renaming temp one
591 if (oldIndexFile.exists() && !oldIndexFile.delete()) {
592 if (DEBUG)
593 System.out.println("mergeWith - Failed to delete " + this.indexLocation); //$NON-NLS-1$
594 throw new IOException("Failed to delete index file " + this.indexLocation); //$NON-NLS-1$
595 }
596 if (!usingTmp && !newIndexFile.renameTo(oldIndexFile)) {
597 // try again after waiting for two milli secs
598 try {
599 Thread.sleep(2);
600 } catch (InterruptedException e) {
601 //ignore
602 }
603 if (!newIndexFile.renameTo(oldIndexFile)) {
604 if (DEBUG)
605 System.out.println("mergeWith - Failed to rename " + this.indexLocation); //$NON-NLS-1$
606 usingTmp = true;
607 }
608 }
609 } catch (IOException e) {
610 if (newIndexFile.exists() && !newIndexFile.delete())
611 if (DEBUG)
612 System.out.println("mergeWith - Failed to delete temp index " + newDiskIndex.indexLocation); //$NON-NLS-1$
613 throw e;
614 }
615
616 if (!usingTmp) // rename done, use the new file
617 newDiskIndex.indexLocation = this.indexLocation;
618 return newDiskIndex;
619 }
readAllDocumentNames()620 private synchronized String[] readAllDocumentNames() throws IOException {
621 if (this.numberOfChunks <= 0)
622 return CharOperation.NO_STRINGS;
623
624 InputStream stream = this.indexLocation.getInputStream();
625 try {
626 int offset = this.chunkOffsets[0];
627 stream.skip(offset);
628 this.streamBuffer = new byte[BUFFER_READ_SIZE];
629 this.bufferIndex = 0;
630 this.bufferEnd = stream.read(this.streamBuffer, 0, this.streamBuffer.length);
631 int lastIndex = this.numberOfChunks - 1;
632 String[] docNames = new String[lastIndex * CHUNK_SIZE + this.sizeOfLastChunk];
633 for (int i = 0; i < this.numberOfChunks; i++)
634 readChunk(docNames, stream, i * CHUNK_SIZE, i < lastIndex ? CHUNK_SIZE : this.sizeOfLastChunk);
635 return docNames;
636 } finally {
637 stream.close();
638 this.indexLocation.close();
639 this.streamBuffer = null;
640 }
641 }
readCategoryTable(char[] categoryName, boolean readDocNumbers)642 private synchronized HashtableOfObject readCategoryTable(char[] categoryName, boolean readDocNumbers) throws IOException {
643 // result will be null if categoryName is unknown
644 int offset = this.categoryOffsets.get(categoryName);
645 if (offset == HashtableOfIntValues.NO_VALUE) {
646 return null;
647 }
648
649 if (this.categoryTables == null) {
650 this.categoryTables = new HashtableOfObject(3);
651 } else {
652 HashtableOfObject cachedTable = (HashtableOfObject) this.categoryTables.get(categoryName);
653 if (cachedTable != null) {
654 if (readDocNumbers) { // must cache remaining document number arrays
655 Object[] arrayOffsets = cachedTable.valueTable;
656 for (int i = 0, l = arrayOffsets.length; i < l; i++)
657 if (arrayOffsets[i] instanceof Integer)
658 arrayOffsets[i] = readDocumentNumbers(arrayOffsets[i]);
659 }
660 return cachedTable;
661 }
662 }
663
664 InputStream stream = this.indexLocation.getInputStream();
665 HashtableOfObject categoryTable = null;
666 char[][] matchingWords = null;
667 int count = 0;
668 int firstOffset = -1;
669 this.streamBuffer = new byte[BUFFER_READ_SIZE];
670 try {
671 stream.skip(offset);
672 this.bufferIndex = 0;
673 this.bufferEnd = stream.read(this.streamBuffer, 0, this.streamBuffer.length);
674 int size = readStreamInt(stream);
675 try {
676 if (size < 0) { // DEBUG
677 System.err.println("-------------------- DEBUG --------------------"); //$NON-NLS-1$
678 System.err.println("file = "+this.indexLocation); //$NON-NLS-1$
679 System.err.println("offset = "+offset); //$NON-NLS-1$
680 System.err.println("size = "+size); //$NON-NLS-1$
681 System.err.println("-------------------- END --------------------"); //$NON-NLS-1$
682 }
683 categoryTable = new HashtableOfObject(size);
684 } catch (OutOfMemoryError oom) {
685 // DEBUG
686 oom.printStackTrace();
687 System.err.println("-------------------- DEBUG --------------------"); //$NON-NLS-1$
688 System.err.println("file = "+this.indexLocation); //$NON-NLS-1$
689 System.err.println("offset = "+offset); //$NON-NLS-1$
690 System.err.println("size = "+size); //$NON-NLS-1$
691 System.err.println("-------------------- END --------------------"); //$NON-NLS-1$
692 throw oom;
693 }
694 int largeArraySize = 256;
695 for (int i = 0; i < size; i++) {
696 char[] word = readStreamChars(stream);
697 int arrayOffset = readStreamInt(stream);
698 // if arrayOffset is:
699 // <= 0 then the array size == 1 with the value -> -arrayOffset
700 // > 1 & < 256 then the size of the array is > 1 & < 256, the document array follows immediately
701 // 256 if the array size >= 256 followed by another int which is the offset to the array (written prior to the table)
702 if (arrayOffset <= 0) {
703 categoryTable.putUnsafely(word, new int[] {-arrayOffset}); // store 1 element array by negating documentNumber
704 } else if (arrayOffset < largeArraySize) {
705 categoryTable.putUnsafely(word, readStreamDocumentArray(stream, arrayOffset)); // read in-lined array providing size
706 } else {
707 arrayOffset = readStreamInt(stream); // read actual offset
708 if (readDocNumbers) {
709 if (matchingWords == null)
710 matchingWords = new char[size][];
711 if (count == 0)
712 firstOffset = arrayOffset;
713 matchingWords[count++] = word;
714 }
715 categoryTable.putUnsafely(word, Integer.valueOf(arrayOffset)); // offset to array in the file
716 }
717 }
718 this.categoryTables.put(INTERNED_CATEGORY_NAMES.get(categoryName), categoryTable);
719 // cache the table as long as its not too big
720 // in practice, some tables can be greater than 500K when they contain more than 10K elements
721 this.cachedCategoryName = categoryTable.elementSize < 20000 ? categoryName : null;
722 } catch (IOException ioe) {
723 this.streamBuffer = null;
724 throw ioe;
725 } finally {
726 stream.close();
727 this.indexLocation.close();
728 }
729
730 if (matchingWords != null && count > 0) {
731 stream = this.indexLocation.getInputStream();
732 try {
733 stream.skip(firstOffset);
734 this.bufferIndex = 0;
735 this.bufferEnd = stream.read(this.streamBuffer, 0, this.streamBuffer.length);
736 for (int i = 0; i < count; i++) { // each array follows the previous one
737 categoryTable.put(matchingWords[i], readStreamDocumentArray(stream, readStreamInt(stream)));
738 }
739 } catch (IOException ioe) {
740 this.streamBuffer = null;
741 throw ioe;
742 } finally {
743 stream.close();
744 this.indexLocation.close();
745 }
746 }
747 this.streamBuffer = null;
748 return categoryTable;
749 }
readChunk(String[] docNames, InputStream stream, int index, int size)750 private void readChunk(String[] docNames, InputStream stream, int index, int size) throws IOException {
751 String current = new String(readStreamChars(stream));
752 docNames[index++] = current;
753 for (int i = 1; i < size; i++) {
754 if (stream != null && this.bufferIndex + 2 >= this.bufferEnd)
755 readStreamBuffer(stream);
756 int start = this.streamBuffer[this.bufferIndex++] & 0xFF;
757 int end = this.streamBuffer[this.bufferIndex++] & 0xFF;
758 String next = new String(readStreamChars(stream));
759 if (start > 0) {
760 if (end > 0) {
761 int length = current.length();
762 next = current.substring(0, start) + next + current.substring(length - end, length);
763 } else {
764 next = current.substring(0, start) + next;
765 }
766 } else if (end > 0) {
767 int length = current.length();
768 next = next + current.substring(length - end, length);
769 }
770 docNames[index++] = next;
771 current = next;
772 }
773 }
readDocumentName(int docNumber)774 synchronized String readDocumentName(int docNumber) throws IOException {
775 if (this.cachedChunks == null)
776 this.cachedChunks = new String[this.numberOfChunks][];
777
778 int chunkNumber = docNumber / CHUNK_SIZE;
779 String[] chunk = this.cachedChunks[chunkNumber];
780 if (chunk == null) {
781 boolean isLastChunk = chunkNumber == this.numberOfChunks - 1;
782 int start = this.chunkOffsets[chunkNumber];
783 int numberOfBytes = (isLastChunk ? this.startOfCategoryTables : this.chunkOffsets[chunkNumber + 1]) - start;
784 if (numberOfBytes < 0)
785 throw new IllegalArgumentException();
786 this.streamBuffer = new byte[numberOfBytes];
787 this.bufferIndex = 0;
788 InputStream file = this.indexLocation.getInputStream();
789 try {
790 file.skip(start);
791 if (file.read(this.streamBuffer, 0, numberOfBytes) != numberOfBytes)
792 throw new IOException();
793 } catch (IOException ioe) {
794 this.streamBuffer = null;
795 throw ioe;
796 } finally {
797 file.close();
798 this.indexLocation.close();
799 }
800 int numberOfNames = isLastChunk ? this.sizeOfLastChunk : CHUNK_SIZE;
801 chunk = new String[numberOfNames];
802 try {
803 readChunk(chunk, null, 0, numberOfNames);
804 } catch (IOException ioe) {
805 this.streamBuffer = null;
806 throw ioe;
807 }
808 this.cachedChunks[chunkNumber] = chunk;
809 }
810 this.streamBuffer = null;
811 return chunk[docNumber - (chunkNumber * CHUNK_SIZE)];
812 }
readDocumentNumbers(Object arrayOffset)813 synchronized int[] readDocumentNumbers(Object arrayOffset) throws IOException {
814 // arrayOffset is either a cached array of docNumbers or an Integer offset in the file
815 if (arrayOffset instanceof int[])
816 return (int[]) arrayOffset;
817
818 InputStream stream = this.indexLocation.getInputStream();
819 try {
820 int offset = ((Integer) arrayOffset).intValue();
821 stream.skip(offset);
822 this.streamBuffer = new byte[BUFFER_READ_SIZE];
823 this.bufferIndex = 0;
824 this.bufferEnd = stream.read(this.streamBuffer, 0, this.streamBuffer.length);
825 return readStreamDocumentArray(stream, readStreamInt(stream));
826 } finally {
827 stream.close();
828 this.indexLocation.close();
829 this.streamBuffer = null;
830 }
831 }
readHeaderInfo(InputStream stream)832 private void readHeaderInfo(InputStream stream) throws IOException {
833
834 // must be same order as writeHeaderInfo()
835 this.numberOfChunks = readStreamInt(stream);
836 this.sizeOfLastChunk = this.streamBuffer[this.bufferIndex++] & 0xFF;
837 this.documentReferenceSize = this.streamBuffer[this.bufferIndex++] & 0xFF;
838 this.separator = (char) (this.streamBuffer[this.bufferIndex++] & 0xFF);
839 long length = this.indexLocation.length();
840 if (length != -1 && this.numberOfChunks > length) {
841 // not an accurate check, but good enough https://bugs.eclipse.org/bugs/show_bug.cgi?id=350612
842 if (DEBUG)
843 System.out.println("Index file is corrupted " + this.indexLocation); //$NON-NLS-1$
844 throw new IOException("Index file is corrupted " + this.indexLocation); //$NON-NLS-1$
845 }
846 this.chunkOffsets = new int[this.numberOfChunks];
847 for (int i = 0; i < this.numberOfChunks; i++)
848 this.chunkOffsets[i] = readStreamInt(stream);
849
850 this.startOfCategoryTables = readStreamInt(stream);
851
852 int size = readStreamInt(stream);
853 this.categoryOffsets = new HashtableOfIntValues(size);
854 this.categoryEnds = new HashtableOfIntValues(size);
855 if (length != -1 && size > length) {
856 // not an accurate check, but good enough https://bugs.eclipse.org/bugs/show_bug.cgi?id=350612
857 if (DEBUG)
858 System.out.println("Index file is corrupted " + this.indexLocation); //$NON-NLS-1$
859 throw new IOException("Index file is corrupted " + this.indexLocation); //$NON-NLS-1$
860 }
861 char[] previousCategory = null;
862 int offset = -1;
863 for (int i = 0; i < size; i++) {
864 char[] categoryName = INTERNED_CATEGORY_NAMES.get(readStreamChars(stream));
865 offset = readStreamInt(stream);
866 this.categoryOffsets.put(categoryName, offset); // cache offset to category table
867 if (previousCategory != null) {
868 this.categoryEnds.put(previousCategory, offset); // cache end of the category table
869 }
870 previousCategory = categoryName;
871 }
872 if (previousCategory != null) {
873 this.categoryEnds.put(previousCategory, this.headerInfoOffset); // cache end of the category table
874 }
875 this.categoryTables = new HashtableOfObject(3);
876 }
startQuery()877 synchronized void startQuery() {
878 this.cacheUserCount++;
879 }
stopQuery()880 synchronized void stopQuery() {
881 if (--this.cacheUserCount < 0) {
882 // clear cached items
883 this.cacheUserCount = -1;
884 this.cachedChunks = null;
885 if (this.categoryTables != null) {
886 if (this.cachedCategoryName == null) {
887 this.categoryTables = null;
888 } else if (this.categoryTables.elementSize > 1) {
889 HashtableOfObject newTables = new HashtableOfObject(3);
890 newTables.put(this.cachedCategoryName, this.categoryTables.get(this.cachedCategoryName));
891 this.categoryTables = newTables;
892 }
893 }
894 }
895 }
readStreamBuffer(InputStream stream)896 private void readStreamBuffer(InputStream stream) throws IOException {
897 // if we're about to read a known amount at the end of the existing buffer, but it does not completely fit
898 // so we need to shift the remaining bytes to be read, and fill the buffer from the stream
899 if (this.bufferEnd < this.streamBuffer.length) {
900 if (stream.available() == 0)
901 return; // we're at the end of the stream - nothing left to read
902 }
903
904 int bytesInBuffer = this.bufferEnd - this.bufferIndex;
905 if (bytesInBuffer > 0)
906 System.arraycopy(this.streamBuffer, this.bufferIndex, this.streamBuffer, 0, bytesInBuffer);
907 this.bufferEnd = bytesInBuffer + stream.read(this.streamBuffer, bytesInBuffer, this.bufferIndex);
908 this.bufferIndex = 0;
909 }
910 /**
911 * Reads in a string from the specified data input stream. The
912 * string has been encoded using a modified UTF-8 format.
913 * <p>
914 * The first two bytes are read as an unsigned short.
915 * This value gives the number of following bytes that are in the encoded string,
916 * not the length of the resulting string. The following bytes are then
917 * interpreted as bytes encoding characters in the UTF-8 format
918 * and are converted into characters.
919 * <p>
920 * This method blocks until all the bytes are read, the end of the
921 * stream is detected, or an exception is thrown.
922 *
923 * @param stream a data input stream.
924 * @return UTF decoded string as a char array
925 * @exception EOFException if this end of data input is reached while reading it.
926 * @exception IOException if an I/O error occurs while reading data input.
927 * @exception UTFDataFormatException if the bytes do not represent a
928 * valid UTF-8 encoding of a Unicode string.
929 */
readStreamChars(InputStream stream)930 private char[] readStreamChars(InputStream stream) throws IOException {
931 // read chars array length
932 if (stream != null && this.bufferIndex + 2 >= this.bufferEnd)
933 readStreamBuffer(stream);
934 int length = (this.streamBuffer[this.bufferIndex++] & 0xFF) << 8;
935 length += this.streamBuffer[this.bufferIndex++] & 0xFF;
936
937 // fill the chars from bytes buffer
938 char[] word = new char[length];
939 int i = 0;
940 while (i < length) {
941 // how many characters can be decoded without refilling the buffer?
942 int charsInBuffer = i + ((this.bufferEnd - this.bufferIndex) / 3);
943 // all the characters must already be in the buffer if we're at the end of the stream
944 if (charsInBuffer > length || stream == null || (this.bufferEnd != this.streamBuffer.length && stream.available() == 0))
945 charsInBuffer = length;
946 while (i < charsInBuffer) {
947 byte b = this.streamBuffer[this.bufferIndex++];
948 switch (b & 0xF0) {
949 case 0x00 :
950 case 0x10 :
951 case 0x20 :
952 case 0x30 :
953 case 0x40 :
954 case 0x50 :
955 case 0x60 :
956 case 0x70 :
957 word[i++]= (char) b;
958 break;
959 case 0xC0 :
960 case 0xD0 :
961 char next = (char) this.streamBuffer[this.bufferIndex++];
962 if ((next & 0xC0) != 0x80) {
963 throw new UTFDataFormatException();
964 }
965 char ch = (char) ((b & 0x1F) << 6);
966 ch |= next & 0x3F;
967 word[i++] = ch;
968 break;
969 case 0xE0 :
970 char first = (char) this.streamBuffer[this.bufferIndex++];
971 char second = (char) this.streamBuffer[this.bufferIndex++];
972 if ((first & second & 0xC0) != 0x80) {
973 throw new UTFDataFormatException();
974 }
975 ch = (char) ((b & 0x0F) << 12);
976 ch |= ((first& 0x3F) << 6);
977 ch |= second & 0x3F;
978 word[i++] = ch;
979 break;
980 default:
981 throw new UTFDataFormatException();
982 }
983 }
984 if (i < length && stream != null)
985 readStreamBuffer(stream);
986 }
987 return word;
988 }
readStreamDocumentArray(InputStream stream, int arraySize)989 private int[] readStreamDocumentArray(InputStream stream, int arraySize) throws IOException {
990 int[] indexes = new int[arraySize];
991 if (arraySize == 0) return indexes;
992
993 int i = 0;
994 switch (this.documentReferenceSize) {
995 case 1 :
996 while (i < arraySize) {
997 // how many bytes without refilling the buffer?
998 int bytesInBuffer = i + this.bufferEnd - this.bufferIndex;
999 if (bytesInBuffer > arraySize)
1000 bytesInBuffer = arraySize;
1001 while (i < bytesInBuffer) {
1002 indexes[i++] = this.streamBuffer[this.bufferIndex++] & 0xFF;
1003 }
1004 if (i < arraySize && stream != null)
1005 readStreamBuffer(stream);
1006 }
1007 break;
1008 case 2 :
1009 while (i < arraySize) {
1010 // how many shorts without refilling the buffer?
1011 int shortsInBuffer = i + ((this.bufferEnd - this.bufferIndex) / 2);
1012 if (shortsInBuffer > arraySize)
1013 shortsInBuffer = arraySize;
1014 while (i < shortsInBuffer) {
1015 int val = (this.streamBuffer[this.bufferIndex++] & 0xFF) << 8;
1016 indexes[i++] = val + (this.streamBuffer[this.bufferIndex++] & 0xFF);
1017 }
1018 if (i < arraySize && stream != null)
1019 readStreamBuffer(stream);
1020 }
1021 break;
1022 default :
1023 while (i < arraySize) {
1024 indexes[i++] = readStreamInt(stream);
1025 }
1026 break;
1027 }
1028 return indexes;
1029 }
readStreamInt(InputStream stream)1030 private int readStreamInt(InputStream stream) throws IOException {
1031 if (this.bufferIndex + 4 >= this.bufferEnd) {
1032 readStreamBuffer(stream);
1033 }
1034 int val = (this.streamBuffer[this.bufferIndex++] & 0xFF) << 24;
1035 val += (this.streamBuffer[this.bufferIndex++] & 0xFF) << 16;
1036 val += (this.streamBuffer[this.bufferIndex++] & 0xFF) << 8;
1037 return val + (this.streamBuffer[this.bufferIndex++] & 0xFF);
1038 }
writeAllDocumentNames(String[] sortedDocNames, FileOutputStream stream)1039 private void writeAllDocumentNames(String[] sortedDocNames, FileOutputStream stream) throws IOException {
1040 if (sortedDocNames.length == 0)
1041 throw new IllegalArgumentException();
1042
1043 // assume the file was just created by initializeFrom()
1044 this.streamBuffer = new byte[BUFFER_WRITE_SIZE];
1045 this.bufferIndex = 0;
1046 this.streamEnd = 0;
1047
1048 // in order, write: SIGNATURE & headerInfoOffset place holder, then each compressed chunk of document names
1049 writeStreamChars(stream, SIGNATURE_CHARS);
1050 this.headerInfoOffset = this.streamEnd;
1051 writeStreamInt(stream, -1); // will overwrite with correct value later
1052
1053 int size = sortedDocNames.length;
1054 this.numberOfChunks = (size / CHUNK_SIZE) + 1;
1055 this.sizeOfLastChunk = size % CHUNK_SIZE;
1056 if (this.sizeOfLastChunk == 0) {
1057 this.numberOfChunks--;
1058 this.sizeOfLastChunk = CHUNK_SIZE;
1059 }
1060 this.documentReferenceSize = size <= 0x7F ? 1 : (size <= 0x7FFF ? 2 : 4); // number of bytes used to encode a reference
1061
1062 this.chunkOffsets = new int[this.numberOfChunks];
1063 int lastIndex = this.numberOfChunks - 1;
1064 for (int i = 0; i < this.numberOfChunks; i++) {
1065 this.chunkOffsets[i] = this.streamEnd;
1066
1067 int chunkSize = i == lastIndex ? this.sizeOfLastChunk : CHUNK_SIZE;
1068 int chunkIndex = i * CHUNK_SIZE;
1069 String current = sortedDocNames[chunkIndex];
1070 writeStreamChars(stream, current.toCharArray());
1071 for (int j = 1; j < chunkSize; j++) {
1072 String next = sortedDocNames[chunkIndex + j];
1073 int len1 = current.length();
1074 int len2 = next.length();
1075 int max = len1 < len2 ? len1 : len2;
1076 int start = 0; // number of identical characters at the beginning (also the index of first character that is different)
1077 while (current.charAt(start) == next.charAt(start)) {
1078 start++;
1079 if (max == start) break; // current is 'abba', next is 'abbab'
1080 }
1081 if (start > 255) start = 255;
1082
1083 int end = 0; // number of identical characters at the end
1084 while (current.charAt(--len1) == next.charAt(--len2)) {
1085 end++;
1086 if (len2 == start) break; // current is 'abbba', next is 'abba'
1087 if (len1 == 0) break; // current is 'xabc', next is 'xyabc'
1088 }
1089 if (end > 255) end = 255;
1090 if ((this.bufferIndex + 2) >= BUFFER_WRITE_SIZE) {
1091 stream.write(this.streamBuffer, 0, this.bufferIndex);
1092 this.bufferIndex = 0;
1093 }
1094 this.streamBuffer[this.bufferIndex++] = (byte) start;
1095 this.streamBuffer[this.bufferIndex++] = (byte) end;
1096 this.streamEnd += 2;
1097
1098 int last = next.length() - end;
1099 writeStreamChars(stream, (start < last ? CharOperation.subarray(next.toCharArray(), start, last) : CharOperation.NO_CHAR));
1100 current = next;
1101 }
1102 }
1103 this.startOfCategoryTables = this.streamEnd + 1;
1104 }
writeCategories(FileOutputStream stream)1105 private void writeCategories(FileOutputStream stream) throws IOException {
1106 char[][] categoryNames = this.categoryTables.keyTable;
1107 Object[] tables = this.categoryTables.valueTable;
1108 for (int i = 0, l = categoryNames.length; i < l; i++)
1109 if (categoryNames[i] != null)
1110 writeCategoryTable(categoryNames[i], (HashtableOfObject) tables[i], stream);
1111 this.categoryTables = null;
1112 }
writeCategoryTable(char[] categoryName, HashtableOfObject wordsToDocs, FileOutputStream stream)1113 private void writeCategoryTable(char[] categoryName, HashtableOfObject wordsToDocs, FileOutputStream stream) throws IOException {
1114 // the format of a category table is as follows:
1115 // any document number arrays with >= 256 elements are written before the table (the offset to each array is remembered)
1116 // then the number of word->int[] pairs in the table is written
1117 // for each word -> int[] pair, the word is written followed by:
1118 // an int <= 0 if the array size == 1
1119 // an int > 1 & < 256 for the size of the array if its > 1 & < 256, the document array follows immediately
1120 // 256 if the array size >= 256 followed by another int which is the offset to the array (written prior to the table)
1121
1122 int largeArraySize = 256;
1123 Object[] values = wordsToDocs.valueTable;
1124 for (int i = 0, l = values.length; i < l; i++) {
1125 Object o = values[i];
1126 if (o != null) {
1127 if (o instanceof IntList)
1128 o = values[i] = ((IntList) values[i]).asArray();
1129 int[] documentNumbers = (int[]) o;
1130 if (documentNumbers.length >= largeArraySize) {
1131 values[i] = Integer.valueOf(this.streamEnd);
1132 writeDocumentNumbers(documentNumbers, stream);
1133 }
1134 }
1135 }
1136
1137 this.categoryOffsets.put(categoryName, this.streamEnd); // remember the offset to the start of the table
1138 this.categoryTables.put(categoryName, null); // flush cached table
1139 writeStreamInt(stream, wordsToDocs.elementSize);
1140 char[][] words = wordsToDocs.keyTable;
1141 for (int i = 0, l = words.length; i < l; i++) {
1142 Object o = values[i];
1143 if (o != null) {
1144 writeStreamChars(stream, words[i]);
1145 if (o instanceof int[]) {
1146 int[] documentNumbers = (int[]) o;
1147 if (documentNumbers.length == 1)
1148 writeStreamInt(stream, -documentNumbers[0]); // store an array of 1 element by negating the documentNumber (can be zero)
1149 else
1150 writeDocumentNumbers(documentNumbers, stream);
1151 } else {
1152 writeStreamInt(stream, largeArraySize); // mark to identify that an offset follows
1153 writeStreamInt(stream, ((Integer) o).intValue()); // offset in the file of the array of document numbers
1154 }
1155 }
1156 }
1157 }
writeDocumentNumbers(int[] documentNumbers, FileOutputStream stream)1158 private void writeDocumentNumbers(int[] documentNumbers, FileOutputStream stream) throws IOException {
1159 // must store length as a positive int to detect in-lined array of 1 element
1160 int length = documentNumbers.length;
1161 writeStreamInt(stream, length);
1162 Util.sort(documentNumbers);
1163 int start = 0;
1164 switch (this.documentReferenceSize) {
1165 case 1 :
1166 while ((this.bufferIndex + length - start) >= BUFFER_WRITE_SIZE) {
1167 // when documentNumbers is large, write BUFFER_WRITE_SIZE parts & fall thru to write the last part
1168 int bytesLeft = BUFFER_WRITE_SIZE - this.bufferIndex;
1169 for (int i=0; i < bytesLeft; i++) {
1170 this.streamBuffer[this.bufferIndex++] = (byte) documentNumbers[start++];
1171 }
1172 stream.write(this.streamBuffer, 0, this.bufferIndex);
1173 this.bufferIndex = 0;
1174 }
1175 while (start < length) {
1176 this.streamBuffer[this.bufferIndex++] = (byte) documentNumbers[start++];
1177 }
1178 this.streamEnd += length;
1179 break;
1180 case 2 :
1181 while ((this.bufferIndex + ((length - start) * 2)) >= BUFFER_WRITE_SIZE) {
1182 // when documentNumbers is large, write BUFFER_WRITE_SIZE parts & fall thru to write the last part
1183 int shortsLeft = (BUFFER_WRITE_SIZE - this.bufferIndex) / 2;
1184 for (int i=0; i < shortsLeft; i++) {
1185 this.streamBuffer[this.bufferIndex++] = (byte) (documentNumbers[start] >> 8);
1186 this.streamBuffer[this.bufferIndex++] = (byte) documentNumbers[start++];
1187 }
1188 stream.write(this.streamBuffer, 0, this.bufferIndex);
1189 this.bufferIndex = 0;
1190 }
1191 while (start < length) {
1192 this.streamBuffer[this.bufferIndex++] = (byte) (documentNumbers[start] >> 8);
1193 this.streamBuffer[this.bufferIndex++] = (byte) documentNumbers[start++];
1194 }
1195 this.streamEnd += length * 2;
1196 break;
1197 default :
1198 while (start < length) {
1199 writeStreamInt(stream, documentNumbers[start++]);
1200 }
1201 break;
1202 }
1203 }
writeHeaderInfo(FileOutputStream stream)1204 private void writeHeaderInfo(FileOutputStream stream) throws IOException {
1205 writeStreamInt(stream, this.numberOfChunks);
1206 if ((this.bufferIndex + 3) >= BUFFER_WRITE_SIZE) {
1207 stream.write(this.streamBuffer, 0, this.bufferIndex);
1208 this.bufferIndex = 0;
1209 }
1210 this.streamBuffer[this.bufferIndex++] = (byte) this.sizeOfLastChunk;
1211 this.streamBuffer[this.bufferIndex++] = (byte) this.documentReferenceSize;
1212 this.streamBuffer[this.bufferIndex++] = (byte) this.separator;
1213 this.streamEnd += 3;
1214
1215 // apend the file with chunk offsets
1216 for (int i = 0; i < this.numberOfChunks; i++) {
1217 writeStreamInt(stream, this.chunkOffsets[i]);
1218 }
1219
1220 writeStreamInt(stream, this.startOfCategoryTables);
1221
1222 // append the file with the category offsets... # of name -> offset pairs, followed by each name & an offset to its word->doc# table
1223 writeStreamInt(stream, this.categoryOffsets.elementSize);
1224 char[][] categoryNames = this.categoryOffsets.keyTable;
1225 int[] offsets = this.categoryOffsets.valueTable;
1226 for (int i = 0, l = categoryNames.length; i < l; i++) {
1227 if (categoryNames[i] != null) {
1228 writeStreamChars(stream, categoryNames[i]);
1229 writeStreamInt(stream, offsets[i]);
1230 }
1231 }
1232 // ensure buffer is written to the stream
1233 if (this.bufferIndex > 0) {
1234 stream.write(this.streamBuffer, 0, this.bufferIndex);
1235 this.bufferIndex = 0;
1236 }
1237 }
writeOffsetToHeader(int offsetToHeader)1238 private void writeOffsetToHeader(int offsetToHeader) throws IOException {
1239 if (offsetToHeader > 0) {
1240 RandomAccessFile file = new RandomAccessFile(this.indexLocation.getIndexFile(), "rw"); //$NON-NLS-1$
1241 try {
1242 file.seek(this.headerInfoOffset); // offset to position in header
1243 file.writeInt(offsetToHeader);
1244 this.headerInfoOffset = offsetToHeader; // update to reflect the correct offset
1245 } finally {
1246 file.close();
1247 }
1248 }
1249 }
1250 /**
1251 * Writes a string to the given output stream using UTF-8
1252 * encoding in a machine-independent manner.
1253 * <p>
1254 * First, two bytes of the array are giving the number of bytes to
1255 * follow. This value is the number of bytes actually written out,
1256 * not the length of the string. Following the length, each character
1257 * of the string is put in the bytes array, in sequence, using the UTF-8
1258 * encoding for the character.
1259 * </p>
1260 * <p>
1261 * Then the entire byte array is written to the output stream
1262 * using {@link OutputStream#write(byte[], int, int)} method.
1263 * </p>
1264 *
1265 * @param array char array to be written.
1266 * @exception IOException if an I/O error occurs while writting
1267 * the bytes array to the stream.
1268 */
writeStreamChars(FileOutputStream stream, char[] array)1269 private void writeStreamChars(FileOutputStream stream, char[] array) throws IOException {
1270 if ((this.bufferIndex + 2) >= BUFFER_WRITE_SIZE) {
1271 stream.write(this.streamBuffer, 0, this.bufferIndex);
1272 this.bufferIndex = 0;
1273 }
1274 int length = array.length;
1275 this.streamBuffer[this.bufferIndex++] = (byte) ((length >>> 8) & 0xFF); // store chars array length instead of bytes
1276 this.streamBuffer[this.bufferIndex++] = (byte) (length & 0xFF); // this will allow to read it faster
1277 this.streamEnd += 2;
1278
1279 // we're assuming that very few char[] are so large that we need to flush the buffer more than once, if at all
1280 int totalBytesNeeded = length * 3;
1281 if (totalBytesNeeded <= BUFFER_WRITE_SIZE) {
1282 if (this.bufferIndex + totalBytesNeeded > BUFFER_WRITE_SIZE) {
1283 // flush the buffer now to make sure there is room for the array
1284 stream.write(this.streamBuffer, 0, this.bufferIndex);
1285 this.bufferIndex = 0;
1286 }
1287 writeStreamChars(stream, array, 0, length);
1288 } else {
1289 int charsPerWrite = BUFFER_WRITE_SIZE / 3;
1290 int start = 0;
1291 while (start < length) {
1292 stream.write(this.streamBuffer, 0, this.bufferIndex);
1293 this.bufferIndex = 0;
1294 int charsLeftToWrite = length - start;
1295 int end = start + (charsPerWrite < charsLeftToWrite ? charsPerWrite : charsLeftToWrite);
1296 writeStreamChars(stream, array, start, end);
1297 start = end;
1298 }
1299 }
1300 }
writeStreamChars(FileOutputStream stream, char[] array, int start, int end)1301 private void writeStreamChars(FileOutputStream stream, char[] array, int start, int end) throws IOException {
1302 // start can NOT be == end
1303 // must have checked that there is enough room for end - start * 3 bytes in the buffer
1304
1305 int oldIndex = this.bufferIndex;
1306 while (start < end) {
1307 int ch = array[start++];
1308 if ((ch & 0x007F) == ch) {
1309 this.streamBuffer[this.bufferIndex++] = (byte) ch;
1310 } else if ((ch & 0x07FF) == ch) {
1311 // first two bits are stored in first byte
1312 byte b = (byte) (ch >> 6);
1313 b &= 0x1F;
1314 b |= 0xC0;
1315 this.streamBuffer[this.bufferIndex++] = b;
1316 // last six bits are stored in second byte
1317 b = (byte) (ch & 0x3F);
1318 b |= 0x80;
1319 this.streamBuffer[this.bufferIndex++] = b;
1320 } else {
1321 // first four bits are stored in first byte
1322 byte b = (byte) (ch >> 12);
1323 b &= 0x0F;
1324 b |= 0xE0;
1325 this.streamBuffer[this.bufferIndex++] = b;
1326 // six following bits are stored in second byte
1327 b = (byte) (ch >> 6);
1328 b &= 0x3F;
1329 b |= 0x80;
1330 this.streamBuffer[this.bufferIndex++] = b;
1331 // last six bits are stored in third byte
1332 b = (byte) (ch & 0x3F);
1333 b |= 0x80;
1334 this.streamBuffer[this.bufferIndex++] = b;
1335 }
1336 }
1337 this.streamEnd += this.bufferIndex - oldIndex;
1338 }
writeStreamInt(FileOutputStream stream, int val)1339 private void writeStreamInt(FileOutputStream stream, int val) throws IOException {
1340 if ((this.bufferIndex + 4) >= BUFFER_WRITE_SIZE) {
1341 stream.write(this.streamBuffer, 0, this.bufferIndex);
1342 this.bufferIndex = 0;
1343 }
1344 this.streamBuffer[this.bufferIndex++] = (byte) (val >> 24);
1345 this.streamBuffer[this.bufferIndex++] = (byte) (val >> 16);
1346 this.streamBuffer[this.bufferIndex++] = (byte) (val >> 8);
1347 this.streamBuffer[this.bufferIndex++] = (byte) val;
1348 this.streamEnd += 4;
1349 }
1350 }
1351