1 /*
2  * This file is part of ELKI:
3  * Environment for Developing KDD-Applications Supported by Index-Structures
4  *
5  * Copyright (C) 2018
6  * ELKI Development Team
7  *
8  * This program is free software: you can redistribute it and/or modify
9  * it under the terms of the GNU Affero General Public License as published by
10  * the Free Software Foundation, either version 3 of the License, or
11  * (at your option) any later version.
12  *
13  * This program is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16  * GNU Affero General Public License for more details.
17  *
18  * You should have received a copy of the GNU Affero General Public License
19  * along with this program. If not, see <http://www.gnu.org/licenses/>.
20  */
21 package de.lmu.ifi.dbs.elki.persistent;
22 
23 import java.io.File;
24 import java.io.IOException;
25 import java.io.RandomAccessFile;
26 import java.nio.ByteBuffer;
27 import java.nio.MappedByteBuffer;
28 import java.nio.channels.FileChannel.MapMode;
29 import java.nio.channels.FileLock;
30 
31 import de.lmu.ifi.dbs.elki.utilities.io.ByteArrayUtil;
32 
33 /**
34  * On Disc Array storage for records of a given size.
35  *
36  * This can be used to implement various fixed size record-based data
37  * structures. The file format is designed to have a fixed-size header followed
38  * by the actual data.
39  *
40  * @author Erich Schubert
41  * @since 0.2
42  *
43  * @composed - - - RandomAccessFile
44  */
45 // TODO: ensure file doesn't become to big - check for overflows in recordsize *
46 // numrecs + headersize
47 public class OnDiskArray implements AutoCloseable {
48   /**
49    * Serial version.
50    *
51    * NOTE: Change this version whenever the file structure is changed in an
52    * incompatible way: This will modify the file magic, and thus prevent
53    * applications from reading incompatible files.
54    */
55   private static final long serialVersionUID = 7586497243452875056L;
56 
57   /**
58    * Magic number used to identify files.
59    */
60   protected int magic;
61 
62   /**
63    * Size of the header in the file. Note that the internal header is four
64    * integers already.
65    */
66   private int headersize;
67 
68   /**
69    * Size of the records in the file.
70    */
71   private int recordsize;
72 
73   /**
74    * Number of records in the file.
75    */
76   private int numrecs;
77 
78   /**
79    * File name.
80    */
81   private File filename;
82 
83   /**
84    * Random Access File object.
85    */
86   final private RandomAccessFile file;
87 
88   /**
89    * Lock for the file that will be kept while writing.
90    */
91   private FileLock lock = null;
92 
93   /**
94    * Writable or read-only object.
95    */
96   private boolean writable;
97 
98   /**
99    * The memory mapped buffer.
100    */
101   private MappedByteBuffer map;
102 
103   /**
104    * Size of the classes header size.
105    */
106   private static final int INTERNAL_HEADER_SIZE = 4 * ByteArrayUtil.SIZE_INT;
107 
108   /**
109    * Position of file size (in records).
110    */
111   private static final int HEADER_POS_SIZE = 3 * ByteArrayUtil.SIZE_INT;
112 
113   /**
114    * Constructor to write a new file.
115    *
116    * @param filename File name to be opened.
117    * @param magicseed Magic number to derive real magic from.
118    * @param extraheadersize header size NOT including the internal header
119    * @param recordsize Record size
120    * @param initialsize Initial file size (in records)
121    * @throws IOException on IO errors
122    */
OnDiskArray(File filename, int magicseed, int extraheadersize, int recordsize, int initialsize)123   public OnDiskArray(File filename, int magicseed, int extraheadersize, int recordsize, int initialsize) throws IOException {
124     this.magic = mixMagic((int) serialVersionUID, magicseed);
125     this.headersize = extraheadersize + INTERNAL_HEADER_SIZE;
126     this.recordsize = recordsize;
127     this.filename = filename;
128     this.writable = true;
129 
130     // do not allow overwriting, unless empty
131     if (filename.exists() && filename.length() > 0) {
132       throw new IOException("File already exists");
133     }
134 
135     // open file.
136     file = new RandomAccessFile(filename, "rw");
137     // and acquire a file write lock
138     lock = file.getChannel().lock();
139 
140     // write magic header
141     file.writeInt(this.magic);
142 
143     // write header size
144     file.writeInt(this.headersize);
145 
146     // write size of a single record
147     file.writeInt(this.recordsize);
148 
149     // write number of records
150     // verify position.
151     if (file.getFilePointer() != HEADER_POS_SIZE) {
152       // TODO: more appropriate exception class?
153       throw new IOException("File position doesn't match when writing file size.");
154     }
155     file.writeInt(initialsize);
156 
157     // we should have written the complete internal header now.
158     if (file.getFilePointer() != INTERNAL_HEADER_SIZE) {
159       // TODO: more appropriate exception class?
160       throw new IOException("File position doesn't match header size after writing header.");
161     }
162     // resize file
163     resizeFile(initialsize);
164 
165     // map array
166     mapArray();
167   }
168 
169   /**
170    * Constructor to open an existing file. The provided record size must match
171    * the record size stored within the files header. If you don't know this size
172    * yet and/or need to access the extra header you should use the other
173    * constructor below
174    *
175    * @param filename File name to be opened.
176    * @param magicseed Magic number to derive real magic from.
177    * @param extraheadersize header size NOT including the internal header
178    * @param recordsize Record size
179    * @param writable flag to open the file writable
180    * @throws IOException on IO errors
181    */
OnDiskArray(File filename, int magicseed, int extraheadersize, int recordsize, boolean writable)182   public OnDiskArray(File filename, int magicseed, int extraheadersize, int recordsize, boolean writable) throws IOException {
183     this.magic = mixMagic((int) serialVersionUID, magicseed);
184     this.headersize = extraheadersize + INTERNAL_HEADER_SIZE;
185     this.recordsize = recordsize;
186     this.filename = filename;
187     this.writable = writable;
188 
189     String mode = writable ? "rw" : "r";
190 
191     file = new RandomAccessFile(filename, mode);
192     if (writable) {
193       // acquire a file write lock
194       lock = file.getChannel().lock();
195     }
196 
197     validateHeader(true);
198     mapArray();
199   }
200 
201   /**
202    * Constructor to open an existing file. The record size is read from the
203    * file's header and can be obtained by <code>getRecordsize()</code>
204    *
205    * @param filename File name to be opened.
206    * @param magicseed Magic number to derive real magic from.
207    * @param extraheadersize header size NOT including the internal header
208    * @param writable flag to open the file writable
209    * @throws IOException on IO errors
210    */
OnDiskArray(File filename, int magicseed, int extraheadersize, boolean writable)211   public OnDiskArray(File filename, int magicseed, int extraheadersize, boolean writable) throws IOException {
212     this.magic = mixMagic((int) serialVersionUID, magicseed);
213     this.headersize = extraheadersize + INTERNAL_HEADER_SIZE;
214     this.filename = filename;
215     this.writable = writable;
216 
217     String mode = writable ? "rw" : "r";
218 
219     file = new RandomAccessFile(filename, mode);
220     if (writable) {
221       // acquire a file write lock
222       lock = file.getChannel().lock();
223     }
224 
225     validateHeader(false);
226     mapArray();
227   }
228 
229   /**
230    * (Re-) map the data array.
231    *
232    * @throws IOException on mapping error.
233    */
mapArray()234   private synchronized void mapArray() throws IOException {
235     if (map != null) {
236       ByteArrayUtil.unmapByteBuffer(map);
237       map = null;
238     }
239     MapMode mode = writable ? MapMode.READ_WRITE : MapMode.READ_ONLY;
240     map = file.getChannel().map(mode, headersize, recordsize * numrecs);
241   }
242 
243   /**
244    * Validates the header and throws an IOException if the header is invalid. If
245    * validateRecordSize is set to true the record size must match exactly the
246    * stored record size within the files header, else the record size is read
247    * from the header and used.
248    *
249    * @param validateRecordSize
250    * @throws IOException
251    */
validateHeader(boolean validateRecordSize)252   private void validateHeader(boolean validateRecordSize) throws IOException {
253     int readmagic = file.readInt();
254     // Validate magic number
255     if (readmagic != this.magic) {
256       file.close();
257       throw new IOException("Magic in LinearDiskCache does not match: " + readmagic + " instead of " + this.magic);
258     }
259     // Validate header size
260     if (file.readInt() != this.headersize) {
261       file.close();
262       throw new IOException("Header size in LinearDiskCache does not match.");
263     }
264 
265     if (validateRecordSize) {
266       // Validate record size
267       if (file.readInt() != this.recordsize) {
268         file.close();
269         throw new IOException("Recordsize in LinearDiskCache does not match.");
270       }
271     } else {
272       // or just read it from file
273       this.recordsize = file.readInt();
274     }
275 
276     // read the number of records and validate with file size.
277     if (file.getFilePointer() != HEADER_POS_SIZE) {
278       throw new IOException("Incorrect file position when reading header.");
279     }
280     this.numrecs = file.readInt();
281     if (numrecs < 0 || file.length() != indexToFileposition(numrecs)) {
282       throw new IOException("File size and number of records do not agree.");
283     }
284     // yet another sanity check. We should have read all of our internal header
285     // now.
286     if (file.getFilePointer() != INTERNAL_HEADER_SIZE) {
287       throw new IOException("Incorrect file position after reading header.");
288     }
289   }
290 
291   /**
292    * Mix two magic numbers into one, to obtain a combined magic. Note:
293    * mixMagic(a,b) != mixMagic(b,a) usually.
294    *
295    * @param magic1 Magic number to mix.
296    * @param magic2 Magic number to mix.
297    * @return Mixed magic number.
298    */
mixMagic(int magic1, int magic2)299   public static final int mixMagic(int magic1, int magic2) {
300     final long prime = 2654435761L;
301     long result = 1;
302     result = prime * result + magic1;
303     result = prime * result + magic2;
304     return (int) result;
305   }
306 
307   /**
308    * Compute file position from index number
309    *
310    * @param index Index offset
311    * @return file position
312    */
indexToFileposition(long index)313   private long indexToFileposition(long index) {
314     long pos = headersize + index * recordsize;
315     return pos;
316   }
317 
318   /**
319    * Resize file to the intended size
320    *
321    * @param newsize New file size.
322    * @throws IOException on IO errors
323    */
resizeFile(int newsize)324   public synchronized void resizeFile(int newsize) throws IOException {
325     if (!writable) {
326       throw new IOException("File is not writeable!");
327     }
328     // update the number of records
329     this.numrecs = newsize;
330     file.seek(HEADER_POS_SIZE);
331     file.writeInt(numrecs);
332 
333     // resize file
334     file.setLength(indexToFileposition(numrecs));
335     mapArray();
336   }
337 
338   /**
339    * Get a record buffer
340    *
341    * @param index Record index
342    * @return Byte buffer for the record
343    * @throws IOException on IO errors
344    */
getRecordBuffer(int index)345   public synchronized ByteBuffer getRecordBuffer(int index) throws IOException {
346     if (index < 0 || index >= numrecs) {
347       throw new IOException("Access beyond end of file.");
348     }
349     // Adjust buffer view
350     synchronized (map) {
351       map.limit(recordsize * (index + 1));
352       map.position(recordsize * index);
353       return map.slice();
354     }
355   }
356 
357   /**
358    * Return the size of the extra header. Accessor.
359    *
360    * @return Extra header size
361    */
getExtraHeaderSize()362   protected int getExtraHeaderSize() {
363     return headersize - INTERNAL_HEADER_SIZE;
364   }
365 
366   /**
367    * Read the extra header data.
368    *
369    * @return additional header data
370    * @throws IOException on IO errors
371    */
getExtraHeader()372   public synchronized ByteBuffer getExtraHeader() throws IOException {
373     final int size = headersize - INTERNAL_HEADER_SIZE;
374     final MapMode mode = writable ? MapMode.READ_WRITE : MapMode.READ_ONLY;
375     return file.getChannel().map(mode, INTERNAL_HEADER_SIZE, size);
376   }
377 
378   /**
379    * Get the size of a single record.
380    *
381    * @return Record size.
382    */
getRecordsize()383   protected int getRecordsize() {
384     return recordsize;
385   }
386 
387   /**
388    * Get the file name.
389    *
390    * @return File name
391    */
getFilename()392   public File getFilename() {
393     return filename;
394   }
395 
396   /**
397    * Check if the file is writable.
398    *
399    * @return true if the file is writable.
400    */
isWritable()401   public boolean isWritable() {
402     return writable;
403   }
404 
405   /**
406    * Explicitly close the file. Note: following operations will likely cause
407    * IOExceptions.
408    *
409    * @throws IOException on IO errors
410    */
close()411   public synchronized void close() throws IOException {
412     writable = false;
413     if (map != null) {
414       ByteArrayUtil.unmapByteBuffer(map);
415       map = null;
416     }
417     if (lock != null) {
418       lock.release();
419       lock = null;
420     }
421     file.close();
422   }
423 
424   /**
425    * Get number of records in file.
426    *
427    * @return Number of records in the file.
428    */
getNumRecords()429   public int getNumRecords() {
430     return numrecs;
431   }
432 
433   /**
434    * Ensure that the file can fit the given number of records.
435    *
436    * @param size Size
437    * @throws IOException
438    */
ensureSize(int size)439   public void ensureSize(int size) throws IOException {
440     if (size > getNumRecords()) {
441       resizeFile(size);
442     }
443   }
444 }
445