1 /* 2 * This file is part of ELKI: 3 * Environment for Developing KDD-Applications Supported by Index-Structures 4 * 5 * Copyright (C) 2018 6 * ELKI Development Team 7 * 8 * This program is free software: you can redistribute it and/or modify 9 * it under the terms of the GNU Affero General Public License as published by 10 * the Free Software Foundation, either version 3 of the License, or 11 * (at your option) any later version. 12 * 13 * This program is distributed in the hope that it will be useful, 14 * but WITHOUT ANY WARRANTY; without even the implied warranty of 15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 * GNU Affero General Public License for more details. 17 * 18 * You should have received a copy of the GNU Affero General Public License 19 * along with this program. If not, see <http://www.gnu.org/licenses/>. 20 */ 21 package de.lmu.ifi.dbs.elki.persistent; 22 23 import java.io.File; 24 import java.io.IOException; 25 import java.io.RandomAccessFile; 26 import java.nio.ByteBuffer; 27 import java.nio.MappedByteBuffer; 28 import java.nio.channels.FileChannel.MapMode; 29 import java.nio.channels.FileLock; 30 31 import de.lmu.ifi.dbs.elki.utilities.io.ByteArrayUtil; 32 33 /** 34 * On Disc Array storage for records of a given size. 35 * 36 * This can be used to implement various fixed size record-based data 37 * structures. The file format is designed to have a fixed-size header followed 38 * by the actual data. 39 * 40 * @author Erich Schubert 41 * @since 0.2 42 * 43 * @composed - - - RandomAccessFile 44 */ 45 // TODO: ensure file doesn't become to big - check for overflows in recordsize * 46 // numrecs + headersize 47 public class OnDiskArray implements AutoCloseable { 48 /** 49 * Serial version. 50 * 51 * NOTE: Change this version whenever the file structure is changed in an 52 * incompatible way: This will modify the file magic, and thus prevent 53 * applications from reading incompatible files. 54 */ 55 private static final long serialVersionUID = 7586497243452875056L; 56 57 /** 58 * Magic number used to identify files. 59 */ 60 protected int magic; 61 62 /** 63 * Size of the header in the file. Note that the internal header is four 64 * integers already. 65 */ 66 private int headersize; 67 68 /** 69 * Size of the records in the file. 70 */ 71 private int recordsize; 72 73 /** 74 * Number of records in the file. 75 */ 76 private int numrecs; 77 78 /** 79 * File name. 80 */ 81 private File filename; 82 83 /** 84 * Random Access File object. 85 */ 86 final private RandomAccessFile file; 87 88 /** 89 * Lock for the file that will be kept while writing. 90 */ 91 private FileLock lock = null; 92 93 /** 94 * Writable or read-only object. 95 */ 96 private boolean writable; 97 98 /** 99 * The memory mapped buffer. 100 */ 101 private MappedByteBuffer map; 102 103 /** 104 * Size of the classes header size. 105 */ 106 private static final int INTERNAL_HEADER_SIZE = 4 * ByteArrayUtil.SIZE_INT; 107 108 /** 109 * Position of file size (in records). 110 */ 111 private static final int HEADER_POS_SIZE = 3 * ByteArrayUtil.SIZE_INT; 112 113 /** 114 * Constructor to write a new file. 115 * 116 * @param filename File name to be opened. 117 * @param magicseed Magic number to derive real magic from. 118 * @param extraheadersize header size NOT including the internal header 119 * @param recordsize Record size 120 * @param initialsize Initial file size (in records) 121 * @throws IOException on IO errors 122 */ OnDiskArray(File filename, int magicseed, int extraheadersize, int recordsize, int initialsize)123 public OnDiskArray(File filename, int magicseed, int extraheadersize, int recordsize, int initialsize) throws IOException { 124 this.magic = mixMagic((int) serialVersionUID, magicseed); 125 this.headersize = extraheadersize + INTERNAL_HEADER_SIZE; 126 this.recordsize = recordsize; 127 this.filename = filename; 128 this.writable = true; 129 130 // do not allow overwriting, unless empty 131 if (filename.exists() && filename.length() > 0) { 132 throw new IOException("File already exists"); 133 } 134 135 // open file. 136 file = new RandomAccessFile(filename, "rw"); 137 // and acquire a file write lock 138 lock = file.getChannel().lock(); 139 140 // write magic header 141 file.writeInt(this.magic); 142 143 // write header size 144 file.writeInt(this.headersize); 145 146 // write size of a single record 147 file.writeInt(this.recordsize); 148 149 // write number of records 150 // verify position. 151 if (file.getFilePointer() != HEADER_POS_SIZE) { 152 // TODO: more appropriate exception class? 153 throw new IOException("File position doesn't match when writing file size."); 154 } 155 file.writeInt(initialsize); 156 157 // we should have written the complete internal header now. 158 if (file.getFilePointer() != INTERNAL_HEADER_SIZE) { 159 // TODO: more appropriate exception class? 160 throw new IOException("File position doesn't match header size after writing header."); 161 } 162 // resize file 163 resizeFile(initialsize); 164 165 // map array 166 mapArray(); 167 } 168 169 /** 170 * Constructor to open an existing file. The provided record size must match 171 * the record size stored within the files header. If you don't know this size 172 * yet and/or need to access the extra header you should use the other 173 * constructor below 174 * 175 * @param filename File name to be opened. 176 * @param magicseed Magic number to derive real magic from. 177 * @param extraheadersize header size NOT including the internal header 178 * @param recordsize Record size 179 * @param writable flag to open the file writable 180 * @throws IOException on IO errors 181 */ OnDiskArray(File filename, int magicseed, int extraheadersize, int recordsize, boolean writable)182 public OnDiskArray(File filename, int magicseed, int extraheadersize, int recordsize, boolean writable) throws IOException { 183 this.magic = mixMagic((int) serialVersionUID, magicseed); 184 this.headersize = extraheadersize + INTERNAL_HEADER_SIZE; 185 this.recordsize = recordsize; 186 this.filename = filename; 187 this.writable = writable; 188 189 String mode = writable ? "rw" : "r"; 190 191 file = new RandomAccessFile(filename, mode); 192 if (writable) { 193 // acquire a file write lock 194 lock = file.getChannel().lock(); 195 } 196 197 validateHeader(true); 198 mapArray(); 199 } 200 201 /** 202 * Constructor to open an existing file. The record size is read from the 203 * file's header and can be obtained by <code>getRecordsize()</code> 204 * 205 * @param filename File name to be opened. 206 * @param magicseed Magic number to derive real magic from. 207 * @param extraheadersize header size NOT including the internal header 208 * @param writable flag to open the file writable 209 * @throws IOException on IO errors 210 */ OnDiskArray(File filename, int magicseed, int extraheadersize, boolean writable)211 public OnDiskArray(File filename, int magicseed, int extraheadersize, boolean writable) throws IOException { 212 this.magic = mixMagic((int) serialVersionUID, magicseed); 213 this.headersize = extraheadersize + INTERNAL_HEADER_SIZE; 214 this.filename = filename; 215 this.writable = writable; 216 217 String mode = writable ? "rw" : "r"; 218 219 file = new RandomAccessFile(filename, mode); 220 if (writable) { 221 // acquire a file write lock 222 lock = file.getChannel().lock(); 223 } 224 225 validateHeader(false); 226 mapArray(); 227 } 228 229 /** 230 * (Re-) map the data array. 231 * 232 * @throws IOException on mapping error. 233 */ mapArray()234 private synchronized void mapArray() throws IOException { 235 if (map != null) { 236 ByteArrayUtil.unmapByteBuffer(map); 237 map = null; 238 } 239 MapMode mode = writable ? MapMode.READ_WRITE : MapMode.READ_ONLY; 240 map = file.getChannel().map(mode, headersize, recordsize * numrecs); 241 } 242 243 /** 244 * Validates the header and throws an IOException if the header is invalid. If 245 * validateRecordSize is set to true the record size must match exactly the 246 * stored record size within the files header, else the record size is read 247 * from the header and used. 248 * 249 * @param validateRecordSize 250 * @throws IOException 251 */ validateHeader(boolean validateRecordSize)252 private void validateHeader(boolean validateRecordSize) throws IOException { 253 int readmagic = file.readInt(); 254 // Validate magic number 255 if (readmagic != this.magic) { 256 file.close(); 257 throw new IOException("Magic in LinearDiskCache does not match: " + readmagic + " instead of " + this.magic); 258 } 259 // Validate header size 260 if (file.readInt() != this.headersize) { 261 file.close(); 262 throw new IOException("Header size in LinearDiskCache does not match."); 263 } 264 265 if (validateRecordSize) { 266 // Validate record size 267 if (file.readInt() != this.recordsize) { 268 file.close(); 269 throw new IOException("Recordsize in LinearDiskCache does not match."); 270 } 271 } else { 272 // or just read it from file 273 this.recordsize = file.readInt(); 274 } 275 276 // read the number of records and validate with file size. 277 if (file.getFilePointer() != HEADER_POS_SIZE) { 278 throw new IOException("Incorrect file position when reading header."); 279 } 280 this.numrecs = file.readInt(); 281 if (numrecs < 0 || file.length() != indexToFileposition(numrecs)) { 282 throw new IOException("File size and number of records do not agree."); 283 } 284 // yet another sanity check. We should have read all of our internal header 285 // now. 286 if (file.getFilePointer() != INTERNAL_HEADER_SIZE) { 287 throw new IOException("Incorrect file position after reading header."); 288 } 289 } 290 291 /** 292 * Mix two magic numbers into one, to obtain a combined magic. Note: 293 * mixMagic(a,b) != mixMagic(b,a) usually. 294 * 295 * @param magic1 Magic number to mix. 296 * @param magic2 Magic number to mix. 297 * @return Mixed magic number. 298 */ mixMagic(int magic1, int magic2)299 public static final int mixMagic(int magic1, int magic2) { 300 final long prime = 2654435761L; 301 long result = 1; 302 result = prime * result + magic1; 303 result = prime * result + magic2; 304 return (int) result; 305 } 306 307 /** 308 * Compute file position from index number 309 * 310 * @param index Index offset 311 * @return file position 312 */ indexToFileposition(long index)313 private long indexToFileposition(long index) { 314 long pos = headersize + index * recordsize; 315 return pos; 316 } 317 318 /** 319 * Resize file to the intended size 320 * 321 * @param newsize New file size. 322 * @throws IOException on IO errors 323 */ resizeFile(int newsize)324 public synchronized void resizeFile(int newsize) throws IOException { 325 if (!writable) { 326 throw new IOException("File is not writeable!"); 327 } 328 // update the number of records 329 this.numrecs = newsize; 330 file.seek(HEADER_POS_SIZE); 331 file.writeInt(numrecs); 332 333 // resize file 334 file.setLength(indexToFileposition(numrecs)); 335 mapArray(); 336 } 337 338 /** 339 * Get a record buffer 340 * 341 * @param index Record index 342 * @return Byte buffer for the record 343 * @throws IOException on IO errors 344 */ getRecordBuffer(int index)345 public synchronized ByteBuffer getRecordBuffer(int index) throws IOException { 346 if (index < 0 || index >= numrecs) { 347 throw new IOException("Access beyond end of file."); 348 } 349 // Adjust buffer view 350 synchronized (map) { 351 map.limit(recordsize * (index + 1)); 352 map.position(recordsize * index); 353 return map.slice(); 354 } 355 } 356 357 /** 358 * Return the size of the extra header. Accessor. 359 * 360 * @return Extra header size 361 */ getExtraHeaderSize()362 protected int getExtraHeaderSize() { 363 return headersize - INTERNAL_HEADER_SIZE; 364 } 365 366 /** 367 * Read the extra header data. 368 * 369 * @return additional header data 370 * @throws IOException on IO errors 371 */ getExtraHeader()372 public synchronized ByteBuffer getExtraHeader() throws IOException { 373 final int size = headersize - INTERNAL_HEADER_SIZE; 374 final MapMode mode = writable ? MapMode.READ_WRITE : MapMode.READ_ONLY; 375 return file.getChannel().map(mode, INTERNAL_HEADER_SIZE, size); 376 } 377 378 /** 379 * Get the size of a single record. 380 * 381 * @return Record size. 382 */ getRecordsize()383 protected int getRecordsize() { 384 return recordsize; 385 } 386 387 /** 388 * Get the file name. 389 * 390 * @return File name 391 */ getFilename()392 public File getFilename() { 393 return filename; 394 } 395 396 /** 397 * Check if the file is writable. 398 * 399 * @return true if the file is writable. 400 */ isWritable()401 public boolean isWritable() { 402 return writable; 403 } 404 405 /** 406 * Explicitly close the file. Note: following operations will likely cause 407 * IOExceptions. 408 * 409 * @throws IOException on IO errors 410 */ close()411 public synchronized void close() throws IOException { 412 writable = false; 413 if (map != null) { 414 ByteArrayUtil.unmapByteBuffer(map); 415 map = null; 416 } 417 if (lock != null) { 418 lock.release(); 419 lock = null; 420 } 421 file.close(); 422 } 423 424 /** 425 * Get number of records in file. 426 * 427 * @return Number of records in the file. 428 */ getNumRecords()429 public int getNumRecords() { 430 return numrecs; 431 } 432 433 /** 434 * Ensure that the file can fit the given number of records. 435 * 436 * @param size Size 437 * @throws IOException 438 */ ensureSize(int size)439 public void ensureSize(int size) throws IOException { 440 if (size > getNumRecords()) { 441 resizeFile(size); 442 } 443 } 444 } 445