1 /////////////////////////////////////////////////////////////////////////////// 2 // Copyright (c) 2001, Eric D. Friedman All Rights Reserved. 3 // Copyright (c) 2009, Rob Eden All Rights Reserved. 4 // Copyright (c) 2009, Jeff Randall All Rights Reserved. 5 // 6 // This library is free software; you can redistribute it and/or 7 // modify it under the terms of the GNU Lesser General Public 8 // License as published by the Free Software Foundation; either 9 // version 2.1 of the License, or (at your option) any later version. 10 // 11 // This library is distributed in the hope that it will be useful, 12 // but WITHOUT ANY WARRANTY; without even the implied warranty of 13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 // GNU General Public License for more details. 15 // 16 // You should have received a copy of the GNU Lesser General Public 17 // License along with this program; if not, write to the Free Software 18 // Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 19 /////////////////////////////////////////////////////////////////////////////// 20 21 package gnu.trove.impl.hash; 22 23 import gnu.trove.procedure.TObjectProcedure; 24 25 import java.io.IOException; 26 import java.io.ObjectInput; 27 import java.io.ObjectOutput; 28 import java.util.Arrays; 29 import java.util.HashSet; 30 import java.util.Set; 31 32 33 /** 34 * An open addressed hashing implementation for Object types. 35 * <p/> 36 * Created: Sun Nov 4 08:56:06 2001 37 * 38 * @author Eric D. Friedman 39 * @author Rob Eden 40 * @author Jeff Randall 41 * @version $Id: TObjectHash.java,v 1.1.2.6 2009/11/07 03:36:44 robeden Exp $ 42 */ 43 abstract public class TObjectHash<T> extends THash { 44 45 @SuppressWarnings({"UnusedDeclaration"}) 46 static final long serialVersionUID = -3461112548087185871L; 47 48 49 /** 50 * the set of Objects 51 */ 52 public transient Object[] _set; 53 54 public static final Object REMOVED = new Object(), FREE = new Object(); 55 56 /** 57 * Indicates whether the last insertKey() call used a FREE slot. This field 58 * should be inspected right after call insertKey() 59 */ 60 protected boolean consumeFreeSlot; 61 62 63 /** 64 * Creates a new <code>TObjectHash</code> instance with the 65 * default capacity and load factor. 66 */ TObjectHash()67 public TObjectHash() { 68 super(); 69 } 70 71 72 /** 73 * Creates a new <code>TObjectHash</code> instance whose capacity 74 * is the next highest prime above <tt>initialCapacity + 1</tt> 75 * unless that value is already prime. 76 * 77 * @param initialCapacity an <code>int</code> value 78 */ TObjectHash(int initialCapacity)79 public TObjectHash(int initialCapacity) { 80 super(initialCapacity); 81 } 82 83 84 /** 85 * Creates a new <code>TObjectHash</code> instance with a prime 86 * value at or near the specified capacity and load factor. 87 * 88 * @param initialCapacity used to find a prime capacity for the table. 89 * @param loadFactor used to calculate the threshold over which 90 * rehashing takes place. 91 */ TObjectHash(int initialCapacity, float loadFactor)92 public TObjectHash(int initialCapacity, float loadFactor) { 93 super(initialCapacity, loadFactor); 94 } 95 96 capacity()97 public int capacity() { 98 return _set.length; 99 } 100 101 removeAt(int index)102 protected void removeAt(int index) { 103 _set[index] = REMOVED; 104 super.removeAt(index); 105 } 106 107 108 /** 109 * initializes the Object set of this hash table. 110 * 111 * @param initialCapacity an <code>int</code> value 112 * @return an <code>int</code> value 113 */ setUp(int initialCapacity)114 public int setUp(int initialCapacity) { 115 int capacity; 116 117 capacity = super.setUp(initialCapacity); 118 _set = new Object[capacity]; 119 Arrays.fill(_set, FREE); 120 return capacity; 121 } 122 123 124 /** 125 * Executes <tt>procedure</tt> for each element in the set. 126 * 127 * @param procedure a <code>TObjectProcedure</code> value 128 * @return false if the loop over the set terminated because 129 * the procedure returned false for some value. 130 */ 131 @SuppressWarnings({"unchecked"}) forEach(TObjectProcedure<? super T> procedure)132 public boolean forEach(TObjectProcedure<? super T> procedure) { 133 Object[] set = _set; 134 for (int i = set.length; i-- > 0;) { 135 if (set[i] != FREE 136 && set[i] != REMOVED 137 && !procedure.execute((T) set[i])) { 138 return false; 139 } 140 } 141 return true; 142 } 143 144 145 /** 146 * Searches the set for <tt>obj</tt> 147 * 148 * @param obj an <code>Object</code> value 149 * @return a <code>boolean</code> value 150 */ 151 @SuppressWarnings({"unchecked"}) contains(Object obj)152 public boolean contains(Object obj) { 153 return index(obj) >= 0; 154 } 155 156 157 /** 158 * Locates the index of <tt>obj</tt>. 159 * 160 * @param obj an <code>Object</code> value 161 * @return the index of <tt>obj</tt> or -1 if it isn't in the set. 162 */ index(Object obj)163 protected int index(Object obj) { 164 if (obj == null) 165 return indexForNull(); 166 167 // From here on we know obj to be non-null 168 final int hash = hash(obj) & 0x7fffffff; 169 int index = hash % _set.length; 170 Object cur = _set[index]; 171 172 173 if (cur == FREE) { 174 return -1; 175 } 176 177 if (cur == obj || equals(obj, cur)) { 178 return index; 179 } 180 181 return indexRehashed(obj, index, hash, cur); 182 } 183 184 /** 185 * Locates the index of non-null <tt>obj</tt>. 186 * 187 * @param obj target key, know to be non-null 188 * @param index we start from 189 * @param hash 190 * @param cur 191 * @return 192 */ indexRehashed(Object obj, int index, int hash, Object cur)193 private int indexRehashed(Object obj, int index, int hash, Object cur) { 194 final Object[] set = _set; 195 final int length = set.length; 196 197 // NOTE: here it has to be REMOVED or FULL (some user-given value) 198 // see Knuth, p. 529 199 int probe = 1 + (hash % (length - 2)); 200 201 final int loopIndex = index; 202 203 do { 204 index -= probe; 205 if (index < 0) { 206 index += length; 207 } 208 cur = set[index]; 209 // 210 if (cur == FREE) 211 return -1; 212 213 // 214 if ((cur == obj || equals(obj, cur))) 215 return index; 216 } while (index != loopIndex); 217 218 return -1; 219 } 220 221 /** 222 * Locates the index <tt>null</tt>. 223 * <p/> 224 * null specific loop exploiting several properties to simplify the iteration logic 225 * - the null value hashes to 0 we so we can iterate from the beginning. 226 * - the probe value is 1 for this case 227 * - object identity can be used to match this case 228 * <p/> 229 * --> this result a simpler loop 230 * 231 * @return 232 */ indexForNull()233 private int indexForNull() { 234 int index = 0; 235 for (Object o : _set) { 236 if (o == null) 237 return index; 238 239 if (o == FREE) 240 return -1; 241 242 index++; 243 } 244 245 return -1; 246 } 247 248 /** 249 * Alias introduced to avoid breaking the API. The new method name insertKey() reflects the 250 * changes made to the logic. 251 * 252 * @param obj 253 * @return 254 * @deprecated use {@link #insertKey} instead 255 */ 256 @Deprecated insertionIndex(T obj)257 protected int insertionIndex(T obj) { 258 return insertKey(obj); 259 } 260 261 /** 262 * Locates the index at which <tt>key</tt> can be inserted. if 263 * there is already a value equal()ing <tt>key</tt> in the set, 264 * returns that value's index as <tt>-index - 1</tt>. 265 * <p/> 266 * If a slot is found the value is inserted. When a FREE slot is used the consumeFreeSlot field is 267 * set to true. This field should be used in the method invoking insertKey() to pass to postInsertHook() 268 * 269 * @param key an <code>Object</code> value 270 * @return the index of a FREE slot at which key can be inserted 271 * or, if key is already stored in the hash, the negative value of 272 * that index, minus 1: -index -1. 273 */ insertKey(T key)274 protected int insertKey(T key) { 275 consumeFreeSlot = false; 276 277 if (key == null) 278 return insertKeyForNull(); 279 280 final int hash = hash(key) & 0x7fffffff; 281 int index = hash % _set.length; 282 Object cur = _set[index]; 283 284 if (cur == FREE) { 285 consumeFreeSlot = true; 286 _set[index] = key; // insert value 287 return index; // empty, all done 288 } 289 290 if (cur == key || equals(key, cur)) { 291 return -index - 1; // already stored 292 } 293 294 return insertKeyRehash(key, index, hash, cur); 295 } 296 297 /** 298 * Looks for a slot using double hashing for a non-null key values and inserts the value 299 * in the slot 300 * 301 * @param key non-null key value 302 * @param index natural index 303 * @param hash 304 * @param cur value of first matched slot 305 * @return 306 */ insertKeyRehash(T key, int index, int hash, Object cur)307 private int insertKeyRehash(T key, int index, int hash, Object cur) { 308 final Object[] set = _set; 309 final int length = set.length; 310 // already FULL or REMOVED, must probe 311 // compute the double hash 312 final int probe = 1 + (hash % (length - 2)); 313 314 final int loopIndex = index; 315 int firstRemoved = -1; 316 317 /** 318 * Look until FREE slot or we start to loop 319 */ 320 do { 321 // Identify first removed slot 322 if (cur == REMOVED && firstRemoved == -1) 323 firstRemoved = index; 324 325 index -= probe; 326 if (index < 0) { 327 index += length; 328 } 329 cur = set[index]; 330 331 // A FREE slot stops the search 332 if (cur == FREE) { 333 if (firstRemoved != -1) { 334 _set[firstRemoved] = key; 335 return firstRemoved; 336 } else { 337 consumeFreeSlot = true; 338 _set[index] = key; // insert value 339 return index; 340 } 341 } 342 343 if (cur == key || equals(key, cur)) { 344 return -index - 1; 345 } 346 347 // Detect loop 348 } while (index != loopIndex); 349 350 // We inspected all reachable slots and did not find a FREE one 351 // If we found a REMOVED slot we return the first one found 352 if (firstRemoved != -1) { 353 _set[firstRemoved] = key; 354 return firstRemoved; 355 } 356 357 // Can a resizing strategy be found that resizes the set? 358 throw new IllegalStateException("No free or removed slots available. Key set full?!!"); 359 } 360 361 /** 362 * Looks for a slot using double hashing for a null key value and inserts the value. 363 * <p/> 364 * null specific loop exploiting several properties to simplify the iteration logic 365 * - the null value hashes to 0 we so we can iterate from the beginning. 366 * - the probe value is 1 for this case 367 * - object identity can be used to match this case 368 * 369 * @return 370 */ insertKeyForNull()371 private int insertKeyForNull() { 372 int index = 0; 373 int firstRemoved = -1; 374 375 // Look for a slot containing the 'null' value as key 376 for (Object o : _set) { 377 // Locate first removed 378 if (o == REMOVED && firstRemoved == -1) 379 firstRemoved = index; 380 381 if (o == FREE) { 382 if (firstRemoved != -1) { 383 _set[firstRemoved] = null; 384 return firstRemoved; 385 } else { 386 consumeFreeSlot = true; 387 _set[index] = null; // insert value 388 return index; 389 } 390 } 391 392 if (o == null) { 393 return -index - 1; 394 } 395 396 index++; 397 } 398 399 // We inspected all reachable slots and did not find a FREE one 400 // If we found a REMOVED slot we return the first one found 401 if (firstRemoved != -1) { 402 _set[firstRemoved] = null; 403 return firstRemoved; 404 } 405 406 // We scanned the entire key set and found nothing, is set full? 407 // Can a resizing strategy be found that resizes the set? 408 throw new IllegalStateException("Could not find insertion index for null key. Key set full!?!!"); 409 } 410 411 412 /** 413 * Convenience methods for subclasses to use in throwing exceptions about 414 * badly behaved user objects employed as keys. We have to throw an 415 * IllegalArgumentException with a rather verbose message telling the 416 * user that they need to fix their object implementation to conform 417 * to the general contract for java.lang.Object. 418 * 419 * 420 * @param o1 the first of the equal elements with unequal hash codes. 421 * @param o2 the second of the equal elements with unequal hash codes. 422 * @throws IllegalArgumentException the whole point of this method. 423 */ throwObjectContractViolation(Object o1, Object o2)424 protected final void throwObjectContractViolation(Object o1, Object o2) 425 throws IllegalArgumentException { 426 throw buildObjectContractViolation(o1, o2, ""); 427 } 428 429 /** 430 * Convenience methods for subclasses to use in throwing exceptions about 431 * badly behaved user objects employed as keys. We have to throw an 432 * IllegalArgumentException with a rather verbose message telling the 433 * user that they need to fix their object implementation to conform 434 * to the general contract for java.lang.Object. 435 * 436 * 437 * @param o1 the first of the equal elements with unequal hash codes. 438 * @param o2 the second of the equal elements with unequal hash codes. 439 * @param size 440 *@param oldSize 441 * @param oldKeys @throws IllegalArgumentException the whole point of this method. 442 */ throwObjectContractViolation(Object o1, Object o2, int size, int oldSize, Object[] oldKeys)443 protected final void throwObjectContractViolation(Object o1, Object o2, int size, int oldSize, Object[] oldKeys) 444 throws IllegalArgumentException { 445 String extra = dumpExtraInfo(o1, o2, size(), oldSize, oldKeys); 446 447 448 throw buildObjectContractViolation(o1, o2, extra); 449 } 450 451 /** 452 * Convenience methods for subclasses to use in throwing exceptions about 453 * badly behaved user objects employed as keys. We have to throw an 454 * IllegalArgumentException with a rather verbose message telling the 455 * user that they need to fix their object implementation to conform 456 * to the general contract for java.lang.Object. 457 * 458 * 459 * @param o1 the first of the equal elements with unequal hash codes. 460 * @param o2 the second of the equal elements with unequal hash codes. 461 * @throws IllegalArgumentException the whole point of this method. 462 */ buildObjectContractViolation(Object o1, Object o2, String extra )463 protected final IllegalArgumentException buildObjectContractViolation(Object o1, Object o2, String extra ) { 464 return new IllegalArgumentException("Equal objects must have equal hashcodes. " + 465 "During rehashing, Trove discovered that the following two objects claim " + 466 "to be equal (as in java.lang.Object.equals()) but their hashCodes (or " + 467 "those calculated by your TObjectHashingStrategy) are not equal." + 468 "This violates the general contract of java.lang.Object.hashCode(). See " + 469 "bullet point two in that method's documentation. object #1 =" + objectInfo(o1) + 470 "; object #2 =" + objectInfo(o2) + "\n" + extra); 471 } 472 473 equals(Object notnull, Object two)474 protected boolean equals(Object notnull, Object two) { 475 if (two == null || two == REMOVED) 476 return false; 477 478 return notnull.equals(two); 479 } 480 hash(Object notnull)481 protected int hash(Object notnull) { 482 return notnull.hashCode(); 483 } 484 reportPotentialConcurrentMod(int newSize, int oldSize)485 protected static String reportPotentialConcurrentMod(int newSize, int oldSize) { 486 // Note that we would not be able to detect concurrent paired of put()-remove() 487 // operations with this simple check 488 if (newSize != oldSize) 489 return "[Warning] apparent concurrent modification of the key set. " + 490 "Size before and after rehash() do not match " + oldSize + " vs " + newSize; 491 492 return ""; 493 } 494 495 /** 496 * 497 * @param newVal the key being inserted 498 * @param oldVal the key already stored at that position 499 * @param currentSize size of the key set during rehashing 500 * @param oldSize size of the key set before rehashing 501 * @param oldKeys the old key set 502 */ dumpExtraInfo(Object newVal, Object oldVal, int currentSize, int oldSize, Object[] oldKeys)503 protected String dumpExtraInfo(Object newVal, Object oldVal, int currentSize, int oldSize, Object[] oldKeys) { 504 StringBuilder b = new StringBuilder(); 505 // 506 b.append(dumpKeyTypes(newVal, oldVal)); 507 508 b.append(reportPotentialConcurrentMod(currentSize, oldSize)); 509 b.append(detectKeyLoss(oldKeys, oldSize)); 510 511 // Is de same object already present? Double insert? 512 if (newVal == oldVal) { 513 b.append("Inserting same object twice, rehashing bug. Object= ").append(oldVal); 514 } 515 516 return b.toString(); 517 } 518 519 /** 520 * Detect inconsistent hashCode() and/or equals() methods 521 * 522 * @param keys 523 * @param oldSize 524 * @return 525 */ detectKeyLoss(Object[] keys, int oldSize)526 private static String detectKeyLoss(Object[] keys, int oldSize) { 527 StringBuilder buf = new StringBuilder(); 528 Set<Object> k = makeKeySet(keys); 529 if (k.size() != oldSize) { 530 buf.append("\nhashCode() and/or equals() have inconsistent implementation"); 531 buf.append("\nKey set lost entries, now got ").append(k.size()).append(" instead of ").append(oldSize); 532 buf.append(". This can manifest itself as an apparent duplicate key."); 533 } 534 535 return buf.toString(); 536 } 537 makeKeySet(Object[] keys)538 private static Set<Object> makeKeySet(Object[] keys) { 539 Set<Object> types = new HashSet<Object>(); 540 for (Object o : keys) { 541 if (o != FREE && o != REMOVED) { 542 types.add(o); 543 } 544 } 545 546 return types; 547 } 548 equalsSymmetryInfo(Object a, Object b)549 private static String equalsSymmetryInfo(Object a, Object b) { 550 StringBuilder buf = new StringBuilder(); 551 if (a == b) { 552 return "a == b"; 553 } 554 555 if (a.getClass() != b.getClass()) { 556 buf.append("Class of objects differ a=").append(a.getClass()).append(" vs b=").append(b.getClass()); 557 558 boolean aEb = a.equals(b); 559 boolean bEa = b.equals(a); 560 if (aEb != bEa) { 561 buf.append("\nequals() of a or b object are asymmetric"); 562 buf.append("\na.equals(b) =").append(aEb); 563 buf.append("\nb.equals(a) =").append(bEa); 564 } 565 } 566 567 return buf.toString(); 568 } 569 objectInfo(Object o)570 protected static String objectInfo(Object o) { 571 return (o == null ? "class null" : o.getClass()) + " id= " + System.identityHashCode(o) 572 + " hashCode= " + (o == null ? 0 : o.hashCode()) + " toString= " + String.valueOf(o); 573 } 574 dumpKeyTypes(Object newVal, Object oldVal)575 private String dumpKeyTypes(Object newVal, Object oldVal) { 576 StringBuilder buf = new StringBuilder(); 577 Set<Class<?>> types = new HashSet<Class<?>>(); 578 for (Object o : _set) { 579 if (o != FREE && o != REMOVED) { 580 if (o != null) 581 types.add(o.getClass()); 582 else 583 types.add(null); 584 } 585 } 586 587 if (types.size() > 1) { 588 buf.append("\nMore than one type used for keys. Watch out for asymmetric equals(). " + 589 "Read about the 'Liskov substitution principle' and the implications for equals() in java."); 590 591 buf.append("\nKey types: ").append(types); 592 buf.append(equalsSymmetryInfo(newVal, oldVal)); 593 } 594 595 return buf.toString(); 596 } 597 598 599 @Override writeExternal(ObjectOutput out)600 public void writeExternal(ObjectOutput out) throws IOException { 601 // VERSION 602 out.writeByte(0); 603 604 // SUPER 605 super.writeExternal(out); 606 } 607 608 609 @Override readExternal(ObjectInput in)610 public void readExternal(ObjectInput in) 611 throws IOException, ClassNotFoundException { 612 613 // VERSION 614 in.readByte(); 615 616 // SUPER 617 super.readExternal(in); 618 } 619 } // TObjectHash 620