1 ///////////////////////////////////////////////////////////////////////////////
2 // Copyright (c) 2001, Eric D. Friedman All Rights Reserved.
3 // Copyright (c) 2009, Rob Eden All Rights Reserved.
4 // Copyright (c) 2009, Jeff Randall All Rights Reserved.
5 //
6 // This library is free software; you can redistribute it and/or
7 // modify it under the terms of the GNU Lesser General Public
8 // License as published by the Free Software Foundation; either
9 // version 2.1 of the License, or (at your option) any later version.
10 //
11 // This library is distributed in the hope that it will be useful,
12 // but WITHOUT ANY WARRANTY; without even the implied warranty of
13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14 // GNU General Public License for more details.
15 //
16 // You should have received a copy of the GNU Lesser General Public
17 // License along with this program; if not, write to the Free Software
18 // Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
19 ///////////////////////////////////////////////////////////////////////////////
20 
21 package gnu.trove.impl.hash;
22 
23 import gnu.trove.procedure.TObjectProcedure;
24 
25 import java.io.IOException;
26 import java.io.ObjectInput;
27 import java.io.ObjectOutput;
28 import java.util.Arrays;
29 import java.util.HashSet;
30 import java.util.Set;
31 
32 
33 /**
34  * An open addressed hashing implementation for Object types.
35  * <p/>
36  * Created: Sun Nov  4 08:56:06 2001
37  *
38  * @author Eric D. Friedman
39  * @author Rob Eden
40  * @author Jeff Randall
41  * @version $Id: TObjectHash.java,v 1.1.2.6 2009/11/07 03:36:44 robeden Exp $
42  */
43 abstract public class TObjectHash<T> extends THash {
44 
45     @SuppressWarnings({"UnusedDeclaration"})
46     static final long serialVersionUID = -3461112548087185871L;
47 
48 
49     /**
50      * the set of Objects
51      */
52     public transient Object[] _set;
53 
54     public static final Object REMOVED = new Object(), FREE = new Object();
55 
56     /**
57      * Indicates whether the last insertKey() call used a FREE slot. This field
58      * should be inspected right after call insertKey()
59      */
60     protected boolean consumeFreeSlot;
61 
62 
63     /**
64      * Creates a new <code>TObjectHash</code> instance with the
65      * default capacity and load factor.
66      */
TObjectHash()67     public TObjectHash() {
68         super();
69     }
70 
71 
72     /**
73      * Creates a new <code>TObjectHash</code> instance whose capacity
74      * is the next highest prime above <tt>initialCapacity + 1</tt>
75      * unless that value is already prime.
76      *
77      * @param initialCapacity an <code>int</code> value
78      */
TObjectHash(int initialCapacity)79     public TObjectHash(int initialCapacity) {
80         super(initialCapacity);
81     }
82 
83 
84     /**
85      * Creates a new <code>TObjectHash</code> instance with a prime
86      * value at or near the specified capacity and load factor.
87      *
88      * @param initialCapacity used to find a prime capacity for the table.
89      * @param loadFactor      used to calculate the threshold over which
90      *                        rehashing takes place.
91      */
TObjectHash(int initialCapacity, float loadFactor)92     public TObjectHash(int initialCapacity, float loadFactor) {
93         super(initialCapacity, loadFactor);
94     }
95 
96 
capacity()97     public int capacity() {
98         return _set.length;
99     }
100 
101 
removeAt(int index)102     protected void removeAt(int index) {
103         _set[index] = REMOVED;
104         super.removeAt(index);
105     }
106 
107 
108     /**
109      * initializes the Object set of this hash table.
110      *
111      * @param initialCapacity an <code>int</code> value
112      * @return an <code>int</code> value
113      */
setUp(int initialCapacity)114     public int setUp(int initialCapacity) {
115         int capacity;
116 
117         capacity = super.setUp(initialCapacity);
118         _set = new Object[capacity];
119         Arrays.fill(_set, FREE);
120         return capacity;
121     }
122 
123 
124     /**
125      * Executes <tt>procedure</tt> for each element in the set.
126      *
127      * @param procedure a <code>TObjectProcedure</code> value
128      * @return false if the loop over the set terminated because
129      *         the procedure returned false for some value.
130      */
131     @SuppressWarnings({"unchecked"})
forEach(TObjectProcedure<? super T> procedure)132     public boolean forEach(TObjectProcedure<? super T> procedure) {
133         Object[] set = _set;
134         for (int i = set.length; i-- > 0;) {
135             if (set[i] != FREE
136                     && set[i] != REMOVED
137                     && !procedure.execute((T) set[i])) {
138                 return false;
139             }
140         }
141         return true;
142     }
143 
144 
145     /**
146      * Searches the set for <tt>obj</tt>
147      *
148      * @param obj an <code>Object</code> value
149      * @return a <code>boolean</code> value
150      */
151     @SuppressWarnings({"unchecked"})
contains(Object obj)152     public boolean contains(Object obj) {
153         return index(obj) >= 0;
154     }
155 
156 
157     /**
158      * Locates the index of <tt>obj</tt>.
159      *
160      * @param obj an <code>Object</code> value
161      * @return the index of <tt>obj</tt> or -1 if it isn't in the set.
162      */
index(Object obj)163     protected int index(Object obj) {
164         if (obj == null)
165             return indexForNull();
166 
167         // From here on we know obj to be non-null
168         final int hash = hash(obj) & 0x7fffffff;
169         int index = hash % _set.length;
170         Object cur = _set[index];
171 
172 
173         if (cur == FREE) {
174             return -1;
175         }
176 
177         if (cur == obj || equals(obj, cur)) {
178             return index;
179         }
180 
181         return indexRehashed(obj, index, hash, cur);
182     }
183 
184     /**
185      * Locates the index of non-null <tt>obj</tt>.
186      *
187      * @param obj   target key, know to be non-null
188      * @param index we start from
189      * @param hash
190      * @param cur
191      * @return
192      */
indexRehashed(Object obj, int index, int hash, Object cur)193     private int indexRehashed(Object obj, int index, int hash, Object cur) {
194         final Object[] set = _set;
195         final int length = set.length;
196 
197         // NOTE: here it has to be REMOVED or FULL (some user-given value)
198         // see Knuth, p. 529
199         int probe = 1 + (hash % (length - 2));
200 
201         final int loopIndex = index;
202 
203         do {
204             index -= probe;
205             if (index < 0) {
206                 index += length;
207             }
208             cur = set[index];
209             //
210             if (cur == FREE)
211                 return -1;
212 
213             //
214             if ((cur == obj || equals(obj, cur)))
215                 return index;
216         } while (index != loopIndex);
217 
218         return -1;
219     }
220 
221     /**
222      * Locates the index <tt>null</tt>.
223      * <p/>
224      * null specific loop exploiting several properties to simplify the iteration logic
225      * - the null value hashes to 0 we so we can iterate from the beginning.
226      * - the probe value is 1 for this case
227      * - object identity can be used to match this case
228      * <p/>
229      * --> this result a simpler loop
230      *
231      * @return
232      */
indexForNull()233     private int indexForNull() {
234         int index = 0;
235         for (Object o : _set) {
236             if (o == null)
237                 return index;
238 
239             if (o == FREE)
240                 return -1;
241 
242             index++;
243         }
244 
245         return -1;
246     }
247 
248     /**
249      * Alias introduced to avoid breaking the API. The new method name insertKey() reflects the
250      * changes made to the logic.
251      *
252      * @param obj
253      * @return
254      * @deprecated use {@link #insertKey} instead
255      */
256     @Deprecated
insertionIndex(T obj)257     protected int insertionIndex(T obj) {
258         return insertKey(obj);
259     }
260 
261     /**
262      * Locates the index at which <tt>key</tt> can be inserted.  if
263      * there is already a value equal()ing <tt>key</tt> in the set,
264      * returns that value's index as <tt>-index - 1</tt>.
265      * <p/>
266      * If a slot is found the value is inserted. When a FREE slot is used the consumeFreeSlot field is
267      * set to true. This field should be used in the method invoking insertKey() to pass to postInsertHook()
268      *
269      * @param key an <code>Object</code> value
270      * @return the index of a FREE slot at which key can be inserted
271      *         or, if key is already stored in the hash, the negative value of
272      *         that index, minus 1: -index -1.
273      */
insertKey(T key)274     protected int insertKey(T key) {
275         consumeFreeSlot = false;
276 
277         if (key == null)
278             return insertKeyForNull();
279 
280         final int hash = hash(key) & 0x7fffffff;
281         int index = hash % _set.length;
282         Object cur = _set[index];
283 
284         if (cur == FREE) {
285             consumeFreeSlot = true;
286             _set[index] = key;  // insert value
287             return index;       // empty, all done
288         }
289 
290         if (cur == key || equals(key, cur)) {
291             return -index - 1;   // already stored
292         }
293 
294         return insertKeyRehash(key, index, hash, cur);
295     }
296 
297     /**
298      * Looks for a slot using double hashing for a non-null key values and inserts the value
299      * in the slot
300      *
301      * @param key   non-null key value
302      * @param index natural index
303      * @param hash
304      * @param cur   value of first matched slot
305      * @return
306      */
insertKeyRehash(T key, int index, int hash, Object cur)307     private int insertKeyRehash(T key, int index, int hash, Object cur) {
308         final Object[] set = _set;
309         final int length = set.length;
310         // already FULL or REMOVED, must probe
311         // compute the double hash
312         final int probe = 1 + (hash % (length - 2));
313 
314         final int loopIndex = index;
315         int firstRemoved = -1;
316 
317         /**
318          * Look until FREE slot or we start to loop
319          */
320         do {
321             // Identify first removed slot
322             if (cur == REMOVED && firstRemoved == -1)
323                 firstRemoved = index;
324 
325             index -= probe;
326             if (index < 0) {
327                 index += length;
328             }
329             cur = set[index];
330 
331             // A FREE slot stops the search
332             if (cur == FREE) {
333                 if (firstRemoved != -1) {
334                     _set[firstRemoved] = key;
335                     return firstRemoved;
336                 } else {
337                     consumeFreeSlot = true;
338                     _set[index] = key;  // insert value
339                     return index;
340                 }
341             }
342 
343             if (cur == key || equals(key, cur)) {
344                 return -index - 1;
345             }
346 
347             // Detect loop
348         } while (index != loopIndex);
349 
350         // We inspected all reachable slots and did not find a FREE one
351         // If we found a REMOVED slot we return the first one found
352         if (firstRemoved != -1) {
353             _set[firstRemoved] = key;
354             return firstRemoved;
355         }
356 
357         // Can a resizing strategy be found that resizes the set?
358         throw new IllegalStateException("No free or removed slots available. Key set full?!!");
359     }
360 
361     /**
362      * Looks for a slot using double hashing for a null key value and inserts the value.
363      * <p/>
364      * null specific loop exploiting several properties to simplify the iteration logic
365      * - the null value hashes to 0 we so we can iterate from the beginning.
366      * - the probe value is 1 for this case
367      * - object identity can be used to match this case
368      *
369      * @return
370      */
insertKeyForNull()371     private int insertKeyForNull() {
372         int index = 0;
373         int firstRemoved = -1;
374 
375         // Look for a slot containing the 'null' value as key
376         for (Object o : _set) {
377             // Locate first removed
378             if (o == REMOVED && firstRemoved == -1)
379                 firstRemoved = index;
380 
381             if (o == FREE) {
382                 if (firstRemoved != -1) {
383                     _set[firstRemoved] = null;
384                     return firstRemoved;
385                 } else {
386                     consumeFreeSlot = true;
387                     _set[index] = null;  // insert value
388                     return index;
389                 }
390             }
391 
392             if (o == null) {
393                 return -index - 1;
394             }
395 
396             index++;
397         }
398 
399         // We inspected all reachable slots and did not find a FREE one
400         // If we found a REMOVED slot we return the first one found
401         if (firstRemoved != -1) {
402             _set[firstRemoved] = null;
403             return firstRemoved;
404         }
405 
406         // We scanned the entire key set and found nothing, is set full?
407         // Can a resizing strategy be found that resizes the set?
408         throw new IllegalStateException("Could not find insertion index for null key. Key set full!?!!");
409     }
410 
411 
412     /**
413      * Convenience methods for subclasses to use in throwing exceptions about
414      * badly behaved user objects employed as keys.  We have to throw an
415      * IllegalArgumentException with a rather verbose message telling the
416      * user that they need to fix their object implementation to conform
417      * to the general contract for java.lang.Object.
418      *
419      *
420      * @param o1 the first of the equal elements with unequal hash codes.
421      * @param o2 the second of the equal elements with unequal hash codes.
422      * @throws IllegalArgumentException the whole point of this method.
423      */
throwObjectContractViolation(Object o1, Object o2)424     protected final void throwObjectContractViolation(Object o1, Object o2)
425             throws IllegalArgumentException {
426         throw buildObjectContractViolation(o1, o2, "");
427     }
428 
429     /**
430      * Convenience methods for subclasses to use in throwing exceptions about
431      * badly behaved user objects employed as keys.  We have to throw an
432      * IllegalArgumentException with a rather verbose message telling the
433      * user that they need to fix their object implementation to conform
434      * to the general contract for java.lang.Object.
435      *
436      *
437      * @param o1 the first of the equal elements with unequal hash codes.
438      * @param o2 the second of the equal elements with unequal hash codes.
439      * @param size
440      *@param oldSize
441      * @param oldKeys @throws IllegalArgumentException the whole point of this method.
442      */
throwObjectContractViolation(Object o1, Object o2, int size, int oldSize, Object[] oldKeys)443     protected final void throwObjectContractViolation(Object o1, Object o2, int size, int oldSize, Object[] oldKeys)
444             throws IllegalArgumentException {
445         String extra = dumpExtraInfo(o1, o2, size(), oldSize, oldKeys);
446 
447 
448         throw buildObjectContractViolation(o1, o2, extra);
449     }
450 
451     /**
452      * Convenience methods for subclasses to use in throwing exceptions about
453      * badly behaved user objects employed as keys.  We have to throw an
454      * IllegalArgumentException with a rather verbose message telling the
455      * user that they need to fix their object implementation to conform
456      * to the general contract for java.lang.Object.
457      *
458      *
459      * @param o1 the first of the equal elements with unequal hash codes.
460      * @param o2 the second of the equal elements with unequal hash codes.
461      * @throws IllegalArgumentException the whole point of this method.
462      */
buildObjectContractViolation(Object o1, Object o2, String extra )463     protected final IllegalArgumentException buildObjectContractViolation(Object o1, Object o2, String extra ) {
464         return new IllegalArgumentException("Equal objects must have equal hashcodes. " +
465                 "During rehashing, Trove discovered that the following two objects claim " +
466                 "to be equal (as in java.lang.Object.equals()) but their hashCodes (or " +
467                 "those calculated by your TObjectHashingStrategy) are not equal." +
468                 "This violates the general contract of java.lang.Object.hashCode().  See " +
469                 "bullet point two in that method's documentation. object #1 =" + objectInfo(o1) +
470                 "; object #2 =" + objectInfo(o2) + "\n" + extra);
471     }
472 
473 
equals(Object notnull, Object two)474     protected boolean equals(Object notnull, Object two) {
475         if (two == null || two == REMOVED)
476             return false;
477 
478         return notnull.equals(two);
479     }
480 
hash(Object notnull)481     protected int hash(Object notnull) {
482         return notnull.hashCode();
483     }
484 
reportPotentialConcurrentMod(int newSize, int oldSize)485     protected static String reportPotentialConcurrentMod(int newSize, int oldSize) {
486         // Note that we would not be able to detect concurrent paired of put()-remove()
487         // operations with this simple check
488         if (newSize != oldSize)
489             return "[Warning] apparent concurrent modification of the key set. " +
490                     "Size before and after rehash() do not match " + oldSize + " vs " + newSize;
491 
492         return "";
493     }
494 
495     /**
496      *
497      * @param newVal the key being inserted
498      * @param oldVal the key already stored at that position
499      * @param currentSize size of the key set during rehashing
500      * @param oldSize size of the key set before rehashing
501      * @param oldKeys the old key set
502      */
dumpExtraInfo(Object newVal, Object oldVal, int currentSize, int oldSize, Object[] oldKeys)503     protected String dumpExtraInfo(Object newVal, Object oldVal, int currentSize, int oldSize, Object[] oldKeys) {
504         StringBuilder b = new StringBuilder();
505         //
506         b.append(dumpKeyTypes(newVal, oldVal));
507 
508         b.append(reportPotentialConcurrentMod(currentSize, oldSize));
509         b.append(detectKeyLoss(oldKeys, oldSize));
510 
511         // Is de same object already present? Double insert?
512         if (newVal == oldVal) {
513             b.append("Inserting same object twice, rehashing bug. Object= ").append(oldVal);
514         }
515 
516         return b.toString();
517     }
518 
519     /**
520      * Detect inconsistent hashCode() and/or equals() methods
521      *
522      * @param keys
523      * @param oldSize
524      * @return
525      */
detectKeyLoss(Object[] keys, int oldSize)526     private static String detectKeyLoss(Object[] keys, int oldSize) {
527         StringBuilder buf = new StringBuilder();
528         Set<Object> k = makeKeySet(keys);
529         if (k.size() != oldSize) {
530             buf.append("\nhashCode() and/or equals() have inconsistent implementation");
531             buf.append("\nKey set lost entries, now got ").append(k.size()).append(" instead of ").append(oldSize);
532             buf.append(". This can manifest itself as an apparent duplicate key.");
533         }
534 
535         return buf.toString();
536     }
537 
makeKeySet(Object[] keys)538     private static Set<Object> makeKeySet(Object[] keys) {
539         Set<Object> types = new HashSet<Object>();
540         for (Object o : keys) {
541             if (o != FREE && o != REMOVED) {
542                     types.add(o);
543             }
544         }
545 
546         return types;
547     }
548 
equalsSymmetryInfo(Object a, Object b)549     private static String equalsSymmetryInfo(Object a, Object b) {
550         StringBuilder buf = new StringBuilder();
551         if (a == b) {
552             return  "a == b";
553         }
554 
555         if (a.getClass() != b.getClass()) {
556             buf.append("Class of objects differ a=").append(a.getClass()).append(" vs b=").append(b.getClass());
557 
558             boolean aEb = a.equals(b);
559             boolean bEa = b.equals(a);
560             if (aEb != bEa) {
561                 buf.append("\nequals() of a or b object are asymmetric");
562                 buf.append("\na.equals(b) =").append(aEb);
563                 buf.append("\nb.equals(a) =").append(bEa);
564             }
565         }
566 
567         return buf.toString();
568     }
569 
objectInfo(Object o)570     protected static String objectInfo(Object o) {
571         return (o == null ? "class null" : o.getClass()) + " id= " + System.identityHashCode(o)
572                 + " hashCode= " + (o == null ? 0 : o.hashCode()) + " toString= " + String.valueOf(o);
573     }
574 
dumpKeyTypes(Object newVal, Object oldVal)575     private String dumpKeyTypes(Object newVal, Object oldVal) {
576         StringBuilder buf = new StringBuilder();
577         Set<Class<?>> types = new HashSet<Class<?>>();
578         for (Object o : _set) {
579             if (o != FREE && o != REMOVED) {
580                 if (o != null)
581                     types.add(o.getClass());
582                 else
583                     types.add(null);
584             }
585         }
586 
587         if (types.size() > 1) {
588             buf.append("\nMore than one type used for keys. Watch out for asymmetric equals(). " +
589                     "Read about the 'Liskov substitution principle' and the implications for equals() in java.");
590 
591             buf.append("\nKey types: ").append(types);
592             buf.append(equalsSymmetryInfo(newVal, oldVal));
593         }
594 
595         return buf.toString();
596     }
597 
598 
599     @Override
writeExternal(ObjectOutput out)600     public void writeExternal(ObjectOutput out) throws IOException {
601         // VERSION
602         out.writeByte(0);
603 
604         // SUPER
605         super.writeExternal(out);
606     }
607 
608 
609     @Override
readExternal(ObjectInput in)610     public void readExternal(ObjectInput in)
611             throws IOException, ClassNotFoundException {
612 
613         // VERSION
614         in.readByte();
615 
616         // SUPER
617         super.readExternal(in);
618     }
619 } // TObjectHash
620