1 package kmer; 2 3 import java.util.ArrayList; 4 import java.util.Arrays; 5 6 import shared.KillSwitch; 7 import shared.Primes; 8 import shared.Tools; 9 import structures.SuperLongList; 10 11 /** 12 * Stores kmers in a long[] and counts in an int[], with a victim cache. 13 * @author Brian Bushnell 14 * @date Oct 25, 2013 15 * 16 */ 17 public final class HashArray1D extends HashArray { 18 19 /*--------------------------------------------------------------*/ 20 /*---------------- Initialization ----------------*/ 21 /*--------------------------------------------------------------*/ 22 HashArray1D(int[] schedule_, long coreMask_)23 public HashArray1D(int[] schedule_, long coreMask_){ 24 super(schedule_, coreMask_, false); 25 values=allocInt1D(prime+extra); 26 } 27 HashArray1D(int initialSize, long coreMask, boolean autoResize_)28 public HashArray1D(int initialSize, long coreMask, boolean autoResize_){ 29 super(initialSize, coreMask, autoResize_, false); 30 values=allocInt1D(prime+extra); 31 } 32 33 /*--------------------------------------------------------------*/ 34 /*---------------- Public Methods ----------------*/ 35 /*--------------------------------------------------------------*/ 36 37 @Override increment(final long kmer, final int incr)38 public final int increment(final long kmer, final int incr){ 39 int cell=kmerToCell(kmer); 40 41 for(final int max=cell+extra; cell<max; cell++){ 42 long n=array[cell]; 43 if(n==kmer){ 44 values[cell]+=incr; 45 if(values[cell]<0){values[cell]=Integer.MAX_VALUE;} 46 return values[cell]; 47 }else if(n==NOT_PRESENT){ 48 array[cell]=kmer; 49 size++; 50 values[cell]=incr; 51 if(autoResize && size+victims.size>sizeLimit){resize();} 52 return 1; 53 } 54 } 55 int x=victims.increment(kmer, incr); 56 if(autoResize && size+victims.size>sizeLimit){resize();} 57 return x; 58 } 59 60 @Override incrementAndReturnNumCreated(final long kmer, final int incr)61 public final int incrementAndReturnNumCreated(final long kmer, final int incr){ 62 int cell=kmerToCell(kmer); 63 64 for(final int max=cell+extra; cell<max; cell++){ 65 long n=array[cell]; 66 if(n==kmer){ 67 values[cell]+=incr; 68 if(values[cell]<0){values[cell]=Integer.MAX_VALUE;} 69 return 0; 70 }else if(n==NOT_PRESENT){ 71 array[cell]=kmer; 72 size++; 73 values[cell]=incr; 74 if(autoResize && size+victims.size>sizeLimit){resize();} 75 return 1; 76 } 77 } 78 return victims.incrementAndReturnNumCreated(kmer, incr); 79 } 80 81 @Override fillHistogram(SuperLongList sll)82 public void fillHistogram(SuperLongList sll){ 83 for(int i=0; i<values.length; i++){ 84 int count=values[i]; 85 if(count>0){sll.add(count);} 86 } 87 if(victims!=null){ 88 victims.fillHistogram(sll); 89 } 90 } 91 92 /*--------------------------------------------------------------*/ 93 /*---------------- Nonpublic Methods ----------------*/ 94 /*--------------------------------------------------------------*/ 95 96 @Override readCellValue(int cell)97 public final int readCellValue(int cell) { 98 return values[cell]; 99 } 100 101 @Override readCellValues(int cell, int[] singleton)102 protected final int[] readCellValues(int cell, int[] singleton) { 103 singleton[0]=values[cell]; 104 return singleton; 105 } 106 107 @Override insertValue(long kmer, int v, int cell)108 protected final void insertValue(long kmer, int v, int cell) { 109 assert(array[cell]==kmer); 110 values[cell]=v; 111 } 112 113 @Override insertValue(long kmer, int[] vals, int cell, int vlen)114 protected final void insertValue(long kmer, int[] vals, int cell, int vlen) { 115 assert(array[cell]==kmer); 116 assert(vals.length==1); 117 values[cell]=vals[0]; 118 } 119 120 /*--------------------------------------------------------------*/ 121 /*---------------- Resizing and Rebalancing ----------------*/ 122 /*--------------------------------------------------------------*/ 123 124 @Override canRebalance()125 public final boolean canRebalance() {return false;} 126 127 // @Override 128 // protected synchronized void resize_old(){ 129 //// assert(false); 130 //// System.err.println("Resizing from "+prime+"; load="+(size*1f/prime)); 131 // if(prime>=maxPrime){ 132 // sizeLimit=0xFFFFFFFFFFFFL; 133 // return; 134 // } 135 // 136 // final long oldSize=size, oldVSize=victims.size; 137 // final long totalSize=oldSize+oldVSize; 138 // 139 // final long maxAllowedByLoadFactor=(long)(totalSize*minLoadMult); 140 // final long minAllowedByLoadFactor=(long)(totalSize*maxLoadMult); 141 // 142 //// sizeLimit=Tools.min((long)(maxLoadFactor*prime), maxPrime); 143 // 144 // assert(maxAllowedByLoadFactor>=minAllowedByLoadFactor); 145 // if(maxAllowedByLoadFactor<prime){ 146 // sizeLimit=(long)(maxLoadFactor*prime); 147 // return; 148 // } 149 // 150 // long x=10+(long)(prime*resizeMult); 151 // x=Tools.max(x, minAllowedByLoadFactor); 152 // x=Tools.min(x, maxAllowedByLoadFactor); 153 // 154 // int prime2=(int)Tools.min(maxPrime, Primes.primeAtLeast(x)); 155 // 156 // if(prime2<=prime){ 157 // sizeLimit=(long)(maxLoadFactor*prime); 158 // assert(prime2==prime) : "Resizing to smaller array? "+totalSize+", "+prime+", "+x; 159 // return; 160 // } 161 // 162 // prime=prime2; 163 //// System.err.println("Resized to "+prime+"; load="+(size*1f/prime)); 164 // long[] oldk=array; 165 // int[] oldc=values; 166 // KmerNode[] oldv=victims.array; 167 // array=allocLong1D(prime2+extra); 168 // Arrays.fill(array, NOT_PRESENT); 169 // values=allocInt1D(prime2+extra); 170 // ArrayList<KmerNode> list=victims.toList(); 171 // Arrays.fill(oldv, null); 172 // victims.size=0; 173 // size=0; 174 // sizeLimit=Long.MAX_VALUE; 175 // 176 // if(TWO_PASS_RESIZE){ 177 // for(int i=0; i<oldk.length; i++){ 178 // if(oldk[i]>NOT_PRESENT && oldc[i]>1){set(oldk[i], oldc[i]);} 179 // } 180 // for(KmerNode n : list){ 181 // if(n.pivot>NOT_PRESENT && n.value()>1){set(n.pivot, n.value());} 182 // } 183 // for(int i=0; i<oldk.length; i++){ 184 // if(oldk[i]>NOT_PRESENT && oldc[i]<=1){set(oldk[i], oldc[i]);} 185 // } 186 // for(KmerNode n : list){ 187 // if(n.pivot>NOT_PRESENT && n.value()<=1){set(n.pivot, n.value());} 188 // } 189 // }else{ 190 // for(int i=0; i<oldk.length; i++){ 191 // if(oldk[i]>NOT_PRESENT){set(oldk[i], oldc[i]);} 192 // } 193 // for(KmerNode n : list){ 194 // if(n.pivot>NOT_PRESENT){set(n.pivot, n.value());} 195 // } 196 // } 197 // 198 // assert(oldSize+oldVSize==size+victims.size) : oldSize+", "+oldVSize+" -> "+size+", "+victims.size; 199 // 200 // sizeLimit=(long)(maxLoadFactor*prime); 201 // } 202 203 @Override resize()204 protected synchronized void resize(){ 205 // assert(false); 206 // System.err.println("Resizing from "+prime+"; load="+(size*1f/prime)); 207 if(prime>=maxPrime){ 208 // sizeLimit=0xFFFFFFFFFFFFL; 209 KillSwitch.memKill(new OutOfMemoryError()); 210 } 211 212 final long oldSize=size, oldVSize=victims.size; 213 if(schedule!=null){ 214 final long oldPrime=prime; 215 prime=nextScheduleSize(); 216 if(prime<=oldPrime){KillSwitch.memKill(new OutOfMemoryError());} 217 sizeLimit=(long)((atMaxSize() ? maxLoadFactorFinal : maxLoadFactor)*prime); 218 }else{//Old method 219 final long totalSize=oldSize+oldVSize; 220 221 final long maxAllowedByLoadFactor=(long)(totalSize*minLoadMult); 222 final long minAllowedByLoadFactor=(long)(totalSize*maxLoadMult); 223 224 // sizeLimit=Tools.min((long)(maxLoadFactor*prime), maxPrime); 225 226 assert(maxAllowedByLoadFactor>=minAllowedByLoadFactor); 227 if(maxAllowedByLoadFactor<prime){ 228 sizeLimit=(long)(maxLoadFactor*prime); 229 return; 230 } 231 232 long x=10+(long)(prime*resizeMult); 233 x=Tools.max(x, minAllowedByLoadFactor); 234 x=Tools.min(x, maxAllowedByLoadFactor); 235 236 int prime2=(int)Tools.min(maxPrime, Primes.primeAtLeast(x)); 237 238 if(prime2<=prime){ 239 sizeLimit=(long)(maxLoadFactor*prime); 240 assert(prime2==prime) : "Resizing to smaller array? "+totalSize+", "+prime+", "+x; 241 return; 242 } 243 244 prime=prime2; 245 sizeLimit=(long)(maxLoadFactor*prime); 246 } 247 // System.err.println("Resized to "+prime+"; load="+(size*1f/prime)); 248 long[] oldk=array; 249 int[] oldc=values; 250 KmerNode[] oldv=victims.array; 251 array=allocLong1D(prime+extra); 252 Arrays.fill(array, NOT_PRESENT); 253 // System.err.print(prime+" ");//123 254 values=allocInt1D(prime+extra); 255 ArrayList<KmerNode> list=victims.toList(); 256 Arrays.fill(oldv, null); 257 victims.size=0; 258 size=0; 259 260 if(TWO_PASS_RESIZE){ 261 for(int i=0; i<oldk.length; i++){ 262 if(oldk[i]>NOT_PRESENT && oldc[i]>1){set(oldk[i], oldc[i]);} 263 } 264 for(KmerNode n : list){ 265 if(n.pivot>NOT_PRESENT && n.value()>1){set(n.pivot, n.value());} 266 } 267 for(int i=0; i<oldk.length; i++){ 268 if(oldk[i]>NOT_PRESENT && oldc[i]<=1){set(oldk[i], oldc[i]);} 269 } 270 for(KmerNode n : list){ 271 if(n.pivot>NOT_PRESENT && n.value()<=1){set(n.pivot, n.value());} 272 } 273 }else{ 274 for(int i=0; i<oldk.length; i++){ 275 if(oldk[i]>NOT_PRESENT){set(oldk[i], oldc[i]);} 276 } 277 for(KmerNode n : list){ 278 if(n.pivot>NOT_PRESENT){set(n.pivot, n.value());} 279 } 280 } 281 282 assert(oldSize+oldVSize==size+victims.size) : oldSize+", "+oldVSize+" -> "+size+", "+victims.size; 283 } 284 285 @Deprecated 286 @Override rebalance()287 public void rebalance(){ 288 throw new RuntimeException("Unimplemented."); 289 } 290 291 @Override regenerate(final int limit)292 public long regenerate(final int limit){ 293 long sum=0; 294 assert(owners==null) : "Clear ownership before regeneration."; 295 for(int pos=0; pos<values.length; pos++){ 296 final long key=array[pos]; 297 if(key>=0){ 298 final int value=values[pos]; 299 values[pos]=NOT_PRESENT; 300 array[pos]=NOT_PRESENT; 301 size--; 302 if(value>limit){ 303 set(key, value); 304 }else{ 305 sum++; 306 } 307 } 308 } 309 310 ArrayList<KmerNode> nodes=victims.toList(); 311 victims.clear(); 312 for(KmerNode node : nodes){ 313 int value=node.value(); 314 if(value<=limit){ 315 sum++; 316 }else{ 317 set(node.pivot, node.value()); 318 } 319 } 320 321 return sum; 322 } 323 324 @Override toString()325 public String toString(){ 326 return Arrays.toString(array); 327 } 328 walk()329 public Walker walk(){ 330 return new Walker1D(); 331 } 332 333 /*--------------------------------------------------------------*/ 334 /*---------------- Fields ----------------*/ 335 /*--------------------------------------------------------------*/ 336 337 private int[] values; 338 values()339 public int[] values(){return values;} 340 341 /*--------------------------------------------------------------*/ 342 /*---------------- Walker ----------------*/ 343 /*--------------------------------------------------------------*/ 344 345 public class Walker1D extends Walker { 346 Walker1D()347 Walker1D(){ 348 ha=HashArray1D.this; 349 victims=ha.victims().toList(); 350 } 351 352 /** 353 * Fills this object with the next key and value. 354 * @return True if successful. 355 */ next()356 public boolean next(){ 357 while(i<values.length && values[i]==NOT_XPRESENT){i++;} 358 if(i<values.length){ 359 kmer=array[i]; 360 value=values[i]; 361 assert(value!=NOT_XPRESENT); 362 i++; 363 return true; 364 } 365 if(i2<victims.size()){ 366 KmerNode kn=victims.get(i2); 367 kmer=kn.pivot; 368 value=kn.value(); 369 assert(value!=NOT_XPRESENT); 370 i2++; 371 return true; 372 } 373 kmer=-1; 374 value=NOT_XPRESENT; 375 return false; 376 } 377 kmer()378 public long kmer(){return kmer;} value()379 public int value(){return value;} 380 381 /** Hash map over which this is walking */ 382 private HashArray1D ha; 383 /** Victim list of the hash map */ 384 private ArrayList<KmerNode> victims; 385 386 private long kmer; 387 private int value; 388 389 /** Potential next kmer cell; may point to an empty cell */ 390 private int i=0; 391 /** Next victim in list */ 392 private int i2=0; 393 } 394 395 //TODO: Remove after fixing array initialization 396 private static final int NOT_XPRESENT=0; 397 398 399 } 400