1 package kmer; 2 3 import java.util.ArrayList; 4 import java.util.concurrent.atomic.AtomicLong; 5 import java.util.concurrent.locks.Lock; 6 import java.util.concurrent.locks.ReentrantLock; 7 8 import fileIO.ByteStreamWriter; 9 import fileIO.TextStreamWriter; 10 import shared.Primes; 11 import shared.Tools; 12 import structures.ByteBuilder; 13 import structures.SuperLongList; 14 15 /** 16 * @author Brian Bushnell 17 * @date Oct 23, 2013 18 * 19 */ 20 public final class KmerTable extends AbstractKmerTable { 21 22 /*--------------------------------------------------------------*/ 23 /*---------------- Initialization ----------------*/ 24 /*--------------------------------------------------------------*/ 25 KmerTable(int initialSize, boolean autoResize_)26 public KmerTable(int initialSize, boolean autoResize_){ 27 if(initialSize>1){ 28 initialSize=(int)Tools.min(maxPrime, Primes.primeAtLeast(initialSize)); 29 }else{ 30 initialSize=1; 31 } 32 prime=initialSize; 33 sizeLimit=(long) (initialSize*resizeMult); 34 array=new KmerLink[prime]; 35 autoResize=autoResize_; 36 } 37 38 /*--------------------------------------------------------------*/ 39 /*---------------- Public Methods ----------------*/ 40 /*--------------------------------------------------------------*/ 41 42 @Override increment(final long kmer, final int incr)43 public int increment(final long kmer, final int incr){ 44 final int cell=(int)(kmer%prime); 45 KmerLink n=array[cell], prev=null; 46 while(n!=null && n.pivot!=kmer){ 47 prev=n; 48 n=n.next; 49 } 50 if(n==null){ 51 n=new KmerLink(kmer, incr); 52 size++; 53 if(prev==null){ 54 array[cell]=n; 55 }else{ 56 prev.next=n; 57 } 58 if(autoResize && size>sizeLimit){resize();} 59 }else{ 60 n.value+=incr; 61 if(n.value<0){n.value=Integer.MAX_VALUE;} 62 } 63 return n.value; 64 } 65 66 @Override incrementAndReturnNumCreated(final long kmer, final int incr)67 public int incrementAndReturnNumCreated(final long kmer, final int incr){ 68 final int cell=(int)(kmer%prime); 69 KmerLink n=array[cell], prev=null; 70 while(n!=null && n.pivot!=kmer){ 71 prev=n; 72 n=n.next; 73 } 74 if(n==null){ 75 n=new KmerLink(kmer, incr); 76 size++; 77 if(prev==null){ 78 array[cell]=n; 79 }else{ 80 prev.next=n; 81 } 82 if(autoResize && size>sizeLimit){resize();} 83 return 1; 84 }else{ 85 n.value+=incr; 86 if(n.value<0){n.value=Integer.MAX_VALUE;} 87 return 0; 88 } 89 } 90 91 @Override set(long kmer, int value)92 public int set(long kmer, int value){ 93 int x=1, cell=(int)(kmer%prime); 94 final KmerLink n=array[cell]; 95 if(n==null){ 96 array[cell]=new KmerLink(kmer, value); 97 }else{ 98 x=n.set(kmer, value); 99 } 100 size+=x; 101 if(autoResize && size>sizeLimit){resize();} 102 return x; 103 } 104 105 @Override set(long kmer, int[] vals, int vlen)106 public int set(long kmer, int[] vals, int vlen) { 107 throw new RuntimeException("Unimplemented."); 108 } 109 110 @Override setIfNotPresent(long kmer, int value)111 public int setIfNotPresent(long kmer, int value){ 112 int x=1, cell=(int)(kmer%prime); 113 final KmerLink n=array[cell]; 114 if(n==null){ 115 array[cell]=new KmerLink(kmer, value); 116 }else{ 117 x=n.setIfNotPresent(kmer, value); 118 } 119 size+=x; 120 if(autoResize && size>sizeLimit){resize();} 121 return x; 122 } 123 124 @Override getValue(long kmer)125 public int getValue(long kmer){ 126 int cell=(int)(kmer%prime); 127 KmerLink n=array[cell]; 128 while(n!=null && n.pivot!=kmer){n=n.next;} 129 return n==null ? 0 : n.value; 130 } 131 132 @Override getValues(long kmer, int[] singleton)133 public int[] getValues(long kmer, int[] singleton){ 134 assert(array.length==0); 135 singleton[0]=getValue(kmer); 136 return singleton; 137 } 138 139 @Override contains(long kmer)140 public boolean contains(long kmer){ 141 KmerLink node=get(kmer); 142 return node!=null; 143 } 144 145 /*--------------------------------------------------------------*/ 146 /*---------------- Ownership ----------------*/ 147 /*--------------------------------------------------------------*/ 148 149 @Override initializeOwnership()150 public final void initializeOwnership(){ 151 for(KmerLink n : array){ 152 if(n!=null){n.initializeOwnership();} 153 } 154 } 155 156 @Override clearOwnership()157 public final void clearOwnership(){ 158 for(KmerLink n : array){ 159 if(n!=null){n.clearOwnership();} 160 } 161 } 162 163 @Override setOwner(final long kmer, final int newOwner)164 public final int setOwner(final long kmer, final int newOwner){ 165 final int cell=(int)(kmer%prime); 166 KmerLink n=array[cell]; 167 assert(n!=null); 168 return n.setOwner(kmer, newOwner); 169 } 170 171 @Override clearOwner(final long kmer, final int owner)172 public final boolean clearOwner(final long kmer, final int owner){ 173 final int cell=(int)(kmer%prime); 174 KmerLink n=array[cell]; 175 assert(n!=null); 176 return n.clearOwner(kmer, owner); 177 } 178 179 @Override getOwner(final long kmer)180 public final int getOwner(final long kmer){ 181 final int cell=(int)(kmer%prime); 182 KmerLink n=array[cell]; 183 assert(n!=null); 184 return n.getOwner(kmer); 185 } 186 187 /*--------------------------------------------------------------*/ 188 /*---------------- Nonpublic Methods ----------------*/ 189 /*--------------------------------------------------------------*/ 190 191 @Override get(long kmer)192 KmerLink get(long kmer){ 193 int cell=(int)(kmer%prime); 194 KmerLink n=array[cell]; 195 while(n!=null && n.pivot!=kmer){n=n.next;} 196 return n; 197 } 198 insert(KmerLink n)199 boolean insert(KmerLink n){ 200 n.next=null; 201 int cell=(int)(n.pivot%prime); 202 if(array[cell]==null){ 203 array[cell]=n; 204 return true; 205 } 206 return array[cell].insert(n); 207 } 208 209 /*--------------------------------------------------------------*/ 210 /*---------------- Private Methods ----------------*/ 211 /*--------------------------------------------------------------*/ 212 213 /*--------------------------------------------------------------*/ 214 /*---------------- Invalid Methods ----------------*/ 215 /*--------------------------------------------------------------*/ 216 217 /*--------------------------------------------------------------*/ 218 /*---------------- Resizing and Rebalancing ----------------*/ 219 /*--------------------------------------------------------------*/ 220 221 @Override canResize()222 boolean canResize() {return true;} 223 224 @Override canRebalance()225 public boolean canRebalance() {return false;} 226 227 @Override size()228 public long size() {return size;} 229 230 @Override arrayLength()231 public int arrayLength() {return array.length;} 232 233 @Override resize()234 synchronized void resize(){ 235 // assert(false); 236 // System.err.println("Resizing from "+prime+"; load="+(size*1f/prime)); 237 sizeLimit=Tools.max((long)(size*1.4), (long)(maxLoadFactor*prime)); 238 239 final long maxAllowedByLoadFactor=(long)(size*minLoadMult); 240 final long minAllowedByLoadFactor=(long)(size*maxLoadMult); 241 assert(maxAllowedByLoadFactor>=minAllowedByLoadFactor); 242 if(maxAllowedByLoadFactor<prime){return;} 243 244 long x=10+(long)(prime*resizeMult); 245 x=Tools.max(x, minAllowedByLoadFactor); 246 x=Tools.min(x, maxAllowedByLoadFactor); 247 248 int prime2=(int)Tools.min(maxPrime, Primes.primeAtLeast(x)); 249 250 if(prime2<=prime){return;} 251 252 prime=prime2; 253 // System.err.println("Resized to "+prime+"; load="+(size*1f/prime)); 254 KmerLink[] old=array; 255 array=new KmerLink[prime2]; 256 ArrayList<KmerLink> list=new ArrayList<KmerLink>(1000); 257 for(int i=0; i<old.length; i++){ 258 if(old[i]!=null){ 259 old[i].traverseInfix(list); 260 for(KmerLink n : list){insert(n);} 261 list.clear(); 262 } 263 } 264 sizeLimit=Tools.max((long)(size*1.4), (long)(maxLoadFactor*prime)); 265 } 266 267 @Override rebalance()268 public void rebalance(){ 269 ArrayList<KmerLink> list=new ArrayList<KmerLink>(1000); 270 for(int i=0; i<array.length; i++){ 271 if(array[i]!=null){array[i]=array[i].rebalance(list);} 272 } 273 } 274 275 /*--------------------------------------------------------------*/ 276 /*---------------- Info Dumping ----------------*/ 277 /*--------------------------------------------------------------*/ 278 279 @Deprecated 280 @Override dumpKmersAsText(TextStreamWriter tsw, int k, int mincount, int maxcount)281 public boolean dumpKmersAsText(TextStreamWriter tsw, int k, int mincount, int maxcount){ 282 throw new RuntimeException("TODO"); 283 } 284 285 @Override dumpKmersAsBytes_MT(final ByteStreamWriter bsw, final ByteBuilder bb, final int k, final int mincount, int maxcount, AtomicLong remaining)286 public boolean dumpKmersAsBytes_MT(final ByteStreamWriter bsw, final ByteBuilder bb, final int k, final int mincount, int maxcount, AtomicLong remaining){ 287 for(int i=0; i<array.length; i++){ 288 KmerLink node=array[i]; 289 if(node!=null && node.value>=mincount){ 290 node.dumpKmersAsBytes_MT(bsw, bb, k, mincount, maxcount, remaining); 291 } 292 } 293 return true; 294 } 295 296 @Deprecated 297 @Override dumpKmersAsBytes(ByteStreamWriter bsw, int k, int mincount, int maxcount, AtomicLong remaining)298 public boolean dumpKmersAsBytes(ByteStreamWriter bsw, int k, int mincount, int maxcount, AtomicLong remaining){ 299 throw new RuntimeException("TODO"); 300 } 301 302 @Deprecated 303 @Override fillHistogram(long[] ca, int max)304 public void fillHistogram(long[] ca, int max){ 305 throw new RuntimeException("TODO"); 306 } 307 308 @Deprecated 309 @Override fillHistogram(SuperLongList sll)310 public void fillHistogram(SuperLongList sll){ 311 throw new RuntimeException("TODO"); 312 } 313 314 @Deprecated 315 @Override countGC(long[] gcCounts, int max)316 public void countGC(long[] gcCounts, int max){ 317 throw new RuntimeException("TODO"); 318 } 319 320 @Deprecated 321 @Override regenerate(final int limit)322 public long regenerate(final int limit){ 323 throw new RuntimeException("TODO - remove zero-value links."); 324 } 325 326 /*--------------------------------------------------------------*/ 327 /*---------------- Fields ----------------*/ 328 /*--------------------------------------------------------------*/ 329 330 KmerLink[] array; 331 int prime; 332 long size=0; 333 long sizeLimit; 334 final boolean autoResize; 335 private final Lock lock=new ReentrantLock(); 336 337 @Override getLock()338 final Lock getLock(){return lock;} 339 340 /*--------------------------------------------------------------*/ 341 /*---------------- Static Fields ----------------*/ 342 /*--------------------------------------------------------------*/ 343 344 final static int maxPrime=(int)Primes.primeAtMost(Integer.MAX_VALUE); 345 final static float resizeMult=2f; //Resize by a minimum of this much 346 final static float minLoadFactor=0.5f; //Resize by enough to get the load above this factor 347 final static float maxLoadFactor=0.98f; //Resize by enough to get the load under this factor 348 final static float minLoadMult=1/minLoadFactor; 349 final static float maxLoadMult=1/maxLoadFactor; 350 351 } 352