1 package kmer;
2 
3 import java.util.ArrayList;
4 import java.util.concurrent.atomic.AtomicLong;
5 import java.util.concurrent.locks.Lock;
6 import java.util.concurrent.locks.ReentrantLock;
7 
8 import fileIO.ByteStreamWriter;
9 import fileIO.TextStreamWriter;
10 import shared.Primes;
11 import shared.Tools;
12 import structures.ByteBuilder;
13 import structures.SuperLongList;
14 
15 /**
16  * @author Brian Bushnell
17  * @date Oct 23, 2013
18  *
19  */
20 public final class KmerTable extends AbstractKmerTable {
21 
22 	/*--------------------------------------------------------------*/
23 	/*----------------        Initialization        ----------------*/
24 	/*--------------------------------------------------------------*/
25 
KmerTable(int initialSize, boolean autoResize_)26 	public KmerTable(int initialSize, boolean autoResize_){
27 		if(initialSize>1){
28 			initialSize=(int)Tools.min(maxPrime, Primes.primeAtLeast(initialSize));
29 		}else{
30 			initialSize=1;
31 		}
32 		prime=initialSize;
33 		sizeLimit=(long) (initialSize*resizeMult);
34 		array=new KmerLink[prime];
35 		autoResize=autoResize_;
36 	}
37 
38 	/*--------------------------------------------------------------*/
39 	/*----------------        Public Methods        ----------------*/
40 	/*--------------------------------------------------------------*/
41 
42 	@Override
increment(final long kmer, final int incr)43 	public int increment(final long kmer, final int incr){
44 		final int cell=(int)(kmer%prime);
45 		KmerLink n=array[cell], prev=null;
46 		while(n!=null && n.pivot!=kmer){
47 			prev=n;
48 			n=n.next;
49 		}
50 		if(n==null){
51 			n=new KmerLink(kmer, incr);
52 			size++;
53 			if(prev==null){
54 				array[cell]=n;
55 			}else{
56 				prev.next=n;
57 			}
58 			if(autoResize && size>sizeLimit){resize();}
59 		}else{
60 			n.value+=incr;
61 			if(n.value<0){n.value=Integer.MAX_VALUE;}
62 		}
63 		return n.value;
64 	}
65 
66 	@Override
incrementAndReturnNumCreated(final long kmer, final int incr)67 	public int incrementAndReturnNumCreated(final long kmer, final int incr){
68 		final int cell=(int)(kmer%prime);
69 		KmerLink n=array[cell], prev=null;
70 		while(n!=null && n.pivot!=kmer){
71 			prev=n;
72 			n=n.next;
73 		}
74 		if(n==null){
75 			n=new KmerLink(kmer, incr);
76 			size++;
77 			if(prev==null){
78 				array[cell]=n;
79 			}else{
80 				prev.next=n;
81 			}
82 			if(autoResize && size>sizeLimit){resize();}
83 			return 1;
84 		}else{
85 			n.value+=incr;
86 			if(n.value<0){n.value=Integer.MAX_VALUE;}
87 			return 0;
88 		}
89 	}
90 
91 	@Override
set(long kmer, int value)92 	public int set(long kmer, int value){
93 		int x=1, cell=(int)(kmer%prime);
94 		final KmerLink n=array[cell];
95 		if(n==null){
96 			array[cell]=new KmerLink(kmer, value);
97 		}else{
98 			x=n.set(kmer, value);
99 		}
100 		size+=x;
101 		if(autoResize && size>sizeLimit){resize();}
102 		return x;
103 	}
104 
105 	@Override
set(long kmer, int[] vals, int vlen)106 	public int set(long kmer, int[] vals, int vlen) {
107 		throw new RuntimeException("Unimplemented.");
108 	}
109 
110 	@Override
setIfNotPresent(long kmer, int value)111 	public int setIfNotPresent(long kmer, int value){
112 		int x=1, cell=(int)(kmer%prime);
113 		final KmerLink n=array[cell];
114 		if(n==null){
115 			array[cell]=new KmerLink(kmer, value);
116 		}else{
117 			x=n.setIfNotPresent(kmer, value);
118 		}
119 		size+=x;
120 		if(autoResize && size>sizeLimit){resize();}
121 		return x;
122 	}
123 
124 	@Override
getValue(long kmer)125 	public int getValue(long kmer){
126 		int cell=(int)(kmer%prime);
127 		KmerLink n=array[cell];
128 		while(n!=null && n.pivot!=kmer){n=n.next;}
129 		return n==null ? 0 : n.value;
130 	}
131 
132 	@Override
getValues(long kmer, int[] singleton)133 	public int[] getValues(long kmer, int[] singleton){
134 		assert(array.length==0);
135 		singleton[0]=getValue(kmer);
136 		return singleton;
137 	}
138 
139 	@Override
contains(long kmer)140 	public boolean contains(long kmer){
141 		KmerLink node=get(kmer);
142 		return node!=null;
143 	}
144 
145 	/*--------------------------------------------------------------*/
146 	/*----------------          Ownership           ----------------*/
147 	/*--------------------------------------------------------------*/
148 
149 	@Override
initializeOwnership()150 	public final void initializeOwnership(){
151 		for(KmerLink n : array){
152 			if(n!=null){n.initializeOwnership();}
153 		}
154 	}
155 
156 	@Override
clearOwnership()157 	public final void clearOwnership(){
158 		for(KmerLink n : array){
159 			if(n!=null){n.clearOwnership();}
160 		}
161 	}
162 
163 	@Override
setOwner(final long kmer, final int newOwner)164 	public final int setOwner(final long kmer, final int newOwner){
165 		final int cell=(int)(kmer%prime);
166 		KmerLink n=array[cell];
167 		assert(n!=null);
168 		return n.setOwner(kmer, newOwner);
169 	}
170 
171 	@Override
clearOwner(final long kmer, final int owner)172 	public final boolean clearOwner(final long kmer, final int owner){
173 		final int cell=(int)(kmer%prime);
174 		KmerLink n=array[cell];
175 		assert(n!=null);
176 		return n.clearOwner(kmer, owner);
177 	}
178 
179 	@Override
getOwner(final long kmer)180 	public final int getOwner(final long kmer){
181 		final int cell=(int)(kmer%prime);
182 		KmerLink n=array[cell];
183 		assert(n!=null);
184 		return n.getOwner(kmer);
185 	}
186 
187 	/*--------------------------------------------------------------*/
188 	/*----------------      Nonpublic Methods       ----------------*/
189 	/*--------------------------------------------------------------*/
190 
191 	@Override
get(long kmer)192 	KmerLink get(long kmer){
193 		int cell=(int)(kmer%prime);
194 		KmerLink n=array[cell];
195 		while(n!=null && n.pivot!=kmer){n=n.next;}
196 		return n;
197 	}
198 
insert(KmerLink n)199 	boolean insert(KmerLink n){
200 		n.next=null;
201 		int cell=(int)(n.pivot%prime);
202 		if(array[cell]==null){
203 			array[cell]=n;
204 			return true;
205 		}
206 		return array[cell].insert(n);
207 	}
208 
209 	/*--------------------------------------------------------------*/
210 	/*----------------       Private Methods        ----------------*/
211 	/*--------------------------------------------------------------*/
212 
213 	/*--------------------------------------------------------------*/
214 	/*----------------       Invalid Methods        ----------------*/
215 	/*--------------------------------------------------------------*/
216 
217 	/*--------------------------------------------------------------*/
218 	/*----------------   Resizing and Rebalancing   ----------------*/
219 	/*--------------------------------------------------------------*/
220 
221 	@Override
canResize()222 	boolean canResize() {return true;}
223 
224 	@Override
canRebalance()225 	public boolean canRebalance() {return false;}
226 
227 	@Override
size()228 	public long size() {return size;}
229 
230 	@Override
arrayLength()231 	public int arrayLength() {return array.length;}
232 
233 	@Override
resize()234 	synchronized void resize(){
235 //		assert(false);
236 //		System.err.println("Resizing from "+prime+"; load="+(size*1f/prime));
237 		sizeLimit=Tools.max((long)(size*1.4), (long)(maxLoadFactor*prime));
238 
239 		final long maxAllowedByLoadFactor=(long)(size*minLoadMult);
240 		final long minAllowedByLoadFactor=(long)(size*maxLoadMult);
241 		assert(maxAllowedByLoadFactor>=minAllowedByLoadFactor);
242 		if(maxAllowedByLoadFactor<prime){return;}
243 
244 		long x=10+(long)(prime*resizeMult);
245 		x=Tools.max(x, minAllowedByLoadFactor);
246 		x=Tools.min(x, maxAllowedByLoadFactor);
247 
248 		int prime2=(int)Tools.min(maxPrime, Primes.primeAtLeast(x));
249 
250 		if(prime2<=prime){return;}
251 
252 		prime=prime2;
253 //		System.err.println("Resized to "+prime+"; load="+(size*1f/prime));
254 		KmerLink[] old=array;
255 		array=new KmerLink[prime2];
256 		ArrayList<KmerLink> list=new ArrayList<KmerLink>(1000);
257 		for(int i=0; i<old.length; i++){
258 			if(old[i]!=null){
259 				old[i].traverseInfix(list);
260 				for(KmerLink n : list){insert(n);}
261 				list.clear();
262 			}
263 		}
264 		sizeLimit=Tools.max((long)(size*1.4), (long)(maxLoadFactor*prime));
265 	}
266 
267 	@Override
rebalance()268 	public void rebalance(){
269 		ArrayList<KmerLink> list=new ArrayList<KmerLink>(1000);
270 		for(int i=0; i<array.length; i++){
271 			if(array[i]!=null){array[i]=array[i].rebalance(list);}
272 		}
273 	}
274 
275 	/*--------------------------------------------------------------*/
276 	/*----------------         Info Dumping         ----------------*/
277 	/*--------------------------------------------------------------*/
278 
279 	@Deprecated
280 	@Override
dumpKmersAsText(TextStreamWriter tsw, int k, int mincount, int maxcount)281 	public boolean dumpKmersAsText(TextStreamWriter tsw, int k, int mincount, int maxcount){
282 		throw new RuntimeException("TODO");
283 	}
284 
285 	@Override
dumpKmersAsBytes_MT(final ByteStreamWriter bsw, final ByteBuilder bb, final int k, final int mincount, int maxcount, AtomicLong remaining)286 	public boolean dumpKmersAsBytes_MT(final ByteStreamWriter bsw, final ByteBuilder bb, final int k, final int mincount, int maxcount, AtomicLong remaining){
287 		for(int i=0; i<array.length; i++){
288 			KmerLink node=array[i];
289 			if(node!=null && node.value>=mincount){
290 				node.dumpKmersAsBytes_MT(bsw, bb, k, mincount, maxcount, remaining);
291 			}
292 		}
293 		return true;
294 	}
295 
296 	@Deprecated
297 	@Override
dumpKmersAsBytes(ByteStreamWriter bsw, int k, int mincount, int maxcount, AtomicLong remaining)298 	public boolean dumpKmersAsBytes(ByteStreamWriter bsw, int k, int mincount, int maxcount, AtomicLong remaining){
299 		throw new RuntimeException("TODO");
300 	}
301 
302 	@Deprecated
303 	@Override
fillHistogram(long[] ca, int max)304 	public void fillHistogram(long[] ca, int max){
305 		throw new RuntimeException("TODO");
306 	}
307 
308 	@Deprecated
309 	@Override
fillHistogram(SuperLongList sll)310 	public void fillHistogram(SuperLongList sll){
311 		throw new RuntimeException("TODO");
312 	}
313 
314 	@Deprecated
315 	@Override
countGC(long[] gcCounts, int max)316 	public void countGC(long[] gcCounts, int max){
317 		throw new RuntimeException("TODO");
318 	}
319 
320 	@Deprecated
321 	@Override
regenerate(final int limit)322 	public long regenerate(final int limit){
323 		throw new RuntimeException("TODO - remove zero-value links.");
324 	}
325 
326 	/*--------------------------------------------------------------*/
327 	/*----------------            Fields            ----------------*/
328 	/*--------------------------------------------------------------*/
329 
330 	KmerLink[] array;
331 	int prime;
332 	long size=0;
333 	long sizeLimit;
334 	final boolean autoResize;
335 	private final Lock lock=new ReentrantLock();
336 
337 	@Override
getLock()338 	final Lock getLock(){return lock;}
339 
340 	/*--------------------------------------------------------------*/
341 	/*----------------        Static Fields         ----------------*/
342 	/*--------------------------------------------------------------*/
343 
344 	final static int maxPrime=(int)Primes.primeAtMost(Integer.MAX_VALUE);
345 	final static float resizeMult=2f; //Resize by a minimum of this much
346 	final static float minLoadFactor=0.5f; //Resize by enough to get the load above this factor
347 	final static float maxLoadFactor=0.98f; //Resize by enough to get the load under this factor
348 	final static float minLoadMult=1/minLoadFactor;
349 	final static float maxLoadMult=1/maxLoadFactor;
350 
351 }
352