1 package kmer;
2 
3 import java.util.ArrayList;
4 import java.util.Arrays;
5 
6 import shared.KillSwitch;
7 import shared.Primes;
8 import shared.Tools;
9 import structures.SuperLongList;
10 
11 /**
12  * Stores kmers in a long[] and counts in an int[], with a victim cache.
13  * @author Brian Bushnell
14  * @date Oct 25, 2013
15  *
16  */
17 public final class HashArray1D extends HashArray {
18 
19 	/*--------------------------------------------------------------*/
20 	/*----------------        Initialization        ----------------*/
21 	/*--------------------------------------------------------------*/
22 
HashArray1D(int[] schedule_, long coreMask_)23 	public HashArray1D(int[] schedule_, long coreMask_){
24 		super(schedule_, coreMask_, false);
25 		values=allocInt1D(prime+extra);
26 	}
27 
HashArray1D(int initialSize, long coreMask, boolean autoResize_)28 	public HashArray1D(int initialSize, long coreMask, boolean autoResize_){
29 		super(initialSize, coreMask, autoResize_, false);
30 		values=allocInt1D(prime+extra);
31 	}
32 
33 	/*--------------------------------------------------------------*/
34 	/*----------------        Public Methods        ----------------*/
35 	/*--------------------------------------------------------------*/
36 
37 	@Override
increment(final long kmer, final int incr)38 	public final int increment(final long kmer, final int incr){
39 		int cell=kmerToCell(kmer);
40 
41 		for(final int max=cell+extra; cell<max; cell++){
42 			long n=array[cell];
43 			if(n==kmer){
44 				values[cell]+=incr;
45 				if(values[cell]<0){values[cell]=Integer.MAX_VALUE;}
46 				return values[cell];
47 			}else if(n==NOT_PRESENT){
48 				array[cell]=kmer;
49 				size++;
50 				values[cell]=incr;
51 				if(autoResize && size+victims.size>sizeLimit){resize();}
52 				return 1;
53 			}
54 		}
55 		int x=victims.increment(kmer, incr);
56 		if(autoResize && size+victims.size>sizeLimit){resize();}
57 		return x;
58 	}
59 
60 	@Override
incrementAndReturnNumCreated(final long kmer, final int incr)61 	public final int incrementAndReturnNumCreated(final long kmer, final int incr){
62 		int cell=kmerToCell(kmer);
63 
64 		for(final int max=cell+extra; cell<max; cell++){
65 			long n=array[cell];
66 			if(n==kmer){
67 				values[cell]+=incr;
68 				if(values[cell]<0){values[cell]=Integer.MAX_VALUE;}
69 				return 0;
70 			}else if(n==NOT_PRESENT){
71 				array[cell]=kmer;
72 				size++;
73 				values[cell]=incr;
74 				if(autoResize && size+victims.size>sizeLimit){resize();}
75 				return 1;
76 			}
77 		}
78 		return victims.incrementAndReturnNumCreated(kmer, incr);
79 	}
80 
81 	@Override
fillHistogram(SuperLongList sll)82 	public void fillHistogram(SuperLongList sll){
83 		for(int i=0; i<values.length; i++){
84 			int count=values[i];
85 			if(count>0){sll.add(count);}
86 		}
87 		if(victims!=null){
88 			victims.fillHistogram(sll);
89 		}
90 	}
91 
92 	/*--------------------------------------------------------------*/
93 	/*----------------      Nonpublic Methods       ----------------*/
94 	/*--------------------------------------------------------------*/
95 
96 	@Override
readCellValue(int cell)97 	public final int readCellValue(int cell) {
98 		return values[cell];
99 	}
100 
101 	@Override
readCellValues(int cell, int[] singleton)102 	protected final int[] readCellValues(int cell, int[] singleton) {
103 		singleton[0]=values[cell];
104 		return singleton;
105 	}
106 
107 	@Override
insertValue(long kmer, int v, int cell)108 	protected final void insertValue(long kmer, int v, int cell) {
109 		assert(array[cell]==kmer);
110 		values[cell]=v;
111 	}
112 
113 	@Override
insertValue(long kmer, int[] vals, int cell, int vlen)114 	protected final void insertValue(long kmer, int[] vals, int cell, int vlen) {
115 		assert(array[cell]==kmer);
116 		assert(vals.length==1);
117 		values[cell]=vals[0];
118 	}
119 
120 	/*--------------------------------------------------------------*/
121 	/*----------------   Resizing and Rebalancing   ----------------*/
122 	/*--------------------------------------------------------------*/
123 
124 	@Override
canRebalance()125 	public final boolean canRebalance() {return false;}
126 
127 //	@Override
128 //	protected synchronized void resize_old(){
129 ////		assert(false);
130 ////		System.err.println("Resizing from "+prime+"; load="+(size*1f/prime));
131 //		if(prime>=maxPrime){
132 //			sizeLimit=0xFFFFFFFFFFFFL;
133 //			return;
134 //		}
135 //
136 //		final long oldSize=size, oldVSize=victims.size;
137 //		final long totalSize=oldSize+oldVSize;
138 //
139 //		final long maxAllowedByLoadFactor=(long)(totalSize*minLoadMult);
140 //		final long minAllowedByLoadFactor=(long)(totalSize*maxLoadMult);
141 //
142 ////		sizeLimit=Tools.min((long)(maxLoadFactor*prime), maxPrime);
143 //
144 //		assert(maxAllowedByLoadFactor>=minAllowedByLoadFactor);
145 //		if(maxAllowedByLoadFactor<prime){
146 //			sizeLimit=(long)(maxLoadFactor*prime);
147 //			return;
148 //		}
149 //
150 //		long x=10+(long)(prime*resizeMult);
151 //		x=Tools.max(x, minAllowedByLoadFactor);
152 //		x=Tools.min(x, maxAllowedByLoadFactor);
153 //
154 //		int prime2=(int)Tools.min(maxPrime, Primes.primeAtLeast(x));
155 //
156 //		if(prime2<=prime){
157 //			sizeLimit=(long)(maxLoadFactor*prime);
158 //			assert(prime2==prime) : "Resizing to smaller array? "+totalSize+", "+prime+", "+x;
159 //			return;
160 //		}
161 //
162 //		prime=prime2;
163 ////		System.err.println("Resized to "+prime+"; load="+(size*1f/prime));
164 //		long[] oldk=array;
165 //		int[] oldc=values;
166 //		KmerNode[] oldv=victims.array;
167 //		array=allocLong1D(prime2+extra);
168 //		Arrays.fill(array, NOT_PRESENT);
169 //		values=allocInt1D(prime2+extra);
170 //		ArrayList<KmerNode> list=victims.toList();
171 //		Arrays.fill(oldv, null);
172 //		victims.size=0;
173 //		size=0;
174 //		sizeLimit=Long.MAX_VALUE;
175 //
176 //		if(TWO_PASS_RESIZE){
177 //			for(int i=0; i<oldk.length; i++){
178 //				if(oldk[i]>NOT_PRESENT && oldc[i]>1){set(oldk[i], oldc[i]);}
179 //			}
180 //			for(KmerNode n : list){
181 //				if(n.pivot>NOT_PRESENT && n.value()>1){set(n.pivot, n.value());}
182 //			}
183 //			for(int i=0; i<oldk.length; i++){
184 //				if(oldk[i]>NOT_PRESENT && oldc[i]<=1){set(oldk[i], oldc[i]);}
185 //			}
186 //			for(KmerNode n : list){
187 //				if(n.pivot>NOT_PRESENT && n.value()<=1){set(n.pivot, n.value());}
188 //			}
189 //		}else{
190 //			for(int i=0; i<oldk.length; i++){
191 //				if(oldk[i]>NOT_PRESENT){set(oldk[i], oldc[i]);}
192 //			}
193 //			for(KmerNode n : list){
194 //				if(n.pivot>NOT_PRESENT){set(n.pivot, n.value());}
195 //			}
196 //		}
197 //
198 //		assert(oldSize+oldVSize==size+victims.size) : oldSize+", "+oldVSize+" -> "+size+", "+victims.size;
199 //
200 //		sizeLimit=(long)(maxLoadFactor*prime);
201 //	}
202 
203 	@Override
resize()204 	protected synchronized void resize(){
205 //		assert(false);
206 //		System.err.println("Resizing from "+prime+"; load="+(size*1f/prime));
207 		if(prime>=maxPrime){
208 //			sizeLimit=0xFFFFFFFFFFFFL;
209 			KillSwitch.memKill(new OutOfMemoryError());
210 		}
211 
212 		final long oldSize=size, oldVSize=victims.size;
213 		if(schedule!=null){
214 			final long oldPrime=prime;
215 			prime=nextScheduleSize();
216 			if(prime<=oldPrime){KillSwitch.memKill(new OutOfMemoryError());}
217 			sizeLimit=(long)((atMaxSize() ? maxLoadFactorFinal : maxLoadFactor)*prime);
218 		}else{//Old method
219 			final long totalSize=oldSize+oldVSize;
220 
221 			final long maxAllowedByLoadFactor=(long)(totalSize*minLoadMult);
222 			final long minAllowedByLoadFactor=(long)(totalSize*maxLoadMult);
223 
224 			//		sizeLimit=Tools.min((long)(maxLoadFactor*prime), maxPrime);
225 
226 			assert(maxAllowedByLoadFactor>=minAllowedByLoadFactor);
227 			if(maxAllowedByLoadFactor<prime){
228 				sizeLimit=(long)(maxLoadFactor*prime);
229 				return;
230 			}
231 
232 			long x=10+(long)(prime*resizeMult);
233 			x=Tools.max(x, minAllowedByLoadFactor);
234 			x=Tools.min(x, maxAllowedByLoadFactor);
235 
236 			int prime2=(int)Tools.min(maxPrime, Primes.primeAtLeast(x));
237 
238 			if(prime2<=prime){
239 				sizeLimit=(long)(maxLoadFactor*prime);
240 				assert(prime2==prime) : "Resizing to smaller array? "+totalSize+", "+prime+", "+x;
241 				return;
242 			}
243 
244 			prime=prime2;
245 			sizeLimit=(long)(maxLoadFactor*prime);
246 		}
247 //		System.err.println("Resized to "+prime+"; load="+(size*1f/prime));
248 		long[] oldk=array;
249 		int[] oldc=values;
250 		KmerNode[] oldv=victims.array;
251 		array=allocLong1D(prime+extra);
252 		Arrays.fill(array, NOT_PRESENT);
253 //		System.err.print(prime+" ");//123
254 		values=allocInt1D(prime+extra);
255 		ArrayList<KmerNode> list=victims.toList();
256 		Arrays.fill(oldv, null);
257 		victims.size=0;
258 		size=0;
259 
260 		if(TWO_PASS_RESIZE){
261 			for(int i=0; i<oldk.length; i++){
262 				if(oldk[i]>NOT_PRESENT && oldc[i]>1){set(oldk[i], oldc[i]);}
263 			}
264 			for(KmerNode n : list){
265 				if(n.pivot>NOT_PRESENT && n.value()>1){set(n.pivot, n.value());}
266 			}
267 			for(int i=0; i<oldk.length; i++){
268 				if(oldk[i]>NOT_PRESENT && oldc[i]<=1){set(oldk[i], oldc[i]);}
269 			}
270 			for(KmerNode n : list){
271 				if(n.pivot>NOT_PRESENT && n.value()<=1){set(n.pivot, n.value());}
272 			}
273 		}else{
274 			for(int i=0; i<oldk.length; i++){
275 				if(oldk[i]>NOT_PRESENT){set(oldk[i], oldc[i]);}
276 			}
277 			for(KmerNode n : list){
278 				if(n.pivot>NOT_PRESENT){set(n.pivot, n.value());}
279 			}
280 		}
281 
282 		assert(oldSize+oldVSize==size+victims.size) : oldSize+", "+oldVSize+" -> "+size+", "+victims.size;
283 	}
284 
285 	@Deprecated
286 	@Override
rebalance()287 	public void rebalance(){
288 		throw new RuntimeException("Unimplemented.");
289 	}
290 
291 	@Override
regenerate(final int limit)292 	public long regenerate(final int limit){
293 		long sum=0;
294 		assert(owners==null) : "Clear ownership before regeneration.";
295 		for(int pos=0; pos<values.length; pos++){
296 			final long key=array[pos];
297 			if(key>=0){
298 				final int value=values[pos];
299 				values[pos]=NOT_PRESENT;
300 				array[pos]=NOT_PRESENT;
301 				size--;
302 				if(value>limit){
303 					set(key, value);
304 				}else{
305 					sum++;
306 				}
307 			}
308 		}
309 
310 		ArrayList<KmerNode> nodes=victims.toList();
311 		victims.clear();
312 		for(KmerNode node : nodes){
313 			int value=node.value();
314 			if(value<=limit){
315 				sum++;
316 			}else{
317 				set(node.pivot, node.value());
318 			}
319 		}
320 
321 		return sum;
322 	}
323 
324 	@Override
toString()325 	public String toString(){
326 		return Arrays.toString(array);
327 	}
328 
walk()329 	public Walker walk(){
330 		return new Walker1D();
331 	}
332 
333 	/*--------------------------------------------------------------*/
334 	/*----------------            Fields            ----------------*/
335 	/*--------------------------------------------------------------*/
336 
337 	private int[] values;
338 
values()339 	public int[] values(){return values;}
340 
341 	/*--------------------------------------------------------------*/
342 	/*----------------            Walker            ----------------*/
343 	/*--------------------------------------------------------------*/
344 
345 	public class Walker1D extends Walker {
346 
Walker1D()347 		Walker1D(){
348 			ha=HashArray1D.this;
349 			victims=ha.victims().toList();
350 		}
351 
352 		/**
353 		 * Fills this object with the next key and value.
354 		 * @return True if successful.
355 		 */
next()356 		public boolean next(){
357 			while(i<values.length && values[i]==NOT_XPRESENT){i++;}
358 			if(i<values.length){
359 				kmer=array[i];
360 				value=values[i];
361 				assert(value!=NOT_XPRESENT);
362 				i++;
363 				return true;
364 			}
365 			if(i2<victims.size()){
366 				KmerNode kn=victims.get(i2);
367 				kmer=kn.pivot;
368 				value=kn.value();
369 				assert(value!=NOT_XPRESENT);
370 				i2++;
371 				return true;
372 			}
373 			kmer=-1;
374 			value=NOT_XPRESENT;
375 			return false;
376 		}
377 
kmer()378 		public long kmer(){return kmer;}
value()379 		public int value(){return value;}
380 
381 		/** Hash map over which this is walking */
382 		private HashArray1D ha;
383 		/** Victim list of the hash map */
384 		private ArrayList<KmerNode> victims;
385 
386 		private long kmer;
387 		private int value;
388 
389 		/** Potential next kmer cell; may point to an empty cell */
390 		private int i=0;
391 		/** Next victim in list */
392 		private int i2=0;
393 	}
394 
395 	//TODO: Remove after fixing array initialization
396 	private static final int NOT_XPRESENT=0;
397 
398 
399 }
400