1 package bloom;
2 
3 import java.util.Locale;
4 
5 /**
6  * @author Brian Bushnell
7  * @date Jul 5, 2012
8  */
9 public class KCountArray2 {
10 
main(String[] args)11 	public static void main(String[] args){
12 		KCountArray2 kca=new KCountArray2(1024, 16);
13 	}
14 
KCountArray2(long cells_, int bits_)15 	public KCountArray2(long cells_, int bits_){
16 		this(cells_, bits_, 0);
17 	}
18 
KCountArray2(long cells_, int bits_, int gap_)19 	public KCountArray2(long cells_, int bits_, int gap_){
20 		gap=gap_;
21 		assert(bits_<=32);
22 		assert(Integer.bitCount(bits_)==1);
23 		assert(Long.bitCount(cells_)==1);
24 
25 		while(bits_*cells_<32*numArrays){
26 			assert(false);
27 			bits_*=2;
28 		} //Increases bits per cell so that at minimum each array is size 1
29 
30 		assert(bits_!=32);
31 
32 		cells=cells_;
33 		cellBits=bits_;
34 		valueMask=~((-1)<<cellBits);
35 		maxValue=min(Integer.MAX_VALUE, ~((-1)<<min(cellBits,31)));
36 		cellsPerWord=32/cellBits;
37 		indexShift=Integer.numberOfTrailingZeros(cellsPerWord);
38 		long words=cells/cellsPerWord;
39 		int wordsPerArray=(int)(words/numArrays);
40 		matrix=new int[numArrays][wordsPerArray];
41 
42 		if(verbose){
43 			System.out.println("cells:   \t"+cells);
44 			System.out.println("cellBits:\t"+cellBits);
45 			System.out.println("valueMask:\t"+Long.toHexString(valueMask));
46 			System.out.println("maxValue:\t"+maxValue);
47 			System.out.println("cellsPerWord:\t"+cellsPerWord);
48 			System.out.println("indexShift:\t"+indexShift);
49 			System.out.println("words:   \t"+words);
50 			System.out.println("wordsPerArray:\t"+wordsPerArray);
51 			System.out.println("numArrays:\t"+numArrays);
52 
53 
54 			long mem=words*4;
55 			if(mem<(1<<30)){
56 				System.out.println("memory:   \t"+String.format(Locale.ROOT, "%.2f MB", mem*1d/(1<<20)));
57 			}else{
58 				System.out.println("memory:   \t"+String.format(Locale.ROOT, "%.2f GB", mem*1d/(1<<30)));
59 			}
60 		}
61 	}
62 
read(long key)63 	public int read(long key){
64 //		System.out.println("key="+key);
65 		int arrayNum=(int)(key&arrayMask);
66 //		System.out.println("array="+arrayNum);
67 		key>>>=arrayBits;
68 //		System.out.println("key2="+key);
69 		int[] array=matrix[arrayNum];
70 		int index=(int)(key>>>indexShift);
71 //		System.out.println("index="+index);
72 		int word=array[index];
73 //		System.out.println("word="+Integer.toHexString(word));
74 		int cellShift=(int)(cellBits*key);
75 //		System.out.println("cellShift="+cellShift);
76 		return (int)((word>>>cellShift)&valueMask);
77 	}
78 
write(long key, int value)79 	public void write(long key, int value){
80 		int arrayNum=(int)(key&arrayMask);
81 		key>>>=arrayBits;
82 		int[] array=matrix[arrayNum];
83 		int index=(int)(key>>>indexShift);
84 		int word=array[index];
85 		int cellShift=(int)(cellBits*key);
86 		word=(value<<cellShift)|(word&~((valueMask)<<cellShift));
87 		array[index]=word;
88 	}
89 
increment(long key, int incr)90 	public int increment(long key, int incr){
91 		int arrayNum=(int)(key&arrayMask);
92 		key>>>=arrayBits;
93 		int[] array=matrix[arrayNum];
94 		int index=(int)(key>>>indexShift);
95 		int word=array[index];
96 		int cellShift=(int)(cellBits*key);
97 		int value=((word>>>cellShift)&valueMask);
98 		if(value==0 && incr>0){cellsUsed++;}
99 		else if(incr<0 && value+incr==0){cellsUsed--;}
100 		value=min(value+incr, maxValue);
101 		word=(value<<cellShift)|(word&~((valueMask)<<cellShift));
102 		array[index]=word;
103 		return (int)value;
104 	}
105 
106 	/** Returns unincremented value */
increment2(long key, int incr)107 	public int increment2(long key, int incr){
108 		int arrayNum=(int)(key&arrayMask);
109 		key>>>=arrayBits;
110 		int[] array=matrix[arrayNum];
111 		int index=(int)(key>>>indexShift);
112 		int word=array[index];
113 		int cellShift=(int)(cellBits*key);
114 		final int value=((word>>>cellShift)&valueMask);
115 		final int value2=min(value+incr, maxValue);
116 		word=(value2<<cellShift)|(word&~((valueMask)<<cellShift));
117 		array[index]=word;
118 		return value;
119 	}
120 
transformToFrequency()121 	public long[] transformToFrequency(){
122 		long[] freq=new long[100000];
123 		int maxFreq=freq.length-1;
124 
125 		if(cellBits!=32){
126 			assert(cellBits>0);
127 			for(int[] array : matrix){
128 				for(int i=0; i<array.length; i++){
129 					int word=array[i];
130 					int j=cellsPerWord;
131 					//				System.out.println("initial: word = "+word+", j = "+Integer.toHexString(j)+", cellbits="+cellBits);
132 					for(; word!=0; j--){
133 						int x=word&valueMask;
134 						int x2=(int)min(x, maxFreq);
135 						freq[x2]++;
136 						word=(word>>>cellBits);
137 						//					System.out.println("word = "+word+", j = "+Integer.toHexString(j)+", cellbits="+cellBits);
138 					}
139 					freq[0]+=j;
140 				}
141 			}
142 		}else{
143 			for(int[] array : matrix){
144 				for(int i=0; i<array.length; i++){
145 					int word=array[i];
146 					int x2=(int)min(word, maxFreq);
147 					freq[x2]++;
148 				}
149 			}
150 		}
151 		return freq;
152 	}
153 
154 	@Override
toString()155 	public String toString(){
156 		StringBuilder sb=new StringBuilder();
157 		sb.append("[");
158 		String comma="";
159 		for(int[] array : matrix){
160 			for(int i=0; i<array.length; i++){
161 				int word=array[i];
162 				for(int j=0; j<cellsPerWord; j++){
163 					int x=word&valueMask;
164 					sb.append(comma);
165 					sb.append(x);
166 					word>>>=cellBits;
167 					comma=", ";
168 				}
169 			}
170 		}
171 		sb.append("]");
172 		return sb.toString();
173 	}
174 
usedFraction()175 	public double usedFraction(){return cellsUsed/(double)cells;}
176 
usedFraction(int mindepth)177 	public double usedFraction(int mindepth){return cellsUsed(mindepth)/(double)cells;}
178 
cellsUsed(int mindepth)179 	public long cellsUsed(int mindepth){
180 		long count=0;
181 		for(int[] array : matrix){
182 			if(array!=null){
183 				for(int word : array){
184 					while(word>0){
185 						int x=word&valueMask;
186 						if(x>=mindepth){count++;}
187 						word>>>=cellBits;
188 					}
189 				}
190 			}
191 		}
192 		return count;
193 	}
194 
mem()195 	public String mem(){
196 		long mem=(cells*cellBits)/8;
197 		if(mem<(1<<20)){
198 			return (String.format(Locale.ROOT, "%.2f KB", mem*1d/(1<<10)));
199 		}else if(mem<(1<<30)){
200 			return (String.format(Locale.ROOT, "%.2f MB", mem*1d/(1<<20)));
201 		}else{
202 			return (String.format(Locale.ROOT, "%.2f GB", mem*1d/(1<<30)));
203 		}
204 	}
205 
min(int x, int y)206 	public static final int min(int x, int y){return x<y ? x : y;}
max(int x, int y)207 	public static final int max(int x, int y){return x>y ? x : y;}
min(long x, long y)208 	public static final long min(long x, long y){return x<y ? x : y;}
max(long x, long y)209 	public static final long max(long x, long y){return x>y ? x : y;}
210 
211 	private long cellsUsed;
212 
213 	public final long cells;
214 	public final int cellBits;
215 	public final int maxValue;
216 	public final int gap; //Set this for convenience on gapped tables to make sure you're using the right table.
217 
218 	private final int cellsPerWord;
219 	private final int indexShift;
220 	private final int valueMask;
221 	private final int[][] matrix;
222 
223 	private static final int arrayBits=2;
224 	private static final int numArrays=1<<arrayBits;
225 	private static final int arrayMask=numArrays-1;
226 
227 	public static boolean verbose=false;
228 
229 
230 }
231