1 package bloom; 2 3 import java.util.Locale; 4 5 /** 6 * @author Brian Bushnell 7 * @date Jul 5, 2012 8 */ 9 public class KCountArray2 { 10 main(String[] args)11 public static void main(String[] args){ 12 KCountArray2 kca=new KCountArray2(1024, 16); 13 } 14 KCountArray2(long cells_, int bits_)15 public KCountArray2(long cells_, int bits_){ 16 this(cells_, bits_, 0); 17 } 18 KCountArray2(long cells_, int bits_, int gap_)19 public KCountArray2(long cells_, int bits_, int gap_){ 20 gap=gap_; 21 assert(bits_<=32); 22 assert(Integer.bitCount(bits_)==1); 23 assert(Long.bitCount(cells_)==1); 24 25 while(bits_*cells_<32*numArrays){ 26 assert(false); 27 bits_*=2; 28 } //Increases bits per cell so that at minimum each array is size 1 29 30 assert(bits_!=32); 31 32 cells=cells_; 33 cellBits=bits_; 34 valueMask=~((-1)<<cellBits); 35 maxValue=min(Integer.MAX_VALUE, ~((-1)<<min(cellBits,31))); 36 cellsPerWord=32/cellBits; 37 indexShift=Integer.numberOfTrailingZeros(cellsPerWord); 38 long words=cells/cellsPerWord; 39 int wordsPerArray=(int)(words/numArrays); 40 matrix=new int[numArrays][wordsPerArray]; 41 42 if(verbose){ 43 System.out.println("cells: \t"+cells); 44 System.out.println("cellBits:\t"+cellBits); 45 System.out.println("valueMask:\t"+Long.toHexString(valueMask)); 46 System.out.println("maxValue:\t"+maxValue); 47 System.out.println("cellsPerWord:\t"+cellsPerWord); 48 System.out.println("indexShift:\t"+indexShift); 49 System.out.println("words: \t"+words); 50 System.out.println("wordsPerArray:\t"+wordsPerArray); 51 System.out.println("numArrays:\t"+numArrays); 52 53 54 long mem=words*4; 55 if(mem<(1<<30)){ 56 System.out.println("memory: \t"+String.format(Locale.ROOT, "%.2f MB", mem*1d/(1<<20))); 57 }else{ 58 System.out.println("memory: \t"+String.format(Locale.ROOT, "%.2f GB", mem*1d/(1<<30))); 59 } 60 } 61 } 62 read(long key)63 public int read(long key){ 64 // System.out.println("key="+key); 65 int arrayNum=(int)(key&arrayMask); 66 // System.out.println("array="+arrayNum); 67 key>>>=arrayBits; 68 // System.out.println("key2="+key); 69 int[] array=matrix[arrayNum]; 70 int index=(int)(key>>>indexShift); 71 // System.out.println("index="+index); 72 int word=array[index]; 73 // System.out.println("word="+Integer.toHexString(word)); 74 int cellShift=(int)(cellBits*key); 75 // System.out.println("cellShift="+cellShift); 76 return (int)((word>>>cellShift)&valueMask); 77 } 78 write(long key, int value)79 public void write(long key, int value){ 80 int arrayNum=(int)(key&arrayMask); 81 key>>>=arrayBits; 82 int[] array=matrix[arrayNum]; 83 int index=(int)(key>>>indexShift); 84 int word=array[index]; 85 int cellShift=(int)(cellBits*key); 86 word=(value<<cellShift)|(word&~((valueMask)<<cellShift)); 87 array[index]=word; 88 } 89 increment(long key, int incr)90 public int increment(long key, int incr){ 91 int arrayNum=(int)(key&arrayMask); 92 key>>>=arrayBits; 93 int[] array=matrix[arrayNum]; 94 int index=(int)(key>>>indexShift); 95 int word=array[index]; 96 int cellShift=(int)(cellBits*key); 97 int value=((word>>>cellShift)&valueMask); 98 if(value==0 && incr>0){cellsUsed++;} 99 else if(incr<0 && value+incr==0){cellsUsed--;} 100 value=min(value+incr, maxValue); 101 word=(value<<cellShift)|(word&~((valueMask)<<cellShift)); 102 array[index]=word; 103 return (int)value; 104 } 105 106 /** Returns unincremented value */ increment2(long key, int incr)107 public int increment2(long key, int incr){ 108 int arrayNum=(int)(key&arrayMask); 109 key>>>=arrayBits; 110 int[] array=matrix[arrayNum]; 111 int index=(int)(key>>>indexShift); 112 int word=array[index]; 113 int cellShift=(int)(cellBits*key); 114 final int value=((word>>>cellShift)&valueMask); 115 final int value2=min(value+incr, maxValue); 116 word=(value2<<cellShift)|(word&~((valueMask)<<cellShift)); 117 array[index]=word; 118 return value; 119 } 120 transformToFrequency()121 public long[] transformToFrequency(){ 122 long[] freq=new long[100000]; 123 int maxFreq=freq.length-1; 124 125 if(cellBits!=32){ 126 assert(cellBits>0); 127 for(int[] array : matrix){ 128 for(int i=0; i<array.length; i++){ 129 int word=array[i]; 130 int j=cellsPerWord; 131 // System.out.println("initial: word = "+word+", j = "+Integer.toHexString(j)+", cellbits="+cellBits); 132 for(; word!=0; j--){ 133 int x=word&valueMask; 134 int x2=(int)min(x, maxFreq); 135 freq[x2]++; 136 word=(word>>>cellBits); 137 // System.out.println("word = "+word+", j = "+Integer.toHexString(j)+", cellbits="+cellBits); 138 } 139 freq[0]+=j; 140 } 141 } 142 }else{ 143 for(int[] array : matrix){ 144 for(int i=0; i<array.length; i++){ 145 int word=array[i]; 146 int x2=(int)min(word, maxFreq); 147 freq[x2]++; 148 } 149 } 150 } 151 return freq; 152 } 153 154 @Override toString()155 public String toString(){ 156 StringBuilder sb=new StringBuilder(); 157 sb.append("["); 158 String comma=""; 159 for(int[] array : matrix){ 160 for(int i=0; i<array.length; i++){ 161 int word=array[i]; 162 for(int j=0; j<cellsPerWord; j++){ 163 int x=word&valueMask; 164 sb.append(comma); 165 sb.append(x); 166 word>>>=cellBits; 167 comma=", "; 168 } 169 } 170 } 171 sb.append("]"); 172 return sb.toString(); 173 } 174 usedFraction()175 public double usedFraction(){return cellsUsed/(double)cells;} 176 usedFraction(int mindepth)177 public double usedFraction(int mindepth){return cellsUsed(mindepth)/(double)cells;} 178 cellsUsed(int mindepth)179 public long cellsUsed(int mindepth){ 180 long count=0; 181 for(int[] array : matrix){ 182 if(array!=null){ 183 for(int word : array){ 184 while(word>0){ 185 int x=word&valueMask; 186 if(x>=mindepth){count++;} 187 word>>>=cellBits; 188 } 189 } 190 } 191 } 192 return count; 193 } 194 mem()195 public String mem(){ 196 long mem=(cells*cellBits)/8; 197 if(mem<(1<<20)){ 198 return (String.format(Locale.ROOT, "%.2f KB", mem*1d/(1<<10))); 199 }else if(mem<(1<<30)){ 200 return (String.format(Locale.ROOT, "%.2f MB", mem*1d/(1<<20))); 201 }else{ 202 return (String.format(Locale.ROOT, "%.2f GB", mem*1d/(1<<30))); 203 } 204 } 205 min(int x, int y)206 public static final int min(int x, int y){return x<y ? x : y;} max(int x, int y)207 public static final int max(int x, int y){return x>y ? x : y;} min(long x, long y)208 public static final long min(long x, long y){return x<y ? x : y;} max(long x, long y)209 public static final long max(long x, long y){return x>y ? x : y;} 210 211 private long cellsUsed; 212 213 public final long cells; 214 public final int cellBits; 215 public final int maxValue; 216 public final int gap; //Set this for convenience on gapped tables to make sure you're using the right table. 217 218 private final int cellsPerWord; 219 private final int indexShift; 220 private final int valueMask; 221 private final int[][] matrix; 222 223 private static final int arrayBits=2; 224 private static final int numArrays=1<<arrayBits; 225 private static final int arrayMask=numArrays-1; 226 227 public static boolean verbose=false; 228 229 230 } 231