1 /* 2 * This file is part of ELKI: 3 * Environment for Developing KDD-Applications Supported by Index-Structures 4 * 5 * Copyright (C) 2018 6 * ELKI Development Team 7 * 8 * This program is free software: you can redistribute it and/or modify 9 * it under the terms of the GNU Affero General Public License as published by 10 * the Free Software Foundation, either version 3 of the License, or 11 * (at your option) any later version. 12 * 13 * This program is distributed in the hope that it will be useful, 14 * but WITHOUT ANY WARRANTY; without even the implied warranty of 15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 * GNU Affero General Public License for more details. 17 * 18 * You should have received a copy of the GNU Affero General Public License 19 * along with this program. If not, see <http://www.gnu.org/licenses/>. 20 */ 21 package de.lmu.ifi.dbs.elki.data; 22 23 import java.io.IOException; 24 import java.nio.ByteBuffer; 25 26 import de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.ArrayAdapter; 27 import de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.NumberArrayAdapter; 28 import de.lmu.ifi.dbs.elki.utilities.io.ByteArrayUtil; 29 import de.lmu.ifi.dbs.elki.utilities.io.ByteBufferSerializer; 30 import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; 31 32 /** 33 * Vector type using {@code int[]} storage. 34 * 35 * @author Erich Schubert 36 * @since 0.4.0 37 */ 38 public class IntegerVector implements NumberVector { 39 /** 40 * Static instance (object factory). 41 */ 42 public static final IntegerVector.Factory STATIC = new IntegerVector.Factory(); 43 44 /** 45 * Serializer for up to 127 dimensions. 46 */ 47 public static final ByteBufferSerializer<IntegerVector> BYTE_SERIALIZER = new SmallSerializer(); 48 49 /** 50 * Serializer for up to 2^15-1 dimensions. 51 */ 52 public static final ByteBufferSerializer<IntegerVector> SHORT_SERIALIZER = new ShortSerializer(); 53 54 /** 55 * Serializer using varint encoding. 56 */ 57 public static final ByteBufferSerializer<IntegerVector> VARIABLE_SERIALIZER = new VariableSerializer(); 58 59 /** 60 * Keeps the values of the real vector. 61 */ 62 private final int[] values; 63 64 /** 65 * Private constructor. NOT for public use. 66 * 67 * @param values Value data 68 * @param nocopy Flag to use without copying. 69 */ IntegerVector(int[] values, boolean nocopy)70 private IntegerVector(int[] values, boolean nocopy) { 71 if (nocopy) { 72 this.values = values; 73 } else { 74 this.values = new int[values.length]; 75 System.arraycopy(values, 0, this.values, 0, values.length); 76 } 77 } 78 79 /** 80 * Create an IntegerVector consisting of the given integer values. 81 * 82 * @param values the values to be set as values of the IntegerVector 83 */ IntegerVector(int[] values)84 public IntegerVector(int[] values) { 85 this.values = values.clone(); 86 } 87 88 @Override getDimensionality()89 public int getDimensionality() { 90 return values.length; 91 } 92 93 /** 94 * Returns the value of the specified attribute. 95 * 96 * @param dimension the selected attribute. Attributes are counted starting 97 * with 0. 98 * 99 * @throws IllegalArgumentException if the specified dimension is out of range 100 * of the possible attributes 101 * 102 * {@inheritDoc} 103 */ 104 @Override 105 @Deprecated getValue(int dimension)106 public Integer getValue(int dimension) { 107 return Integer.valueOf(values[dimension]); 108 } 109 110 @Override doubleValue(int dimension)111 public double doubleValue(int dimension) { 112 return values[dimension]; 113 } 114 115 @Override longValue(int dimension)116 public long longValue(int dimension) { 117 return values[dimension]; 118 } 119 120 @Override intValue(int dimension)121 public int intValue(int dimension) { 122 return values[dimension]; 123 } 124 125 @Override toArray()126 public double[] toArray() { 127 double[] data = new double[values.length]; 128 for (int i = 0; i < values.length; i++) { 129 data[i] = values[i]; 130 } 131 return data; 132 } 133 134 @Override toString()135 public String toString() { 136 StringBuilder featureLine = new StringBuilder(); 137 for (int i = 0; i < values.length; i++) { 138 featureLine.append(values[i]); 139 if (i + 1 < values.length) { 140 featureLine.append(ATTRIBUTE_SEPARATOR); 141 } 142 } 143 return featureLine.toString(); 144 } 145 146 /** 147 * Factory for integer vectors. 148 * 149 * @author Erich Schubert 150 * 151 * @has - - - IntegerVector 152 */ 153 public static class Factory implements NumberVector.Factory<IntegerVector> { 154 @Override newFeatureVector(A array, ArrayAdapter<? extends Number, A> adapter)155 public <A> IntegerVector newFeatureVector(A array, ArrayAdapter<? extends Number, A> adapter) { 156 int dim = adapter.size(array); 157 int[] values = new int[dim]; 158 for (int i = 0; i < dim; i++) { 159 values[i] = adapter.get(array, i).intValue(); 160 } 161 return new IntegerVector(values, true); 162 } 163 164 @Override newNumberVector(A array, NumberArrayAdapter<?, ? super A> adapter)165 public <A> IntegerVector newNumberVector(A array, NumberArrayAdapter<?, ? super A> adapter) { 166 int dim = adapter.size(array); 167 int[] values = new int[dim]; 168 for (int i = 0; i < dim; i++) { 169 values[i] = adapter.getInteger(array, i); 170 } 171 return new IntegerVector(values, true); 172 } 173 174 @Override getDefaultSerializer()175 public ByteBufferSerializer<IntegerVector> getDefaultSerializer() { 176 return VARIABLE_SERIALIZER; 177 } 178 179 @Override getRestrictionClass()180 public Class<? super IntegerVector> getRestrictionClass() { 181 return IntegerVector.class; 182 } 183 184 /** 185 * Parameterization class. 186 * 187 * @author Erich Schubert 188 */ 189 public static class Parameterizer extends AbstractParameterizer { 190 @Override makeInstance()191 protected IntegerVector.Factory makeInstance() { 192 return STATIC; 193 } 194 } 195 } 196 197 /** 198 * Serialization class for dense integer vectors with up to 127 dimensions, by 199 * using a byte for storing the dimensionality. 200 * 201 * @author Erich Schubert 202 * 203 * @assoc - serializes - IntegerVector 204 */ 205 public static class SmallSerializer implements ByteBufferSerializer<IntegerVector> { 206 @Override fromByteBuffer(ByteBuffer buffer)207 public IntegerVector fromByteBuffer(ByteBuffer buffer) throws IOException { 208 final byte dimensionality = buffer.get(); 209 assert (buffer.remaining() >= ByteArrayUtil.SIZE_INT * dimensionality); 210 final int[] values = new int[dimensionality]; 211 for (int i = 0; i < dimensionality; i++) { 212 values[i] = buffer.getInt(); 213 } 214 return new IntegerVector(values, true); 215 } 216 217 @Override toByteBuffer(ByteBuffer buffer, IntegerVector vec)218 public void toByteBuffer(ByteBuffer buffer, IntegerVector vec) throws IOException { 219 assert (vec.values.length < Byte.MAX_VALUE) : "This serializer only supports a maximum dimensionality of " + Byte.MAX_VALUE + "!"; 220 assert (buffer.remaining() >= ByteArrayUtil.SIZE_INT * vec.values.length); 221 buffer.put((byte) vec.values.length); 222 for (int i = 0; i < vec.values.length; i++) { 223 buffer.putInt(vec.values[i]); 224 } 225 } 226 227 @Override getByteSize(IntegerVector vec)228 public int getByteSize(IntegerVector vec) { 229 assert (vec.values.length < Byte.MAX_VALUE) : "This serializer only supports a maximum dimensionality of " + Byte.MAX_VALUE + "!"; 230 return ByteArrayUtil.SIZE_BYTE + ByteArrayUtil.SIZE_INT * vec.getDimensionality(); 231 } 232 } 233 234 /** 235 * Serialization class for dense integer vectors with up to 236 * {@link Short#MAX_VALUE} dimensions, by using a short for storing the 237 * dimensionality. 238 * 239 * @author Erich Schubert 240 * 241 * @assoc - serializes - IntegerVector 242 */ 243 public static class ShortSerializer implements ByteBufferSerializer<IntegerVector> { 244 @Override 245 public IntegerVector fromByteBuffer(ByteBuffer buffer) throws IOException { 246 final short dimensionality = buffer.getShort(); 247 assert (buffer.remaining() >= ByteArrayUtil.SIZE_INT * dimensionality); 248 final int[] values = new int[dimensionality]; 249 for (int i = 0; i < dimensionality; i++) { 250 values[i] = buffer.getInt(); 251 } 252 return new IntegerVector(values, true); 253 } 254 255 @Override toByteBuffer(ByteBuffer buffer, IntegerVector vec)256 public void toByteBuffer(ByteBuffer buffer, IntegerVector vec) throws IOException { 257 assert (vec.values.length < Short.MAX_VALUE) : "This serializer only supports a maximum dimensionality of " + Short.MAX_VALUE + "!"; 258 assert (buffer.remaining() >= ByteArrayUtil.SIZE_INT * vec.values.length); 259 buffer.putShort((short) vec.values.length); 260 for (int i = 0; i < vec.values.length; i++) { 261 buffer.putInt(vec.values[i]); 262 } 263 } 264 265 @Override getByteSize(IntegerVector vec)266 public int getByteSize(IntegerVector vec) { 267 assert (vec.values.length < Short.MAX_VALUE) : "This serializer only supports a maximum dimensionality of " + Short.MAX_VALUE + "!"; 268 return ByteArrayUtil.SIZE_SHORT + ByteArrayUtil.SIZE_INT * vec.getDimensionality(); 269 } 270 } 271 272 /** 273 * Serialization class for variable dimensionality by using VarInt encoding. 274 * 275 * @author Erich Schubert 276 * 277 * @assoc - serializes - IntegerVector 278 */ 279 public static class VariableSerializer implements ByteBufferSerializer<IntegerVector> { 280 @Override 281 public IntegerVector fromByteBuffer(ByteBuffer buffer) throws IOException { 282 final int dimensionality = ByteArrayUtil.readUnsignedVarint(buffer); 283 assert (buffer.remaining() >= ByteArrayUtil.SIZE_INT * dimensionality); 284 final int[] values = new int[dimensionality]; 285 for (int i = 0; i < dimensionality; i++) { 286 values[i] = ByteArrayUtil.readSignedVarint(buffer); 287 } 288 return new IntegerVector(values, true); 289 } 290 291 @Override toByteBuffer(ByteBuffer buffer, IntegerVector vec)292 public void toByteBuffer(ByteBuffer buffer, IntegerVector vec) throws IOException { 293 assert (vec.values.length < Short.MAX_VALUE) : "This serializer only supports a maximum dimensionality of " + Short.MAX_VALUE + "!"; 294 ByteArrayUtil.writeUnsignedVarint(buffer, vec.values.length); 295 for (int i = 0; i < vec.values.length; i++) { 296 ByteArrayUtil.writeSignedVarint(buffer, vec.values[i]); 297 } 298 } 299 300 @Override 301 public int getByteSize(IntegerVector vec) { 302 assert (vec.values.length < Short.MAX_VALUE) : "This serializer only supports a maximum dimensionality of " + Short.MAX_VALUE + "!"; 303 int len = ByteArrayUtil.getUnsignedVarintSize(vec.values.length); 304 for (int i = 0; i < vec.values.length; i++) { 305 len += ByteArrayUtil.getSignedVarintSize(vec.values[i]); 306 } 307 return len; 308 } 309 } 310 } 311