1 /*
2  * This file is part of ELKI:
3  * Environment for Developing KDD-Applications Supported by Index-Structures
4  *
5  * Copyright (C) 2018
6  * ELKI Development Team
7  *
8  * This program is free software: you can redistribute it and/or modify
9  * it under the terms of the GNU Affero General Public License as published by
10  * the Free Software Foundation, either version 3 of the License, or
11  * (at your option) any later version.
12  *
13  * This program is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16  * GNU Affero General Public License for more details.
17  *
18  * You should have received a copy of the GNU Affero General Public License
19  * along with this program. If not, see <http://www.gnu.org/licenses/>.
20  */
21 package de.lmu.ifi.dbs.elki.data;
22 
23 import java.io.IOException;
24 import java.nio.ByteBuffer;
25 
26 import de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.ArrayAdapter;
27 import de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.NumberArrayAdapter;
28 import de.lmu.ifi.dbs.elki.utilities.io.ByteArrayUtil;
29 import de.lmu.ifi.dbs.elki.utilities.io.ByteBufferSerializer;
30 import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
31 
32 /**
33  * Vector type using {@code int[]} storage.
34  *
35  * @author Erich Schubert
36  * @since 0.4.0
37  */
38 public class IntegerVector implements NumberVector {
39   /**
40    * Static instance (object factory).
41    */
42   public static final IntegerVector.Factory STATIC = new IntegerVector.Factory();
43 
44   /**
45    * Serializer for up to 127 dimensions.
46    */
47   public static final ByteBufferSerializer<IntegerVector> BYTE_SERIALIZER = new SmallSerializer();
48 
49   /**
50    * Serializer for up to 2^15-1 dimensions.
51    */
52   public static final ByteBufferSerializer<IntegerVector> SHORT_SERIALIZER = new ShortSerializer();
53 
54   /**
55    * Serializer using varint encoding.
56    */
57   public static final ByteBufferSerializer<IntegerVector> VARIABLE_SERIALIZER = new VariableSerializer();
58 
59   /**
60    * Keeps the values of the real vector.
61    */
62   private final int[] values;
63 
64   /**
65    * Private constructor. NOT for public use.
66    *
67    * @param values Value data
68    * @param nocopy Flag to use without copying.
69    */
IntegerVector(int[] values, boolean nocopy)70   private IntegerVector(int[] values, boolean nocopy) {
71     if (nocopy) {
72       this.values = values;
73     } else {
74       this.values = new int[values.length];
75       System.arraycopy(values, 0, this.values, 0, values.length);
76     }
77   }
78 
79   /**
80    * Create an IntegerVector consisting of the given integer values.
81    *
82    * @param values the values to be set as values of the IntegerVector
83    */
IntegerVector(int[] values)84   public IntegerVector(int[] values) {
85     this.values = values.clone();
86   }
87 
88   @Override
getDimensionality()89   public int getDimensionality() {
90     return values.length;
91   }
92 
93   /**
94    * Returns the value of the specified attribute.
95    *
96    * @param dimension the selected attribute. Attributes are counted starting
97    *        with 0.
98    *
99    * @throws IllegalArgumentException if the specified dimension is out of range
100    *         of the possible attributes
101    *
102    *         {@inheritDoc}
103    */
104   @Override
105   @Deprecated
getValue(int dimension)106   public Integer getValue(int dimension) {
107     return Integer.valueOf(values[dimension]);
108   }
109 
110   @Override
doubleValue(int dimension)111   public double doubleValue(int dimension) {
112     return values[dimension];
113   }
114 
115   @Override
longValue(int dimension)116   public long longValue(int dimension) {
117     return values[dimension];
118   }
119 
120   @Override
intValue(int dimension)121   public int intValue(int dimension) {
122     return values[dimension];
123   }
124 
125   @Override
toArray()126   public double[] toArray() {
127     double[] data = new double[values.length];
128     for (int i = 0; i < values.length; i++) {
129       data[i] = values[i];
130     }
131     return data;
132   }
133 
134   @Override
toString()135   public String toString() {
136     StringBuilder featureLine = new StringBuilder();
137     for (int i = 0; i < values.length; i++) {
138       featureLine.append(values[i]);
139       if (i + 1 < values.length) {
140         featureLine.append(ATTRIBUTE_SEPARATOR);
141       }
142     }
143     return featureLine.toString();
144   }
145 
146   /**
147    * Factory for integer vectors.
148    *
149    * @author Erich Schubert
150    *
151    * @has - - - IntegerVector
152    */
153   public static class Factory implements NumberVector.Factory<IntegerVector> {
154     @Override
newFeatureVector(A array, ArrayAdapter<? extends Number, A> adapter)155     public <A> IntegerVector newFeatureVector(A array, ArrayAdapter<? extends Number, A> adapter) {
156       int dim = adapter.size(array);
157       int[] values = new int[dim];
158       for (int i = 0; i < dim; i++) {
159         values[i] = adapter.get(array, i).intValue();
160       }
161       return new IntegerVector(values, true);
162     }
163 
164     @Override
newNumberVector(A array, NumberArrayAdapter<?, ? super A> adapter)165     public <A> IntegerVector newNumberVector(A array, NumberArrayAdapter<?, ? super A> adapter) {
166       int dim = adapter.size(array);
167       int[] values = new int[dim];
168       for (int i = 0; i < dim; i++) {
169         values[i] = adapter.getInteger(array, i);
170       }
171       return new IntegerVector(values, true);
172     }
173 
174     @Override
getDefaultSerializer()175     public ByteBufferSerializer<IntegerVector> getDefaultSerializer() {
176       return VARIABLE_SERIALIZER;
177     }
178 
179     @Override
getRestrictionClass()180     public Class<? super IntegerVector> getRestrictionClass() {
181       return IntegerVector.class;
182     }
183 
184     /**
185      * Parameterization class.
186      *
187      * @author Erich Schubert
188      */
189     public static class Parameterizer extends AbstractParameterizer {
190       @Override
makeInstance()191       protected IntegerVector.Factory makeInstance() {
192         return STATIC;
193       }
194     }
195   }
196 
197   /**
198    * Serialization class for dense integer vectors with up to 127 dimensions, by
199    * using a byte for storing the dimensionality.
200    *
201    * @author Erich Schubert
202    *
203    * @assoc - serializes - IntegerVector
204    */
205   public static class SmallSerializer implements ByteBufferSerializer<IntegerVector> {
206     @Override
fromByteBuffer(ByteBuffer buffer)207     public IntegerVector fromByteBuffer(ByteBuffer buffer) throws IOException {
208       final byte dimensionality = buffer.get();
209       assert (buffer.remaining() >= ByteArrayUtil.SIZE_INT * dimensionality);
210       final int[] values = new int[dimensionality];
211       for (int i = 0; i < dimensionality; i++) {
212         values[i] = buffer.getInt();
213       }
214       return new IntegerVector(values, true);
215     }
216 
217     @Override
toByteBuffer(ByteBuffer buffer, IntegerVector vec)218     public void toByteBuffer(ByteBuffer buffer, IntegerVector vec) throws IOException {
219       assert (vec.values.length < Byte.MAX_VALUE) : "This serializer only supports a maximum dimensionality of " + Byte.MAX_VALUE + "!";
220       assert (buffer.remaining() >= ByteArrayUtil.SIZE_INT * vec.values.length);
221       buffer.put((byte) vec.values.length);
222       for (int i = 0; i < vec.values.length; i++) {
223         buffer.putInt(vec.values[i]);
224       }
225     }
226 
227     @Override
getByteSize(IntegerVector vec)228     public int getByteSize(IntegerVector vec) {
229       assert (vec.values.length < Byte.MAX_VALUE) : "This serializer only supports a maximum dimensionality of " + Byte.MAX_VALUE + "!";
230       return ByteArrayUtil.SIZE_BYTE + ByteArrayUtil.SIZE_INT * vec.getDimensionality();
231     }
232   }
233 
234   /**
235    * Serialization class for dense integer vectors with up to
236    * {@link Short#MAX_VALUE} dimensions, by using a short for storing the
237    * dimensionality.
238    *
239    * @author Erich Schubert
240    *
241    * @assoc - serializes - IntegerVector
242    */
243   public static class ShortSerializer implements ByteBufferSerializer<IntegerVector> {
244     @Override
245     public IntegerVector fromByteBuffer(ByteBuffer buffer) throws IOException {
246       final short dimensionality = buffer.getShort();
247       assert (buffer.remaining() >= ByteArrayUtil.SIZE_INT * dimensionality);
248       final int[] values = new int[dimensionality];
249       for (int i = 0; i < dimensionality; i++) {
250         values[i] = buffer.getInt();
251       }
252       return new IntegerVector(values, true);
253     }
254 
255     @Override
toByteBuffer(ByteBuffer buffer, IntegerVector vec)256     public void toByteBuffer(ByteBuffer buffer, IntegerVector vec) throws IOException {
257       assert (vec.values.length < Short.MAX_VALUE) : "This serializer only supports a maximum dimensionality of " + Short.MAX_VALUE + "!";
258       assert (buffer.remaining() >= ByteArrayUtil.SIZE_INT * vec.values.length);
259       buffer.putShort((short) vec.values.length);
260       for (int i = 0; i < vec.values.length; i++) {
261         buffer.putInt(vec.values[i]);
262       }
263     }
264 
265     @Override
getByteSize(IntegerVector vec)266     public int getByteSize(IntegerVector vec) {
267       assert (vec.values.length < Short.MAX_VALUE) : "This serializer only supports a maximum dimensionality of " + Short.MAX_VALUE + "!";
268       return ByteArrayUtil.SIZE_SHORT + ByteArrayUtil.SIZE_INT * vec.getDimensionality();
269     }
270   }
271 
272   /**
273    * Serialization class for variable dimensionality by using VarInt encoding.
274    *
275    * @author Erich Schubert
276    *
277    * @assoc - serializes - IntegerVector
278    */
279   public static class VariableSerializer implements ByteBufferSerializer<IntegerVector> {
280     @Override
281     public IntegerVector fromByteBuffer(ByteBuffer buffer) throws IOException {
282       final int dimensionality = ByteArrayUtil.readUnsignedVarint(buffer);
283       assert (buffer.remaining() >= ByteArrayUtil.SIZE_INT * dimensionality);
284       final int[] values = new int[dimensionality];
285       for (int i = 0; i < dimensionality; i++) {
286         values[i] = ByteArrayUtil.readSignedVarint(buffer);
287       }
288       return new IntegerVector(values, true);
289     }
290 
291     @Override
toByteBuffer(ByteBuffer buffer, IntegerVector vec)292     public void toByteBuffer(ByteBuffer buffer, IntegerVector vec) throws IOException {
293       assert (vec.values.length < Short.MAX_VALUE) : "This serializer only supports a maximum dimensionality of " + Short.MAX_VALUE + "!";
294       ByteArrayUtil.writeUnsignedVarint(buffer, vec.values.length);
295       for (int i = 0; i < vec.values.length; i++) {
296         ByteArrayUtil.writeSignedVarint(buffer, vec.values[i]);
297       }
298     }
299 
300     @Override
301     public int getByteSize(IntegerVector vec) {
302       assert (vec.values.length < Short.MAX_VALUE) : "This serializer only supports a maximum dimensionality of " + Short.MAX_VALUE + "!";
303       int len = ByteArrayUtil.getUnsignedVarintSize(vec.values.length);
304       for (int i = 0; i < vec.values.length; i++) {
305         len += ByteArrayUtil.getSignedVarintSize(vec.values[i]);
306       }
307       return len;
308     }
309   }
310 }
311