1 /*
2  * This file is part of ELKI:
3  * Environment for Developing KDD-Applications Supported by Index-Structures
4  *
5  * Copyright (C) 2018
6  * ELKI Development Team
7  *
8  * This program is free software: you can redistribute it and/or modify
9  * it under the terms of the GNU Affero General Public License as published by
10  * the Free Software Foundation, either version 3 of the License, or
11  * (at your option) any later version.
12  *
13  * This program is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16  * GNU Affero General Public License for more details.
17  *
18  * You should have received a copy of the GNU Affero General Public License
19  * along with this program. If not, see <http://www.gnu.org/licenses/>.
20  */
21 package de.lmu.ifi.dbs.elki.database.ids;
22 
23 import java.util.Random;
24 
25 import de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException;
26 import de.lmu.ifi.dbs.elki.utilities.io.ByteBufferSerializer;
27 import de.lmu.ifi.dbs.elki.utilities.random.FastNonThreadsafeRandom;
28 import de.lmu.ifi.dbs.elki.utilities.random.RandomFactory;
29 
30 /**
31  * DBID Utility functions.
32  *
33  * @author Erich Schubert
34  * @since 0.4.0
35  *
36  * @opt nodefillcolor LemonChiffon
37  *
38  * @has - - - DBIDs
39  * @has - - - DBIDRef
40  * @composed - - - DBIDFactory
41  */
42 public final class DBIDUtil {
43   /**
44    * Static - no public constructor.
45    */
DBIDUtil()46   private DBIDUtil() {
47     // Never called.
48   }
49 
50   /**
51    * Final, global copy of empty DBIDs.
52    */
53   public static final EmptyDBIDs EMPTYDBIDS = new EmptyDBIDs();
54 
55   /**
56    * Get the invalid special ID.
57    *
58    * @return invalid ID value
59    */
invalid()60   public static DBIDRef invalid() {
61     return DBIDFactory.FACTORY.invalid();
62   }
63 
64   /**
65    * Import and integer as DBID.
66    * <p>
67    * Note: this may not be possible for some factories!
68    *
69    * @param id Integer ID to import
70    * @return DBID
71    */
importInteger(int id)72   public static DBID importInteger(int id) {
73     return DBIDFactory.FACTORY.importInteger(id);
74   }
75 
76   /**
77    * Export a DBID as int.
78    * <p>
79    * Note: this may not be possible for some factories!
80    *
81    * @param id DBID to export
82    * @return integer value
83    */
asInteger(DBIDRef id)84   public static int asInteger(DBIDRef id) {
85     return id.internalGetIndex();
86   }
87 
88   /**
89    * Compare two DBIDs.
90    *
91    * @param id1 First ID
92    * @param id2 Second ID
93    * @return Comparison result
94    */
compare(DBIDRef id1, DBIDRef id2)95   public static int compare(DBIDRef id1, DBIDRef id2) {
96     return DBIDFactory.FACTORY.compare(id1, id2);
97   }
98 
99   /**
100    * Test two DBIDs for equality.
101    *
102    * @param id1 First ID
103    * @param id2 Second ID
104    * @return Comparison result
105    */
equal(DBIDRef id1, DBIDRef id2)106   public static boolean equal(DBIDRef id1, DBIDRef id2) {
107     return DBIDFactory.FACTORY.equal(id1, id2);
108   }
109 
110   /**
111    * Dereference a DBID reference.
112    *
113    * @param ref DBID reference
114    * @return DBID
115    */
deref(DBIDRef ref)116   public static DBID deref(DBIDRef ref) {
117     return ref instanceof DBID ? (DBID) ref : importInteger(ref.internalGetIndex());
118   }
119 
120   /**
121    * Format a DBID as string.
122    *
123    * @param id DBID
124    * @return String representation
125    */
toString(DBIDRef id)126   public static String toString(DBIDRef id) {
127     return DBIDFactory.FACTORY.toString(id);
128   }
129 
130   /**
131    * Format a DBID as string.
132    *
133    * @param ids DBIDs
134    * @return String representation
135    */
toString(DBIDs ids)136   public static String toString(DBIDs ids) {
137     final DBIDFactory factory = DBIDFactory.FACTORY;
138     if(ids instanceof DBID) {
139       return factory.toString((DBID) ids);
140     }
141     if(ids.isEmpty()) {
142       return "";
143     }
144     DBIDIter iter = ids.iter();
145     StringBuilder buf = new StringBuilder(ids.size() * 6) //
146         .append(factory.toString(iter));
147     while(iter.advance().valid()) {
148       buf.append(',').append(factory.toString(iter));
149     }
150     return buf.toString();
151   }
152 
153   /**
154    * Get a serializer for DBIDs.
155    *
156    * @return DBID serializer
157    */
getDBIDSerializer()158   public static ByteBufferSerializer<DBID> getDBIDSerializer() {
159     return DBIDFactory.FACTORY.getDBIDSerializer();
160   }
161 
162   /**
163    * Get a serializer for DBIDs with static size.
164    *
165    * @return DBID serializer
166    */
getDBIDSerializerStatic()167   public static ByteBufferSerializer<DBID> getDBIDSerializerStatic() {
168     return DBIDFactory.FACTORY.getDBIDSerializerStatic();
169   }
170 
171   /**
172    * Generate a single DBID.
173    *
174    * @return A single DBID
175    */
generateSingleDBID()176   public static DBID generateSingleDBID() {
177     return DBIDFactory.FACTORY.generateSingleDBID();
178   }
179 
180   /**
181    * Return a single DBID for reuse.
182    *
183    * @param id DBID to deallocate
184    */
deallocateSingleDBID(DBID id)185   public static void deallocateSingleDBID(DBID id) {
186     DBIDFactory.FACTORY.deallocateSingleDBID(id);
187   }
188 
189   /**
190    * Generate a static DBID range.
191    *
192    * @param size Requested size
193    * @return DBID range
194    */
generateStaticDBIDRange(int size)195   public static DBIDRange generateStaticDBIDRange(int size) {
196     return DBIDFactory.FACTORY.generateStaticDBIDRange(size);
197   }
198 
199   /**
200    * Deallocate a static DBID range.
201    *
202    * @param range Range to deallocate
203    */
deallocateDBIDRange(DBIDRange range)204   public static void deallocateDBIDRange(DBIDRange range) {
205     DBIDFactory.FACTORY.deallocateDBIDRange(range);
206   }
207 
208   /**
209    * Make a new DBID variable.
210    *
211    * @param val Initial value.
212    * @return Variable
213    */
newVar(DBIDRef val)214   public static DBIDVar newVar(DBIDRef val) {
215     return DBIDFactory.FACTORY.newVar(val);
216   }
217 
218   /**
219    * Make a new DBID variable.
220    *
221    * @return Variable
222    */
newVar()223   public static DBIDVar newVar() {
224     return DBIDFactory.FACTORY.newVar(DBIDFactory.FACTORY.invalid());
225   }
226 
227   /**
228    * Make a new (modifiable) array of DBIDs.
229    *
230    * @return New array
231    */
newArray()232   public static ArrayModifiableDBIDs newArray() {
233     return DBIDFactory.FACTORY.newArray();
234   }
235 
236   /**
237    * Make a new (modifiable) hash set of DBIDs.
238    *
239    * @return New hash set
240    */
newHashSet()241   public static HashSetModifiableDBIDs newHashSet() {
242     return DBIDFactory.FACTORY.newHashSet();
243   }
244 
245   /**
246    * Make a new (modifiable) array of DBIDs.
247    *
248    * @param size Size hint
249    * @return New array
250    */
newArray(int size)251   public static ArrayModifiableDBIDs newArray(int size) {
252     return DBIDFactory.FACTORY.newArray(size);
253   }
254 
255   /**
256    * Make a new (modifiable) hash set of DBIDs.
257    *
258    * @param size Size hint
259    * @return New hash set
260    */
newHashSet(int size)261   public static HashSetModifiableDBIDs newHashSet(int size) {
262     return DBIDFactory.FACTORY.newHashSet(size);
263   }
264 
265   /**
266    * Make a new (modifiable) array of DBIDs.
267    *
268    * @param existing Existing DBIDs
269    * @return New array
270    */
newArray(DBIDs existing)271   public static ArrayModifiableDBIDs newArray(DBIDs existing) {
272     return DBIDFactory.FACTORY.newArray(existing);
273   }
274 
275   /**
276    * Make a new (modifiable) hash set of DBIDs.
277    *
278    * @param existing Existing DBIDs
279    * @return New hash set
280    */
newHashSet(DBIDs existing)281   public static HashSetModifiableDBIDs newHashSet(DBIDs existing) {
282     return DBIDFactory.FACTORY.newHashSet(existing);
283   }
284 
285   /**
286    * Compute the set intersection of two sets.
287    *
288    * @param first First set
289    * @param second Second set
290    * @return intersection
291    */
intersection(DBIDs first, DBIDs second)292   public static ModifiableDBIDs intersection(DBIDs first, DBIDs second) {
293     // If exactly one is a Set, use it as second parameter.
294     if(second instanceof SetDBIDs) {
295       if(!(first instanceof SetDBIDs)) {
296         return internalIntersection(first, second);
297       }
298     }
299     else if(first instanceof SetDBIDs) {
300       return internalIntersection(second, first);
301     }
302     // Both are the same type: both set or both non set.
303     // Smaller goes first.
304     return first.size() <= second.size() ? internalIntersection(first, second) : internalIntersection(second, first);
305   }
306 
307   /**
308    * Compute the set intersection of two sets.
309    *
310    * @param first First set
311    * @param second Second set
312    * @return result.
313    */
internalIntersection(DBIDs first, DBIDs second)314   private static ModifiableDBIDs internalIntersection(DBIDs first, DBIDs second) {
315     second = second.size() > 16 && !(second instanceof SetDBIDs) ? newHashSet(second) : second;
316     ModifiableDBIDs inter = newHashSet(first.size());
317     for(DBIDIter it = first.iter(); it.valid(); it.advance()) {
318       if(second.contains(it)) {
319         inter.add(it);
320       }
321     }
322     return inter;
323   }
324 
325   /**
326    * Compute the set intersection size of two sets.
327    *
328    * @param first First set
329    * @param second Second set
330    * @return size
331    */
intersectionSize(DBIDs first, DBIDs second)332   public static int intersectionSize(DBIDs first, DBIDs second) {
333     // If exactly one is a Set, use it as second parameter.
334     if(second instanceof SetDBIDs) {
335       if(!(first instanceof SetDBIDs)) {
336         return internalIntersectionSize(first, second);
337       }
338     }
339     else if(first instanceof SetDBIDs) {
340       return internalIntersectionSize(second, first);
341     }
342     // Both are the same type: both set or both non set.
343     // Smaller goes first.
344     return first.size() <= second.size() ? internalIntersectionSize(first, second) : internalIntersectionSize(second, first);
345   }
346 
347   /**
348    * Compute the set intersection size of two sets.
349    *
350    * @param first First set
351    * @param second Second set
352    * @return size
353    */
internalIntersectionSize(DBIDs first, DBIDs second)354   private static int internalIntersectionSize(DBIDs first, DBIDs second) {
355     second = second.size() > 16 && !(second instanceof SetDBIDs) ? newHashSet(second) : second;
356     int c = 0;
357     for(DBIDIter it = first.iter(); it.valid(); it.advance()) {
358       if(second.contains(it)) {
359         c++;
360       }
361     }
362     return c;
363   }
364 
365   /**
366    * Compute the set symmetric intersection of two sets.
367    *
368    * @param first First set
369    * @param second Second set
370    * @param firstonly OUTPUT: elements only in first. MUST BE EMPTY
371    * @param intersection OUTPUT: elements in intersection. MUST BE EMPTY
372    * @param secondonly OUTPUT: elements only in second. MUST BE EMPTY
373    */
374   // TODO: optimize?
symmetricIntersection(DBIDs first, DBIDs second, HashSetModifiableDBIDs firstonly, HashSetModifiableDBIDs intersection, HashSetModifiableDBIDs secondonly)375   public static void symmetricIntersection(DBIDs first, DBIDs second, HashSetModifiableDBIDs firstonly, HashSetModifiableDBIDs intersection, HashSetModifiableDBIDs secondonly) {
376     if(first.size() > second.size()) {
377       symmetricIntersection(second, first, secondonly, intersection, firstonly);
378       return;
379     }
380     assert (firstonly.size() == 0) : "OUTPUT set should be empty!";
381     assert (intersection.size() == 0) : "OUTPUT set should be empty!";
382     assert (secondonly.size() == 0) : "OUTPUT set should be empty!";
383     // Initialize with second
384     secondonly.addDBIDs(second);
385     for(DBIDIter it = first.iter(); it.valid(); it.advance()) {
386       // Try to remove
387       (secondonly.remove(it) ? intersection : firstonly).add(it);
388     }
389   }
390 
391   /**
392    * Returns the union of the two specified collection of IDs.
393    *
394    * @param ids1 the first collection
395    * @param ids2 the second collection
396    * @return the union of ids1 and ids2 without duplicates
397    */
union(DBIDs ids1, DBIDs ids2)398   public static ModifiableDBIDs union(DBIDs ids1, DBIDs ids2) {
399     ModifiableDBIDs result = DBIDUtil.newHashSet(Math.max(ids1.size(), ids2.size()));
400     result.addDBIDs(ids1);
401     result.addDBIDs(ids2);
402     return result;
403   }
404 
405   /**
406    * Returns the difference of the two specified collection of IDs.
407    *
408    * @param ids1 the first collection
409    * @param ids2 the second collection
410    * @return the difference of ids1 minus ids2
411    */
difference(DBIDs ids1, DBIDs ids2)412   public static ModifiableDBIDs difference(DBIDs ids1, DBIDs ids2) {
413     ModifiableDBIDs result = DBIDUtil.newHashSet(ids1);
414     result.removeDBIDs(ids2);
415     return result;
416   }
417 
418   /**
419    * Wrap an existing DBIDs collection to be unmodifiable.
420    *
421    * @param existing Existing collection
422    * @return Unmodifiable collection
423    */
makeUnmodifiable(DBIDs existing)424   public static StaticDBIDs makeUnmodifiable(DBIDs existing) {
425     return DBIDFactory.FACTORY.makeUnmodifiable(existing);
426   }
427 
428   /**
429    * Ensure that the given DBIDs are array-indexable.
430    *
431    * @param ids IDs
432    * @return Array DBIDs.
433    */
ensureArray(DBIDs ids)434   public static ArrayDBIDs ensureArray(DBIDs ids) {
435     return ids instanceof ArrayDBIDs ? (ArrayDBIDs) ids : newArray(ids);
436   }
437 
438   /**
439    * Ensure that the given DBIDs support fast "contains" operations.
440    *
441    * @param ids IDs
442    * @return Set DBIDs.
443    */
ensureSet(DBIDs ids)444   public static SetDBIDs ensureSet(DBIDs ids) {
445     return ids instanceof SetDBIDs ? (SetDBIDs) ids : newHashSet(ids);
446   }
447 
448   /**
449    * Ensure modifiable.
450    *
451    * @param ids IDs
452    * @return Modifiable DBIDs.
453    */
ensureModifiable(DBIDs ids)454   public static ModifiableDBIDs ensureModifiable(DBIDs ids) {
455     return ids instanceof ModifiableDBIDs ? (ModifiableDBIDs) ids : //
456         ids instanceof HashSetDBIDs ? newHashSet(ids) : newArray(ids);
457   }
458 
459   /**
460    * Make a DBID pair.
461    *
462    * @param id1 first ID
463    * @param id2 second ID
464    *
465    * @return DBID pair
466    */
newPair(DBIDRef id1, DBIDRef id2)467   public static DBIDPair newPair(DBIDRef id1, DBIDRef id2) {
468     return DBIDFactory.FACTORY.newPair(id1, id2);
469   }
470 
471   /**
472    * Make a DoubleDBIDPair.
473    *
474    * @param val double value
475    * @param id ID
476    * @return new pair
477    */
newPair(double val, DBIDRef id)478   public static DoubleDBIDPair newPair(double val, DBIDRef id) {
479     return DBIDFactory.FACTORY.newPair(val, id);
480   }
481 
482   /**
483    * Create an appropriate heap for the distance type.
484    *
485    * This will use a double heap if appropriate.
486    *
487    * @param k K value
488    * @return New heap of size k, appropriate for this distance type.
489    */
newHeap(int k)490   public static KNNHeap newHeap(int k) {
491     return DBIDFactory.FACTORY.newHeap(k);
492   }
493 
494   /**
495    * Build a new heap from a given list.
496    *
497    * @param exist Existing result
498    * @return New heap
499    */
newHeap(KNNList exist)500   public static KNNHeap newHeap(KNNList exist) {
501     return DBIDFactory.FACTORY.newHeap(exist);
502   }
503 
504   /**
505    * Produce a random shuffling of the given DBID array.
506    *
507    * @param ids Original DBIDs, no duplicates allowed
508    * @param rnd Random generator
509    */
randomShuffle(ArrayModifiableDBIDs ids, RandomFactory rnd)510   public static void randomShuffle(ArrayModifiableDBIDs ids, RandomFactory rnd) {
511     randomShuffle(ids, rnd.getSingleThreadedRandom(), ids.size());
512   }
513 
514   /**
515    * Produce a random shuffling of the given DBID array.
516    *
517    * @param ids Original DBIDs, no duplicates allowed
518    * @param random Random generator
519    */
randomShuffle(ArrayModifiableDBIDs ids, Random random)520   public static void randomShuffle(ArrayModifiableDBIDs ids, Random random) {
521     randomShuffle(ids, random, ids.size());
522   }
523 
524   /**
525    * Produce a random shuffling of the given DBID array.
526    *
527    * Only the first {@code limit} elements will be fully randomized, but the
528    * remaining objects will also be changed.
529    *
530    * @param ids Original DBIDs, no duplicates allowed
531    * @param random Random generator
532    * @param limit Shuffling limit.
533    */
randomShuffle(ArrayModifiableDBIDs ids, Random random, final int limit)534   public static void randomShuffle(ArrayModifiableDBIDs ids, Random random, final int limit) {
535     final int end = ids.size();
536     for(int i = 1; i < limit; i++) {
537       ids.swap(i - 1, i + random.nextInt(end - i));
538     }
539   }
540 
541   /**
542    * Produce a random sample of the given DBIDs.
543    *
544    * @param source Original DBIDs, no duplicates allowed
545    * @param k k Parameter
546    * @param seed Random generator seed
547    * @return new DBIDs
548    */
randomSample(DBIDs source, int k, int seed)549   public static ModifiableDBIDs randomSample(DBIDs source, int k, int seed) {
550     return randomSample(source, k, new Random(seed));
551   }
552 
553   /**
554    * Produce a random sample of the given DBIDs.
555    *
556    * @param source Original DBIDs, no duplicates allowed
557    * @param k k Parameter
558    * @param seed Random generator seed
559    * @return new DBIDs
560    */
randomSample(DBIDs source, int k, Long seed)561   public static ModifiableDBIDs randomSample(DBIDs source, int k, Long seed) {
562     return randomSample(source, k, seed != null ? new Random(seed.longValue()) : new Random());
563   }
564 
565   /**
566    * Produce a random sample of the given DBIDs.
567    *
568    * @param source Original DBIDs, no duplicates allowed
569    * @param k k Parameter
570    * @param rnd Random generator
571    * @return new DBIDs
572    */
randomSample(DBIDs source, int k, RandomFactory rnd)573   public static ModifiableDBIDs randomSample(DBIDs source, int k, RandomFactory rnd) {
574     return randomSample(source, k, rnd.getSingleThreadedRandom());
575   }
576 
577   /**
578    * Produce a random sample of the given DBIDs.
579    *
580    * @param source Original DBIDs, no duplicates allowed
581    * @param except Excluded object
582    * @param k k Parameter
583    * @param rnd Random generator
584    * @return new DBIDs
585    */
randomSampleExcept(DBIDs source, DBIDRef except, int k, RandomFactory rnd)586   public static ModifiableDBIDs randomSampleExcept(DBIDs source, DBIDRef except, int k, RandomFactory rnd) {
587     return randomSampleExcept(source, except, k, rnd.getSingleThreadedRandom());
588   }
589 
590   /**
591    * Produce a random sample of the given DBIDs.
592    *
593    * @param source Original DBIDs, no duplicates allowed
594    * @param k k Parameter
595    * @param random Random generator
596    * @return new DBIDs
597    */
randomSample(DBIDs source, int k, Random random)598   public static ModifiableDBIDs randomSample(DBIDs source, int k, Random random) {
599     if(k < 0 || k > source.size()) {
600       throw new IllegalArgumentException("Illegal value for size of random sample: " + k + " > " + source.size() + " or < 0");
601     }
602     // Fast, and we're single-threaded here anyway.
603     random = (random != null) ? random : new FastNonThreadsafeRandom();
604 
605     // TODO: better balancing for different sizes
606     // Two methods: constructive vs. destructive
607     if(k < source.size() >> 2) {
608       ArrayDBIDs aids = DBIDUtil.ensureArray(source);
609       DBIDArrayIter iter = aids.iter();
610       final int size = aids.size();
611       HashSetModifiableDBIDs sample = DBIDUtil.newHashSet(k);
612       while(sample.size() < k) {
613         sample.add(iter.seek(random.nextInt(size)));
614       }
615       return sample;
616     }
617     else {
618       ArrayModifiableDBIDs sample = DBIDUtil.newArray(source);
619       randomShuffle(sample, random, k);
620       // Delete trailing elements
621       for(int i = sample.size() - 1; i >= k; i--) {
622         sample.remove(i);
623       }
624       return sample;
625     }
626   }
627 
628   /**
629    * Produce a random sample of the given DBIDs.
630    *
631    * @param source Original DBIDs, no duplicates allowed
632    * @param except Excluded object
633    * @param k k Parameter
634    * @param random Random generator
635    * @return new DBIDs
636    */
randomSampleExcept(DBIDs source, DBIDRef except, int k, Random random)637   public static ModifiableDBIDs randomSampleExcept(DBIDs source, DBIDRef except, int k, Random random) {
638     if(k < 0 || k > source.size()) {
639       throw new IllegalArgumentException("Illegal value for size of random sample: " + k + " > " + source.size() + " or < 0");
640     }
641     // Fast, and we're single-threaded here anyway.
642     random = (random != null) ? random : new FastNonThreadsafeRandom();
643 
644     // TODO: better balancing for different sizes
645     // Two methods: constructive vs. destructive
646     if(k < source.size() >> 2) {
647       ArrayDBIDs aids = DBIDUtil.ensureArray(source);
648       DBIDArrayIter iter = aids.iter();
649       int size = aids.size();
650       HashSetModifiableDBIDs sample = DBIDUtil.newHashSet(k);
651       while(sample.size() < k) {
652         if(!equal(iter.seek(random.nextInt(size)), except)) {
653           sample.add(iter);
654         }
655       }
656       return sample;
657     }
658     else {
659       ArrayModifiableDBIDs sample = DBIDUtil.newArray(source);
660       randomShuffle(sample, random, k);
661       // Avoid excluded object:
662       for(DBIDArrayIter iter = sample.iter(); iter.valid() && iter.getOffset() < k; iter.advance()) {
663         if(equal(iter, except)) {
664           sample.swap(iter.getOffset(), k);
665           break; // Assuming that except occurrs only once!
666         }
667       }
668       // Delete trailing elements
669       for(int i = sample.size() - 1; i >= k; i--) {
670         sample.remove(i);
671       }
672       return sample;
673     }
674   }
675 
676   /**
677    * Produce a random sample of the given DBIDs.
678    *
679    * @param ids Original ids, no duplicates allowed
680    * @param rate Sampling rate
681    * @param random Random generator
682    * @return Sample
683    */
randomSample(DBIDs ids, double rate, RandomFactory random)684   public static DBIDs randomSample(DBIDs ids, double rate, RandomFactory random) {
685     return randomSample(ids, rate, random.getSingleThreadedRandom());
686   }
687 
688   /**
689    * Produce a random sample of the given DBIDs.
690    * <ul>
691    * <li>values less or equal 0 mean no sampling.
692    * <li>values larger than 0, but at most 1, are relative rates.
693    * <li>values larger than 1 are supposed to be integer counts.
694    * </ul>
695    *
696    * @param ids Original ids, no duplicates allowed
697    * @param rate Sampling rate
698    * @param random Random generator
699    * @return Sample
700    */
randomSample(DBIDs ids, double rate, Random random)701   public static DBIDs randomSample(DBIDs ids, double rate, Random random) {
702     return rate <= 0 ? ids : // Magic for "no sampling"
703         randomSample(ids, Math.min(ids.size(), //
704             (int) (rate <= 1 ? rate * ids.size() : rate)), random);
705   }
706 
707   /**
708    * Draw a single random sample.
709    *
710    * @param ids IDs to draw from
711    * @param random Random value
712    * @return Random ID
713    */
randomSample(DBIDs ids, Random random)714   public static DBIDVar randomSample(DBIDs ids, Random random) {
715     return DBIDUtil.ensureArray(ids).assignVar(random.nextInt(ids.size()), DBIDUtil.newVar());
716   }
717 
718   /**
719    * Draw a single random sample.
720    *
721    * @param ids IDs to draw from
722    * @param random Random value
723    * @return Random ID
724    */
randomSample(DBIDs ids, RandomFactory random)725   public static DBIDVar randomSample(DBIDs ids, RandomFactory random) {
726     return randomSample(ids, random.getSingleThreadedRandom());
727   }
728 
729   /**
730    * Randomly split IDs into {@code p} partitions of almost-equal size.
731    *
732    * @param ids Original DBIDs
733    * @param p Desired number of partitions.
734    * @param rnd Random generator
735    */
randomSplit(DBIDs ids, int p, RandomFactory rnd)736   public static ArrayDBIDs[] randomSplit(DBIDs ids, int p, RandomFactory rnd) {
737     return randomSplit(ids, p, rnd.getSingleThreadedRandom());
738   }
739 
740   /**
741    * Randomly split IDs into {@code p} partitions of almost-equal size.
742    *
743    * @param oids Original DBIDs
744    * @param p Desired number of partitions.
745    * @param random Random generator
746    */
randomSplit(DBIDs oids, int p, Random random)747   public static ArrayDBIDs[] randomSplit(DBIDs oids, int p, Random random) {
748     // Fast, and we're single-threaded here anyway.
749     random = random != null ? random : new FastNonThreadsafeRandom();
750     ArrayModifiableDBIDs ids = newArray(oids);
751     final int size = ids.size();
752     ArrayDBIDs[] split = new ArrayDBIDs[p];
753     // Shuffle
754     for(int i = 1; i < size; i++) {
755       ids.swap(i - 1, i + random.nextInt(size - i));
756     }
757     final int minsize = size / p, // Floor.
758         extra = size % p; // Remainder
759     for(int beg = 0, part = 0; part < p; part++) {
760       // First partitions are smaller, last partitions are larger.
761       final int psize = minsize + ((part < extra) ? 1 : 0);
762       split[part] = ids.slice(beg, beg + psize);
763       beg += psize;
764     }
765     return split;
766   }
767 
768   /**
769    * Create a modifiable list to store distance-DBID pairs.
770    *
771    * @param size Estimated upper list size
772    * @return Empty list
773    */
newDistanceDBIDList(int size)774   public static ModifiableDoubleDBIDList newDistanceDBIDList(int size) {
775     return DBIDFactory.FACTORY.newDistanceDBIDList(size);
776   }
777 
778   /**
779    * Create a modifiable list to store distance-DBID pairs.
780    *
781    * @return Empty list
782    */
newDistanceDBIDList()783   public static ModifiableDoubleDBIDList newDistanceDBIDList() {
784     return DBIDFactory.FACTORY.newDistanceDBIDList();
785   }
786 
787   /**
788    * Assert that the presented ids constitute a continuous {@link DBIDRange}.
789    *
790    * @param ids ID range.
791    * @return DBID range.
792    * @throws AbortException
793    */
assertRange(DBIDs ids)794   public static DBIDRange assertRange(DBIDs ids) {
795     if(!(ids instanceof DBIDRange)) {
796       throw new AbortException("This class may currently only be used with static databases and DBID ranges.");
797     }
798     return (DBIDRange) ids;
799   }
800 }
801