1 /* 2 * This file is part of ELKI: 3 * Environment for Developing KDD-Applications Supported by Index-Structures 4 * 5 * Copyright (C) 2018 6 * ELKI Development Team 7 * 8 * This program is free software: you can redistribute it and/or modify 9 * it under the terms of the GNU Affero General Public License as published by 10 * the Free Software Foundation, either version 3 of the License, or 11 * (at your option) any later version. 12 * 13 * This program is distributed in the hope that it will be useful, 14 * but WITHOUT ANY WARRANTY; without even the implied warranty of 15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 * GNU Affero General Public License for more details. 17 * 18 * You should have received a copy of the GNU Affero General Public License 19 * along with this program. If not, see <http://www.gnu.org/licenses/>. 20 */ 21 package de.lmu.ifi.dbs.elki.database.ids; 22 23 import java.util.Random; 24 25 import de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException; 26 import de.lmu.ifi.dbs.elki.utilities.io.ByteBufferSerializer; 27 import de.lmu.ifi.dbs.elki.utilities.random.FastNonThreadsafeRandom; 28 import de.lmu.ifi.dbs.elki.utilities.random.RandomFactory; 29 30 /** 31 * DBID Utility functions. 32 * 33 * @author Erich Schubert 34 * @since 0.4.0 35 * 36 * @opt nodefillcolor LemonChiffon 37 * 38 * @has - - - DBIDs 39 * @has - - - DBIDRef 40 * @composed - - - DBIDFactory 41 */ 42 public final class DBIDUtil { 43 /** 44 * Static - no public constructor. 45 */ DBIDUtil()46 private DBIDUtil() { 47 // Never called. 48 } 49 50 /** 51 * Final, global copy of empty DBIDs. 52 */ 53 public static final EmptyDBIDs EMPTYDBIDS = new EmptyDBIDs(); 54 55 /** 56 * Get the invalid special ID. 57 * 58 * @return invalid ID value 59 */ invalid()60 public static DBIDRef invalid() { 61 return DBIDFactory.FACTORY.invalid(); 62 } 63 64 /** 65 * Import and integer as DBID. 66 * <p> 67 * Note: this may not be possible for some factories! 68 * 69 * @param id Integer ID to import 70 * @return DBID 71 */ importInteger(int id)72 public static DBID importInteger(int id) { 73 return DBIDFactory.FACTORY.importInteger(id); 74 } 75 76 /** 77 * Export a DBID as int. 78 * <p> 79 * Note: this may not be possible for some factories! 80 * 81 * @param id DBID to export 82 * @return integer value 83 */ asInteger(DBIDRef id)84 public static int asInteger(DBIDRef id) { 85 return id.internalGetIndex(); 86 } 87 88 /** 89 * Compare two DBIDs. 90 * 91 * @param id1 First ID 92 * @param id2 Second ID 93 * @return Comparison result 94 */ compare(DBIDRef id1, DBIDRef id2)95 public static int compare(DBIDRef id1, DBIDRef id2) { 96 return DBIDFactory.FACTORY.compare(id1, id2); 97 } 98 99 /** 100 * Test two DBIDs for equality. 101 * 102 * @param id1 First ID 103 * @param id2 Second ID 104 * @return Comparison result 105 */ equal(DBIDRef id1, DBIDRef id2)106 public static boolean equal(DBIDRef id1, DBIDRef id2) { 107 return DBIDFactory.FACTORY.equal(id1, id2); 108 } 109 110 /** 111 * Dereference a DBID reference. 112 * 113 * @param ref DBID reference 114 * @return DBID 115 */ deref(DBIDRef ref)116 public static DBID deref(DBIDRef ref) { 117 return ref instanceof DBID ? (DBID) ref : importInteger(ref.internalGetIndex()); 118 } 119 120 /** 121 * Format a DBID as string. 122 * 123 * @param id DBID 124 * @return String representation 125 */ toString(DBIDRef id)126 public static String toString(DBIDRef id) { 127 return DBIDFactory.FACTORY.toString(id); 128 } 129 130 /** 131 * Format a DBID as string. 132 * 133 * @param ids DBIDs 134 * @return String representation 135 */ toString(DBIDs ids)136 public static String toString(DBIDs ids) { 137 final DBIDFactory factory = DBIDFactory.FACTORY; 138 if(ids instanceof DBID) { 139 return factory.toString((DBID) ids); 140 } 141 if(ids.isEmpty()) { 142 return ""; 143 } 144 DBIDIter iter = ids.iter(); 145 StringBuilder buf = new StringBuilder(ids.size() * 6) // 146 .append(factory.toString(iter)); 147 while(iter.advance().valid()) { 148 buf.append(',').append(factory.toString(iter)); 149 } 150 return buf.toString(); 151 } 152 153 /** 154 * Get a serializer for DBIDs. 155 * 156 * @return DBID serializer 157 */ getDBIDSerializer()158 public static ByteBufferSerializer<DBID> getDBIDSerializer() { 159 return DBIDFactory.FACTORY.getDBIDSerializer(); 160 } 161 162 /** 163 * Get a serializer for DBIDs with static size. 164 * 165 * @return DBID serializer 166 */ getDBIDSerializerStatic()167 public static ByteBufferSerializer<DBID> getDBIDSerializerStatic() { 168 return DBIDFactory.FACTORY.getDBIDSerializerStatic(); 169 } 170 171 /** 172 * Generate a single DBID. 173 * 174 * @return A single DBID 175 */ generateSingleDBID()176 public static DBID generateSingleDBID() { 177 return DBIDFactory.FACTORY.generateSingleDBID(); 178 } 179 180 /** 181 * Return a single DBID for reuse. 182 * 183 * @param id DBID to deallocate 184 */ deallocateSingleDBID(DBID id)185 public static void deallocateSingleDBID(DBID id) { 186 DBIDFactory.FACTORY.deallocateSingleDBID(id); 187 } 188 189 /** 190 * Generate a static DBID range. 191 * 192 * @param size Requested size 193 * @return DBID range 194 */ generateStaticDBIDRange(int size)195 public static DBIDRange generateStaticDBIDRange(int size) { 196 return DBIDFactory.FACTORY.generateStaticDBIDRange(size); 197 } 198 199 /** 200 * Deallocate a static DBID range. 201 * 202 * @param range Range to deallocate 203 */ deallocateDBIDRange(DBIDRange range)204 public static void deallocateDBIDRange(DBIDRange range) { 205 DBIDFactory.FACTORY.deallocateDBIDRange(range); 206 } 207 208 /** 209 * Make a new DBID variable. 210 * 211 * @param val Initial value. 212 * @return Variable 213 */ newVar(DBIDRef val)214 public static DBIDVar newVar(DBIDRef val) { 215 return DBIDFactory.FACTORY.newVar(val); 216 } 217 218 /** 219 * Make a new DBID variable. 220 * 221 * @return Variable 222 */ newVar()223 public static DBIDVar newVar() { 224 return DBIDFactory.FACTORY.newVar(DBIDFactory.FACTORY.invalid()); 225 } 226 227 /** 228 * Make a new (modifiable) array of DBIDs. 229 * 230 * @return New array 231 */ newArray()232 public static ArrayModifiableDBIDs newArray() { 233 return DBIDFactory.FACTORY.newArray(); 234 } 235 236 /** 237 * Make a new (modifiable) hash set of DBIDs. 238 * 239 * @return New hash set 240 */ newHashSet()241 public static HashSetModifiableDBIDs newHashSet() { 242 return DBIDFactory.FACTORY.newHashSet(); 243 } 244 245 /** 246 * Make a new (modifiable) array of DBIDs. 247 * 248 * @param size Size hint 249 * @return New array 250 */ newArray(int size)251 public static ArrayModifiableDBIDs newArray(int size) { 252 return DBIDFactory.FACTORY.newArray(size); 253 } 254 255 /** 256 * Make a new (modifiable) hash set of DBIDs. 257 * 258 * @param size Size hint 259 * @return New hash set 260 */ newHashSet(int size)261 public static HashSetModifiableDBIDs newHashSet(int size) { 262 return DBIDFactory.FACTORY.newHashSet(size); 263 } 264 265 /** 266 * Make a new (modifiable) array of DBIDs. 267 * 268 * @param existing Existing DBIDs 269 * @return New array 270 */ newArray(DBIDs existing)271 public static ArrayModifiableDBIDs newArray(DBIDs existing) { 272 return DBIDFactory.FACTORY.newArray(existing); 273 } 274 275 /** 276 * Make a new (modifiable) hash set of DBIDs. 277 * 278 * @param existing Existing DBIDs 279 * @return New hash set 280 */ newHashSet(DBIDs existing)281 public static HashSetModifiableDBIDs newHashSet(DBIDs existing) { 282 return DBIDFactory.FACTORY.newHashSet(existing); 283 } 284 285 /** 286 * Compute the set intersection of two sets. 287 * 288 * @param first First set 289 * @param second Second set 290 * @return intersection 291 */ intersection(DBIDs first, DBIDs second)292 public static ModifiableDBIDs intersection(DBIDs first, DBIDs second) { 293 // If exactly one is a Set, use it as second parameter. 294 if(second instanceof SetDBIDs) { 295 if(!(first instanceof SetDBIDs)) { 296 return internalIntersection(first, second); 297 } 298 } 299 else if(first instanceof SetDBIDs) { 300 return internalIntersection(second, first); 301 } 302 // Both are the same type: both set or both non set. 303 // Smaller goes first. 304 return first.size() <= second.size() ? internalIntersection(first, second) : internalIntersection(second, first); 305 } 306 307 /** 308 * Compute the set intersection of two sets. 309 * 310 * @param first First set 311 * @param second Second set 312 * @return result. 313 */ internalIntersection(DBIDs first, DBIDs second)314 private static ModifiableDBIDs internalIntersection(DBIDs first, DBIDs second) { 315 second = second.size() > 16 && !(second instanceof SetDBIDs) ? newHashSet(second) : second; 316 ModifiableDBIDs inter = newHashSet(first.size()); 317 for(DBIDIter it = first.iter(); it.valid(); it.advance()) { 318 if(second.contains(it)) { 319 inter.add(it); 320 } 321 } 322 return inter; 323 } 324 325 /** 326 * Compute the set intersection size of two sets. 327 * 328 * @param first First set 329 * @param second Second set 330 * @return size 331 */ intersectionSize(DBIDs first, DBIDs second)332 public static int intersectionSize(DBIDs first, DBIDs second) { 333 // If exactly one is a Set, use it as second parameter. 334 if(second instanceof SetDBIDs) { 335 if(!(first instanceof SetDBIDs)) { 336 return internalIntersectionSize(first, second); 337 } 338 } 339 else if(first instanceof SetDBIDs) { 340 return internalIntersectionSize(second, first); 341 } 342 // Both are the same type: both set or both non set. 343 // Smaller goes first. 344 return first.size() <= second.size() ? internalIntersectionSize(first, second) : internalIntersectionSize(second, first); 345 } 346 347 /** 348 * Compute the set intersection size of two sets. 349 * 350 * @param first First set 351 * @param second Second set 352 * @return size 353 */ internalIntersectionSize(DBIDs first, DBIDs second)354 private static int internalIntersectionSize(DBIDs first, DBIDs second) { 355 second = second.size() > 16 && !(second instanceof SetDBIDs) ? newHashSet(second) : second; 356 int c = 0; 357 for(DBIDIter it = first.iter(); it.valid(); it.advance()) { 358 if(second.contains(it)) { 359 c++; 360 } 361 } 362 return c; 363 } 364 365 /** 366 * Compute the set symmetric intersection of two sets. 367 * 368 * @param first First set 369 * @param second Second set 370 * @param firstonly OUTPUT: elements only in first. MUST BE EMPTY 371 * @param intersection OUTPUT: elements in intersection. MUST BE EMPTY 372 * @param secondonly OUTPUT: elements only in second. MUST BE EMPTY 373 */ 374 // TODO: optimize? symmetricIntersection(DBIDs first, DBIDs second, HashSetModifiableDBIDs firstonly, HashSetModifiableDBIDs intersection, HashSetModifiableDBIDs secondonly)375 public static void symmetricIntersection(DBIDs first, DBIDs second, HashSetModifiableDBIDs firstonly, HashSetModifiableDBIDs intersection, HashSetModifiableDBIDs secondonly) { 376 if(first.size() > second.size()) { 377 symmetricIntersection(second, first, secondonly, intersection, firstonly); 378 return; 379 } 380 assert (firstonly.size() == 0) : "OUTPUT set should be empty!"; 381 assert (intersection.size() == 0) : "OUTPUT set should be empty!"; 382 assert (secondonly.size() == 0) : "OUTPUT set should be empty!"; 383 // Initialize with second 384 secondonly.addDBIDs(second); 385 for(DBIDIter it = first.iter(); it.valid(); it.advance()) { 386 // Try to remove 387 (secondonly.remove(it) ? intersection : firstonly).add(it); 388 } 389 } 390 391 /** 392 * Returns the union of the two specified collection of IDs. 393 * 394 * @param ids1 the first collection 395 * @param ids2 the second collection 396 * @return the union of ids1 and ids2 without duplicates 397 */ union(DBIDs ids1, DBIDs ids2)398 public static ModifiableDBIDs union(DBIDs ids1, DBIDs ids2) { 399 ModifiableDBIDs result = DBIDUtil.newHashSet(Math.max(ids1.size(), ids2.size())); 400 result.addDBIDs(ids1); 401 result.addDBIDs(ids2); 402 return result; 403 } 404 405 /** 406 * Returns the difference of the two specified collection of IDs. 407 * 408 * @param ids1 the first collection 409 * @param ids2 the second collection 410 * @return the difference of ids1 minus ids2 411 */ difference(DBIDs ids1, DBIDs ids2)412 public static ModifiableDBIDs difference(DBIDs ids1, DBIDs ids2) { 413 ModifiableDBIDs result = DBIDUtil.newHashSet(ids1); 414 result.removeDBIDs(ids2); 415 return result; 416 } 417 418 /** 419 * Wrap an existing DBIDs collection to be unmodifiable. 420 * 421 * @param existing Existing collection 422 * @return Unmodifiable collection 423 */ makeUnmodifiable(DBIDs existing)424 public static StaticDBIDs makeUnmodifiable(DBIDs existing) { 425 return DBIDFactory.FACTORY.makeUnmodifiable(existing); 426 } 427 428 /** 429 * Ensure that the given DBIDs are array-indexable. 430 * 431 * @param ids IDs 432 * @return Array DBIDs. 433 */ ensureArray(DBIDs ids)434 public static ArrayDBIDs ensureArray(DBIDs ids) { 435 return ids instanceof ArrayDBIDs ? (ArrayDBIDs) ids : newArray(ids); 436 } 437 438 /** 439 * Ensure that the given DBIDs support fast "contains" operations. 440 * 441 * @param ids IDs 442 * @return Set DBIDs. 443 */ ensureSet(DBIDs ids)444 public static SetDBIDs ensureSet(DBIDs ids) { 445 return ids instanceof SetDBIDs ? (SetDBIDs) ids : newHashSet(ids); 446 } 447 448 /** 449 * Ensure modifiable. 450 * 451 * @param ids IDs 452 * @return Modifiable DBIDs. 453 */ ensureModifiable(DBIDs ids)454 public static ModifiableDBIDs ensureModifiable(DBIDs ids) { 455 return ids instanceof ModifiableDBIDs ? (ModifiableDBIDs) ids : // 456 ids instanceof HashSetDBIDs ? newHashSet(ids) : newArray(ids); 457 } 458 459 /** 460 * Make a DBID pair. 461 * 462 * @param id1 first ID 463 * @param id2 second ID 464 * 465 * @return DBID pair 466 */ newPair(DBIDRef id1, DBIDRef id2)467 public static DBIDPair newPair(DBIDRef id1, DBIDRef id2) { 468 return DBIDFactory.FACTORY.newPair(id1, id2); 469 } 470 471 /** 472 * Make a DoubleDBIDPair. 473 * 474 * @param val double value 475 * @param id ID 476 * @return new pair 477 */ newPair(double val, DBIDRef id)478 public static DoubleDBIDPair newPair(double val, DBIDRef id) { 479 return DBIDFactory.FACTORY.newPair(val, id); 480 } 481 482 /** 483 * Create an appropriate heap for the distance type. 484 * 485 * This will use a double heap if appropriate. 486 * 487 * @param k K value 488 * @return New heap of size k, appropriate for this distance type. 489 */ newHeap(int k)490 public static KNNHeap newHeap(int k) { 491 return DBIDFactory.FACTORY.newHeap(k); 492 } 493 494 /** 495 * Build a new heap from a given list. 496 * 497 * @param exist Existing result 498 * @return New heap 499 */ newHeap(KNNList exist)500 public static KNNHeap newHeap(KNNList exist) { 501 return DBIDFactory.FACTORY.newHeap(exist); 502 } 503 504 /** 505 * Produce a random shuffling of the given DBID array. 506 * 507 * @param ids Original DBIDs, no duplicates allowed 508 * @param rnd Random generator 509 */ randomShuffle(ArrayModifiableDBIDs ids, RandomFactory rnd)510 public static void randomShuffle(ArrayModifiableDBIDs ids, RandomFactory rnd) { 511 randomShuffle(ids, rnd.getSingleThreadedRandom(), ids.size()); 512 } 513 514 /** 515 * Produce a random shuffling of the given DBID array. 516 * 517 * @param ids Original DBIDs, no duplicates allowed 518 * @param random Random generator 519 */ randomShuffle(ArrayModifiableDBIDs ids, Random random)520 public static void randomShuffle(ArrayModifiableDBIDs ids, Random random) { 521 randomShuffle(ids, random, ids.size()); 522 } 523 524 /** 525 * Produce a random shuffling of the given DBID array. 526 * 527 * Only the first {@code limit} elements will be fully randomized, but the 528 * remaining objects will also be changed. 529 * 530 * @param ids Original DBIDs, no duplicates allowed 531 * @param random Random generator 532 * @param limit Shuffling limit. 533 */ randomShuffle(ArrayModifiableDBIDs ids, Random random, final int limit)534 public static void randomShuffle(ArrayModifiableDBIDs ids, Random random, final int limit) { 535 final int end = ids.size(); 536 for(int i = 1; i < limit; i++) { 537 ids.swap(i - 1, i + random.nextInt(end - i)); 538 } 539 } 540 541 /** 542 * Produce a random sample of the given DBIDs. 543 * 544 * @param source Original DBIDs, no duplicates allowed 545 * @param k k Parameter 546 * @param seed Random generator seed 547 * @return new DBIDs 548 */ randomSample(DBIDs source, int k, int seed)549 public static ModifiableDBIDs randomSample(DBIDs source, int k, int seed) { 550 return randomSample(source, k, new Random(seed)); 551 } 552 553 /** 554 * Produce a random sample of the given DBIDs. 555 * 556 * @param source Original DBIDs, no duplicates allowed 557 * @param k k Parameter 558 * @param seed Random generator seed 559 * @return new DBIDs 560 */ randomSample(DBIDs source, int k, Long seed)561 public static ModifiableDBIDs randomSample(DBIDs source, int k, Long seed) { 562 return randomSample(source, k, seed != null ? new Random(seed.longValue()) : new Random()); 563 } 564 565 /** 566 * Produce a random sample of the given DBIDs. 567 * 568 * @param source Original DBIDs, no duplicates allowed 569 * @param k k Parameter 570 * @param rnd Random generator 571 * @return new DBIDs 572 */ randomSample(DBIDs source, int k, RandomFactory rnd)573 public static ModifiableDBIDs randomSample(DBIDs source, int k, RandomFactory rnd) { 574 return randomSample(source, k, rnd.getSingleThreadedRandom()); 575 } 576 577 /** 578 * Produce a random sample of the given DBIDs. 579 * 580 * @param source Original DBIDs, no duplicates allowed 581 * @param except Excluded object 582 * @param k k Parameter 583 * @param rnd Random generator 584 * @return new DBIDs 585 */ randomSampleExcept(DBIDs source, DBIDRef except, int k, RandomFactory rnd)586 public static ModifiableDBIDs randomSampleExcept(DBIDs source, DBIDRef except, int k, RandomFactory rnd) { 587 return randomSampleExcept(source, except, k, rnd.getSingleThreadedRandom()); 588 } 589 590 /** 591 * Produce a random sample of the given DBIDs. 592 * 593 * @param source Original DBIDs, no duplicates allowed 594 * @param k k Parameter 595 * @param random Random generator 596 * @return new DBIDs 597 */ randomSample(DBIDs source, int k, Random random)598 public static ModifiableDBIDs randomSample(DBIDs source, int k, Random random) { 599 if(k < 0 || k > source.size()) { 600 throw new IllegalArgumentException("Illegal value for size of random sample: " + k + " > " + source.size() + " or < 0"); 601 } 602 // Fast, and we're single-threaded here anyway. 603 random = (random != null) ? random : new FastNonThreadsafeRandom(); 604 605 // TODO: better balancing for different sizes 606 // Two methods: constructive vs. destructive 607 if(k < source.size() >> 2) { 608 ArrayDBIDs aids = DBIDUtil.ensureArray(source); 609 DBIDArrayIter iter = aids.iter(); 610 final int size = aids.size(); 611 HashSetModifiableDBIDs sample = DBIDUtil.newHashSet(k); 612 while(sample.size() < k) { 613 sample.add(iter.seek(random.nextInt(size))); 614 } 615 return sample; 616 } 617 else { 618 ArrayModifiableDBIDs sample = DBIDUtil.newArray(source); 619 randomShuffle(sample, random, k); 620 // Delete trailing elements 621 for(int i = sample.size() - 1; i >= k; i--) { 622 sample.remove(i); 623 } 624 return sample; 625 } 626 } 627 628 /** 629 * Produce a random sample of the given DBIDs. 630 * 631 * @param source Original DBIDs, no duplicates allowed 632 * @param except Excluded object 633 * @param k k Parameter 634 * @param random Random generator 635 * @return new DBIDs 636 */ randomSampleExcept(DBIDs source, DBIDRef except, int k, Random random)637 public static ModifiableDBIDs randomSampleExcept(DBIDs source, DBIDRef except, int k, Random random) { 638 if(k < 0 || k > source.size()) { 639 throw new IllegalArgumentException("Illegal value for size of random sample: " + k + " > " + source.size() + " or < 0"); 640 } 641 // Fast, and we're single-threaded here anyway. 642 random = (random != null) ? random : new FastNonThreadsafeRandom(); 643 644 // TODO: better balancing for different sizes 645 // Two methods: constructive vs. destructive 646 if(k < source.size() >> 2) { 647 ArrayDBIDs aids = DBIDUtil.ensureArray(source); 648 DBIDArrayIter iter = aids.iter(); 649 int size = aids.size(); 650 HashSetModifiableDBIDs sample = DBIDUtil.newHashSet(k); 651 while(sample.size() < k) { 652 if(!equal(iter.seek(random.nextInt(size)), except)) { 653 sample.add(iter); 654 } 655 } 656 return sample; 657 } 658 else { 659 ArrayModifiableDBIDs sample = DBIDUtil.newArray(source); 660 randomShuffle(sample, random, k); 661 // Avoid excluded object: 662 for(DBIDArrayIter iter = sample.iter(); iter.valid() && iter.getOffset() < k; iter.advance()) { 663 if(equal(iter, except)) { 664 sample.swap(iter.getOffset(), k); 665 break; // Assuming that except occurrs only once! 666 } 667 } 668 // Delete trailing elements 669 for(int i = sample.size() - 1; i >= k; i--) { 670 sample.remove(i); 671 } 672 return sample; 673 } 674 } 675 676 /** 677 * Produce a random sample of the given DBIDs. 678 * 679 * @param ids Original ids, no duplicates allowed 680 * @param rate Sampling rate 681 * @param random Random generator 682 * @return Sample 683 */ randomSample(DBIDs ids, double rate, RandomFactory random)684 public static DBIDs randomSample(DBIDs ids, double rate, RandomFactory random) { 685 return randomSample(ids, rate, random.getSingleThreadedRandom()); 686 } 687 688 /** 689 * Produce a random sample of the given DBIDs. 690 * <ul> 691 * <li>values less or equal 0 mean no sampling. 692 * <li>values larger than 0, but at most 1, are relative rates. 693 * <li>values larger than 1 are supposed to be integer counts. 694 * </ul> 695 * 696 * @param ids Original ids, no duplicates allowed 697 * @param rate Sampling rate 698 * @param random Random generator 699 * @return Sample 700 */ randomSample(DBIDs ids, double rate, Random random)701 public static DBIDs randomSample(DBIDs ids, double rate, Random random) { 702 return rate <= 0 ? ids : // Magic for "no sampling" 703 randomSample(ids, Math.min(ids.size(), // 704 (int) (rate <= 1 ? rate * ids.size() : rate)), random); 705 } 706 707 /** 708 * Draw a single random sample. 709 * 710 * @param ids IDs to draw from 711 * @param random Random value 712 * @return Random ID 713 */ randomSample(DBIDs ids, Random random)714 public static DBIDVar randomSample(DBIDs ids, Random random) { 715 return DBIDUtil.ensureArray(ids).assignVar(random.nextInt(ids.size()), DBIDUtil.newVar()); 716 } 717 718 /** 719 * Draw a single random sample. 720 * 721 * @param ids IDs to draw from 722 * @param random Random value 723 * @return Random ID 724 */ randomSample(DBIDs ids, RandomFactory random)725 public static DBIDVar randomSample(DBIDs ids, RandomFactory random) { 726 return randomSample(ids, random.getSingleThreadedRandom()); 727 } 728 729 /** 730 * Randomly split IDs into {@code p} partitions of almost-equal size. 731 * 732 * @param ids Original DBIDs 733 * @param p Desired number of partitions. 734 * @param rnd Random generator 735 */ randomSplit(DBIDs ids, int p, RandomFactory rnd)736 public static ArrayDBIDs[] randomSplit(DBIDs ids, int p, RandomFactory rnd) { 737 return randomSplit(ids, p, rnd.getSingleThreadedRandom()); 738 } 739 740 /** 741 * Randomly split IDs into {@code p} partitions of almost-equal size. 742 * 743 * @param oids Original DBIDs 744 * @param p Desired number of partitions. 745 * @param random Random generator 746 */ randomSplit(DBIDs oids, int p, Random random)747 public static ArrayDBIDs[] randomSplit(DBIDs oids, int p, Random random) { 748 // Fast, and we're single-threaded here anyway. 749 random = random != null ? random : new FastNonThreadsafeRandom(); 750 ArrayModifiableDBIDs ids = newArray(oids); 751 final int size = ids.size(); 752 ArrayDBIDs[] split = new ArrayDBIDs[p]; 753 // Shuffle 754 for(int i = 1; i < size; i++) { 755 ids.swap(i - 1, i + random.nextInt(size - i)); 756 } 757 final int minsize = size / p, // Floor. 758 extra = size % p; // Remainder 759 for(int beg = 0, part = 0; part < p; part++) { 760 // First partitions are smaller, last partitions are larger. 761 final int psize = minsize + ((part < extra) ? 1 : 0); 762 split[part] = ids.slice(beg, beg + psize); 763 beg += psize; 764 } 765 return split; 766 } 767 768 /** 769 * Create a modifiable list to store distance-DBID pairs. 770 * 771 * @param size Estimated upper list size 772 * @return Empty list 773 */ newDistanceDBIDList(int size)774 public static ModifiableDoubleDBIDList newDistanceDBIDList(int size) { 775 return DBIDFactory.FACTORY.newDistanceDBIDList(size); 776 } 777 778 /** 779 * Create a modifiable list to store distance-DBID pairs. 780 * 781 * @return Empty list 782 */ newDistanceDBIDList()783 public static ModifiableDoubleDBIDList newDistanceDBIDList() { 784 return DBIDFactory.FACTORY.newDistanceDBIDList(); 785 } 786 787 /** 788 * Assert that the presented ids constitute a continuous {@link DBIDRange}. 789 * 790 * @param ids ID range. 791 * @return DBID range. 792 * @throws AbortException 793 */ assertRange(DBIDs ids)794 public static DBIDRange assertRange(DBIDs ids) { 795 if(!(ids instanceof DBIDRange)) { 796 throw new AbortException("This class may currently only be used with static databases and DBID ranges."); 797 } 798 return (DBIDRange) ids; 799 } 800 } 801