1 /* 2 * This file is part of ELKI: 3 * Environment for Developing KDD-Applications Supported by Index-Structures 4 * 5 * Copyright (C) 2018 6 * ELKI Development Team 7 * 8 * This program is free software: you can redistribute it and/or modify 9 * it under the terms of the GNU Affero General Public License as published by 10 * the Free Software Foundation, either version 3 of the License, or 11 * (at your option) any later version. 12 * 13 * This program is distributed in the hope that it will be useful, 14 * but WITHOUT ANY WARRANTY; without even the implied warranty of 15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 * GNU Affero General Public License for more details. 17 * 18 * You should have received a copy of the GNU Affero General Public License 19 * along with this program. If not, see <http://www.gnu.org/licenses/>. 20 */ 21 package de.lmu.ifi.dbs.elki.index.tree.spatial.rstarvariants.strategies.bulk; 22 23 import java.util.ArrayList; 24 import java.util.List; 25 26 import de.lmu.ifi.dbs.elki.data.spatial.SpatialComparable; 27 import de.lmu.ifi.dbs.elki.data.spatial.SpatialSingleMeanComparator; 28 import de.lmu.ifi.dbs.elki.utilities.datastructures.QuickSelect; 29 import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; 30 import net.jafama.FastMath; 31 32 /** 33 * This is variation of the {@link SortTileRecursiveBulkSplit}, incorporating 34 * some ideas from {@link MaxExtensionBulkSplit}. Instead of iterating through 35 * the axes in order, it always chooses the axis with the largest extend. This 36 * may rarely lead to the data being split on the same axis twice, but most 37 * importantly it varies the splitting order compared to STR. 38 * 39 * {@link AdaptiveSortTileRecursiveBulkSplit} takes these ideas one step 40 * further, by also varying the fan-out degree. 41 * 42 * @author Erich Schubert 43 * @since 0.6.0 44 */ 45 public class MaxExtensionSortTileRecursiveBulkSplit extends AbstractBulkSplit { 46 /** 47 * Static instance. 48 */ 49 public static final MaxExtensionSortTileRecursiveBulkSplit STATIC = new MaxExtensionSortTileRecursiveBulkSplit(); 50 51 @Override partition(List<T> spatialObjects, int minEntries, int maxEntries)52 public <T extends SpatialComparable> List<List<T>> partition(List<T> spatialObjects, int minEntries, int maxEntries) { 53 final int dims = spatialObjects.get(0).getDimensionality(); 54 final int p = (int) FastMath.ceil(spatialObjects.size() / (double) maxEntries); 55 List<List<T>> ret = new ArrayList<>(p); 56 strPartition(spatialObjects, 0, spatialObjects.size(), 0, dims, maxEntries, new SpatialSingleMeanComparator(0), ret); 57 return ret; 58 } 59 60 /** 61 * Recursively partition. 62 * 63 * @param objs Object list 64 * @param start Subinterval start 65 * @param end Subinterval end 66 * @param depth Iteration depth (must be less than dimensionality!) 67 * @param dims Total number of dimensions 68 * @param maxEntries Maximum page size 69 * @param c Comparison helper 70 * @param ret Output list 71 * @param <T> data type 72 */ strPartition(List<T> objs, int start, int end, int depth, int dims, int maxEntries, SpatialSingleMeanComparator c, List<List<T>> ret)73 protected <T extends SpatialComparable> void strPartition(List<T> objs, int start, int end, int depth, int dims, int maxEntries, SpatialSingleMeanComparator c, List<List<T>> ret) { 74 final int p = (int) FastMath.ceil((end - start) / (double) maxEntries); 75 76 // Compute min and max: 77 double[] mm = new double[dims * 2]; 78 for (int d = 0; d < mm.length; d += 2) { 79 mm[d] = Double.POSITIVE_INFINITY; // min <- +inf 80 mm[d + 1] = Double.NEGATIVE_INFINITY; // max <- -inf 81 } 82 for (int i = start; i < end; i++) { 83 T o = objs.get(i); 84 for (int d1 = 0, d2 = 0; d2 < mm.length; d1++, d2 += 2) { 85 mm[d2] = Math.min(mm[d2], o.getMin(d1)); 86 mm[d2 + 1] = Math.max(mm[d2 + 1], o.getMax(d1)); 87 } 88 } 89 // Find maximum and compute extends 90 double maxex = 0.0; 91 int sdim = -1; 92 for (int d = 0; d < mm.length; d += 2) { 93 final double extend = mm[d + 1] - mm[d]; 94 if (extend > maxex) { 95 maxex = extend; 96 sdim = d >> 1; 97 } 98 } 99 // Chose the number of partitions: 100 final int s = (int) FastMath.ceil(FastMath.pow(p, 1.0 / (dims - depth))); 101 102 final double len = end - start; // double intentional! 103 for (int i = 0; i < s; i++) { 104 // We don't completely sort, but only ensure the quantile is invariant. 105 int s2 = start + (int) ((i * len) / s); 106 int e2 = start + (int) (((i + 1) * len) / s); 107 // LoggingUtil.warning("STR " + dim + " s2:" + s2 + " e2:" + e2); 108 if (e2 < end) { 109 c.setDimension(sdim); 110 QuickSelect.quickSelect(objs, c, s2, end, e2); 111 } 112 if (depth + 1 == dims) { 113 ret.add(objs.subList(s2, e2)); 114 } else { 115 // Descend 116 strPartition(objs, s2, e2, depth + 1, dims, maxEntries, c, ret); 117 } 118 } 119 } 120 121 /** 122 * Parameterization class. 123 * 124 * @author Erich Schubert 125 */ 126 public static class Parameterizer extends AbstractParameterizer { 127 @Override makeInstance()128 protected MaxExtensionSortTileRecursiveBulkSplit makeInstance() { 129 return STATIC; 130 } 131 } 132 } 133