1 /*
2  * This file is part of ELKI:
3  * Environment for Developing KDD-Applications Supported by Index-Structures
4  *
5  * Copyright (C) 2018
6  * ELKI Development Team
7  *
8  * This program is free software: you can redistribute it and/or modify
9  * it under the terms of the GNU Affero General Public License as published by
10  * the Free Software Foundation, either version 3 of the License, or
11  * (at your option) any later version.
12  *
13  * This program is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16  * GNU Affero General Public License for more details.
17  *
18  * You should have received a copy of the GNU Affero General Public License
19  * along with this program. If not, see <http://www.gnu.org/licenses/>.
20  */
21 package de.lmu.ifi.dbs.elki.index.tree.spatial.rstarvariants.strategies.bulk;
22 
23 import java.util.ArrayList;
24 import java.util.List;
25 
26 import de.lmu.ifi.dbs.elki.data.spatial.SpatialComparable;
27 import de.lmu.ifi.dbs.elki.data.spatial.SpatialSingleMeanComparator;
28 import de.lmu.ifi.dbs.elki.utilities.datastructures.QuickSelect;
29 import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
30 import net.jafama.FastMath;
31 
32 /**
33  * This is variation of the {@link SortTileRecursiveBulkSplit}, incorporating
34  * some ideas from {@link MaxExtensionBulkSplit}. Instead of iterating through
35  * the axes in order, it always chooses the axis with the largest extend. This
36  * may rarely lead to the data being split on the same axis twice, but most
37  * importantly it varies the splitting order compared to STR.
38  *
39  * {@link AdaptiveSortTileRecursiveBulkSplit} takes these ideas one step
40  * further, by also varying the fan-out degree.
41  *
42  * @author Erich Schubert
43  * @since 0.6.0
44  */
45 public class MaxExtensionSortTileRecursiveBulkSplit extends AbstractBulkSplit {
46   /**
47    * Static instance.
48    */
49   public static final MaxExtensionSortTileRecursiveBulkSplit STATIC = new MaxExtensionSortTileRecursiveBulkSplit();
50 
51   @Override
partition(List<T> spatialObjects, int minEntries, int maxEntries)52   public <T extends SpatialComparable> List<List<T>> partition(List<T> spatialObjects, int minEntries, int maxEntries) {
53     final int dims = spatialObjects.get(0).getDimensionality();
54     final int p = (int) FastMath.ceil(spatialObjects.size() / (double) maxEntries);
55     List<List<T>> ret = new ArrayList<>(p);
56     strPartition(spatialObjects, 0, spatialObjects.size(), 0, dims, maxEntries, new SpatialSingleMeanComparator(0), ret);
57     return ret;
58   }
59 
60   /**
61    * Recursively partition.
62    *
63    * @param objs Object list
64    * @param start Subinterval start
65    * @param end Subinterval end
66    * @param depth Iteration depth (must be less than dimensionality!)
67    * @param dims Total number of dimensions
68    * @param maxEntries Maximum page size
69    * @param c Comparison helper
70    * @param ret Output list
71    * @param <T> data type
72    */
strPartition(List<T> objs, int start, int end, int depth, int dims, int maxEntries, SpatialSingleMeanComparator c, List<List<T>> ret)73   protected <T extends SpatialComparable> void strPartition(List<T> objs, int start, int end, int depth, int dims, int maxEntries, SpatialSingleMeanComparator c, List<List<T>> ret) {
74     final int p = (int) FastMath.ceil((end - start) / (double) maxEntries);
75 
76     // Compute min and max:
77     double[] mm = new double[dims * 2];
78     for (int d = 0; d < mm.length; d += 2) {
79       mm[d] = Double.POSITIVE_INFINITY; // min <- +inf
80       mm[d + 1] = Double.NEGATIVE_INFINITY; // max <- -inf
81     }
82     for (int i = start; i < end; i++) {
83       T o = objs.get(i);
84       for (int d1 = 0, d2 = 0; d2 < mm.length; d1++, d2 += 2) {
85         mm[d2] = Math.min(mm[d2], o.getMin(d1));
86         mm[d2 + 1] = Math.max(mm[d2 + 1], o.getMax(d1));
87       }
88     }
89     // Find maximum and compute extends
90     double maxex = 0.0;
91     int sdim = -1;
92     for (int d = 0; d < mm.length; d += 2) {
93       final double extend = mm[d + 1] - mm[d];
94       if (extend > maxex) {
95         maxex = extend;
96         sdim = d >> 1;
97       }
98     }
99     // Chose the number of partitions:
100     final int s = (int) FastMath.ceil(FastMath.pow(p, 1.0 / (dims - depth)));
101 
102     final double len = end - start; // double intentional!
103     for (int i = 0; i < s; i++) {
104       // We don't completely sort, but only ensure the quantile is invariant.
105       int s2 = start + (int) ((i * len) / s);
106       int e2 = start + (int) (((i + 1) * len) / s);
107       // LoggingUtil.warning("STR " + dim + " s2:" + s2 + " e2:" + e2);
108       if (e2 < end) {
109         c.setDimension(sdim);
110         QuickSelect.quickSelect(objs, c, s2, end, e2);
111       }
112       if (depth + 1 == dims) {
113         ret.add(objs.subList(s2, e2));
114       } else {
115         // Descend
116         strPartition(objs, s2, e2, depth + 1, dims, maxEntries, c, ret);
117       }
118     }
119   }
120 
121   /**
122    * Parameterization class.
123    *
124    * @author Erich Schubert
125    */
126   public static class Parameterizer extends AbstractParameterizer {
127     @Override
makeInstance()128     protected MaxExtensionSortTileRecursiveBulkSplit makeInstance() {
129       return STATIC;
130     }
131   }
132 }
133