1<?php
2/**
3 * Matomo - free/libre analytics platform
4 *
5 * @link https://matomo.org
6 * @license http://www.gnu.org/licenses/gpl-3.0.html GPL v3 or later
7 *
8 */
9
10namespace Piwik;
11
12use Exception;
13
14/**
15 * The ranking query class wraps an arbitrary SQL query with more SQL that limits
16 * the number of results while aggregating the rest in an a new "Others" row. It also
17 * allows for some more fancy things that can be configured via method calls of this
18 * class. The advanced use cases are explained in the doc comments of the methods.
19 *
20 * The general use case looks like this:
21 *
22 *     // limit to 500 rows + "Others"
23 *     $rankingQuery = new RankingQuery();
24 *     $rankingQuery->setLimit(500);
25 *
26 *     // idaction_url will be "Others" in the row that contains the aggregated rest
27 *     $rankingQuery->addLabelColumn('idaction_url');
28 *
29 *     // the actual query. it's important to sort it before the limit is applied
30 *     $sql = 'SELECT idaction_url, COUNT(*) AS nb_hits
31 *             FROM log_link_visit_action
32 *             GROUP BY idaction_url
33 *             ORDER BY nb_hits DESC';
34 *
35 *     // execute the query
36 *     $rankingQuery->execute($sql);
37 *
38 * For more examples, see RankingQueryTest.php
39 *
40 * @api
41 */
42class RankingQuery
43{
44    // a special label used to mark the 'Others' row in a ranking query result set. this is mapped to the
45    // datatable summary row during archiving.
46    const LABEL_SUMMARY_ROW = '__mtm_ranking_query_others__';
47
48    /**
49     * Contains the labels of the inner query.
50     * Format: "label" => true (to make sure labels don't appear twice)
51     * @var array
52     */
53    private $labelColumns = array();
54
55    /**
56     * The columns of the inner query that are not labels
57     * Format: "label" => "aggregation function" or false for no aggregation
58     * @var array
59     */
60    private $additionalColumns = array();
61
62    /**
63     * The limit for each group
64     * @var int
65     */
66    private $limit = 5;
67
68    /**
69     * The name of the columns that marks rows to be excluded from the limit
70     * @var string
71     */
72    private $columnToMarkExcludedRows = false;
73
74    /**
75     * The column that is used to partition the result
76     * @var bool|string
77     */
78    private $partitionColumn = false;
79
80    /**
81     * The possible values for the column $this->partitionColumn
82     * @var array
83     */
84    private $partitionColumnValues = array();
85
86    /**
87     * The value to use in the label of the 'Others' row.
88     * @var string
89     */
90    private $othersLabelValue = self::LABEL_SUMMARY_ROW;
91
92    /**
93     * Constructor.
94     *
95     * @param int|false $limit The result row limit. See {@link setLimit()}.
96     */
97    public function __construct($limit = false)
98    {
99        if ($limit !== false) {
100            $this->setLimit($limit);
101        }
102    }
103
104    /**
105     * Set the limit after which everything is grouped to "Others".
106     *
107     * @param int $limit
108     */
109    public function setLimit($limit)
110    {
111        $this->limit = $limit;
112    }
113
114    /**
115     * Set the value to use for the label in the 'Others' row.
116     *
117     * @param string $value
118     */
119    public function setOthersLabel($value)
120    {
121        $this->othersLabelValue = $value;
122    }
123
124    /**
125     * Add a label column.
126     * Labels are the columns that are replaced with "Others" after the limit.
127     *
128     * @param string|array $labelColumn
129     */
130    public function addLabelColumn($labelColumn)
131    {
132        if (is_array($labelColumn)) {
133            foreach ($labelColumn as $label) {
134                $this->addLabelColumn($label);
135            }
136            return;
137        }
138        $this->labelColumns[$labelColumn] = true;
139    }
140
141    /**
142     * @return array
143     */
144    public function getLabelColumns()
145    {
146        return $this->labelColumns;
147    }
148
149    /**
150     * Add a column that has be added to the outer queries.
151     *
152     * @param $column
153     * @param string|bool $aggregationFunction If set, this function is used to aggregate the values of "Others",
154     *                                         eg, `'min'`, `'max'` or `'sum'`.
155     */
156    public function addColumn($column, $aggregationFunction = false)
157    {
158        if (is_array($column)) {
159            foreach ($column as $c) {
160                $this->addColumn($c, $aggregationFunction);
161            }
162            return;
163        }
164        $this->additionalColumns[$column] = $aggregationFunction;
165    }
166
167    /**
168     * Sets a column that will be used to filter the result into two categories.
169     * Rows where this column has a value > 0 will be removed from the result and put
170     * into another array. Both the result and the array of excluded rows are returned
171     * by {@link execute()}.
172     *
173     * @param $column string Name of the column.
174     * @throws Exception if method is used more than once.
175     */
176    public function setColumnToMarkExcludedRows($column)
177    {
178        if ($this->columnToMarkExcludedRows !== false) {
179            throw new Exception("setColumnToMarkExcludedRows can only be used once");
180        }
181
182        $this->columnToMarkExcludedRows = $column;
183        $this->addColumn($this->columnToMarkExcludedRows);
184    }
185
186    /**
187     * This method can be used to partition the result based on the possible values of one
188     * table column. This means the query will split the result set into other sets of rows
189     * for each possible value you provide (where the rows of each set have a column value
190     * that equals a possible value). Each of these new sets of rows will be individually
191     * limited resulting in several limited result sets.
192     *
193     * For example, you can run a query aggregating some data on the log_action table and
194     * partition by log_action.type with the possible values of {@link Piwik\Tracker\Action::TYPE_PAGE_URL},
195     * {@link Piwik\Tracker\Action::TYPE_OUTLINK}, {@link Piwik\Tracker\Action::TYPE_DOWNLOAD}.
196     * The result will be three separate result sets that are aggregated the same ways, but for rows
197     * where `log_action.type = TYPE_OUTLINK`, for rows where `log_action.type = TYPE_ACTION_URL` and for
198     * rows `log_action.type = TYPE_DOWNLOAD`.
199     *
200     * @param $partitionColumn string The column name to partition by.
201     * @param $possibleValues Array of possible column values.
202     * @throws Exception if method is used more than once.
203     */
204    public function partitionResultIntoMultipleGroups($partitionColumn, $possibleValues)
205    {
206        if ($this->partitionColumn !== false) {
207            throw new Exception("partitionResultIntoMultipleGroups can only be used once");
208        }
209
210        $this->partitionColumn = $partitionColumn;
211        $this->partitionColumnValues = $possibleValues;
212        $this->addColumn($partitionColumn);
213    }
214
215    /**
216     * Executes the query.
217     * The object has to be configured first using the other methods.
218     *
219     * @param $innerQuery string  The "payload" query that does the actual data aggregation. The ordering
220     *                            has to be specified in this query. {@link RankingQuery} cannot apply ordering
221     *                            itself.
222     * @param $bind array         Bindings for the inner query.
223     * @param int $timeLimitInMs  Adds a MAX_EXECUTION_TIME query hint to the query if $timeLimitInMs > 0
224     * @return array              The format depends on which methods have been used
225     *                            to configure the ranking query.
226     */
227    public function execute($innerQuery, $bind = array(), $timeLimitInMs = 0)
228    {
229        $query = $this->generateRankingQuery($innerQuery);
230        $query = DbHelper::addMaxExecutionTimeHintToQuery($query, $timeLimitInMs);
231
232        $data  = Db::getReader()->fetchAll($query, $bind);
233
234        if ($this->columnToMarkExcludedRows !== false) {
235            // split the result into the regular result and the rows with special treatment
236            $excludedFromLimit = array();
237            $result = array();
238            foreach ($data as &$row) {
239                if ($row[$this->columnToMarkExcludedRows] != 0) {
240                    $excludedFromLimit[] = $row;
241                } else {
242                    $result[] = $row;
243                }
244            }
245            $data = array(
246                'result'            => &$result,
247                'excludedFromLimit' => &$excludedFromLimit
248            );
249        }
250
251        if ($this->partitionColumn !== false) {
252            if ($this->columnToMarkExcludedRows !== false) {
253                $data['result'] = $this->splitPartitions($data['result']);
254            } else {
255                $data = $this->splitPartitions($data);
256            }
257        }
258
259        return $data;
260    }
261
262    private function splitPartitions(&$data)
263    {
264        $result = array();
265        foreach ($data as &$row) {
266            $partition = $row[$this->partitionColumn];
267            if (!isset($result[$partition])) {
268                $result[$partition] = array();
269            }
270            $result[$partition][] = & $row;
271        }
272        return $result;
273    }
274
275    /**
276     * Generate the SQL code that does the magic.
277     * If you want to get the result, use execute() instead. If you want to run the query
278     * yourself, use this method.
279     *
280     * @param $innerQuery string  The "payload" query that does the actual data aggregation. The ordering
281     *                            has to be specified in this query. {@link RankingQuery} cannot apply ordering
282     *                            itself.
283     * @return string             The entire ranking query SQL.
284     */
285    public function generateRankingQuery($innerQuery)
286    {
287        // +1 to include "Others"
288        $limit = $this->limit + 1;
289        $counterExpression = $this->getCounterExpression($limit);
290
291        // generate select clauses for label columns
292        $labelColumnsString = '`' . implode('`, `', array_keys($this->labelColumns)) . '`';
293        $labelColumnsOthersSwitch = array();
294        foreach ($this->labelColumns as $column => $true) {
295            $labelColumnsOthersSwitch[] = "
296				CASE
297					WHEN counter = $limit THEN '" . $this->othersLabelValue . "'
298					ELSE `$column`
299				END AS `$column`
300			";
301        }
302        $labelColumnsOthersSwitch = implode(', ', $labelColumnsOthersSwitch);
303
304        // generate select clauses for additional columns
305        $additionalColumnsString = '';
306        $additionalColumnsAggregatedString = '';
307        foreach ($this->additionalColumns as $additionalColumn => $aggregation) {
308            $additionalColumnsString .= ', `' . $additionalColumn . '`';
309            if ($aggregation !== false) {
310                $additionalColumnsAggregatedString .= ', ' . $aggregation . '(`' . $additionalColumn . '`) AS `' . $additionalColumn . '`';
311            } else {
312                $additionalColumnsAggregatedString .= ', `' . $additionalColumn . '`';
313            }
314        }
315
316        // initialize the counters
317        if ($this->partitionColumn !== false) {
318            $initCounter = '';
319            foreach ($this->partitionColumnValues as $value) {
320                $initCounter .= '( SELECT @counter' . intval($value) . ':=0 ) initCounter' . intval($value) . ', ';
321            }
322        } else {
323            $initCounter = '( SELECT @counter:=0 ) initCounter,';
324        }
325
326        // add a counter to the query
327        // we rely on the sorting of the inner query
328        $withCounter = "
329			SELECT
330				$labelColumnsString,
331				$counterExpression AS counter
332				$additionalColumnsString
333			FROM
334				$initCounter
335				( $innerQuery ) actualQuery
336		";
337
338        // group by the counter - this groups "Others" because the counter stops at $limit
339        $groupBy = 'counter';
340        if ($this->partitionColumn !== false) {
341            $groupBy .= ', `' . $this->partitionColumn . '`';
342        }
343        $groupOthers = "
344			SELECT
345				$labelColumnsOthersSwitch
346				$additionalColumnsAggregatedString
347			FROM ( $withCounter ) AS withCounter
348			GROUP BY $groupBy
349		";
350        return $groupOthers;
351    }
352
353    private function getCounterExpression($limit)
354    {
355        $whens = array();
356
357        if ($this->columnToMarkExcludedRows !== false) {
358            // when a row has been specified that marks which records should be excluded
359            // from limiting, we don't give those rows the normal counter but -1 times the
360            // value they had before. this way, they have a separate number space (i.e. negative
361            // integers).
362            $whens[] = "WHEN {$this->columnToMarkExcludedRows} != 0 THEN -1 * {$this->columnToMarkExcludedRows}";
363        }
364
365        if ($this->partitionColumn !== false) {
366            // partition: one counter per possible value
367            foreach ($this->partitionColumnValues as $value) {
368                $isValue = '`' . $this->partitionColumn . '` = ' . intval($value);
369                $counter = '@counter' . intval($value);
370                $whens[] = "WHEN $isValue AND $counter = $limit THEN $limit";
371                $whens[] = "WHEN $isValue THEN $counter:=$counter+1";
372            }
373            $whens[] = "ELSE 0";
374        } else {
375            // no partitioning: add a single counter
376            $whens[] = "WHEN @counter = $limit THEN $limit";
377            $whens[] = "ELSE @counter:=@counter+1";
378        }
379
380        return "
381			CASE
382				" . implode("
383				", $whens) . "
384			END
385		";
386    }
387}
388