1<?php 2/** 3 * Matomo - free/libre analytics platform 4 * 5 * @link https://matomo.org 6 * @license http://www.gnu.org/licenses/gpl-3.0.html GPL v3 or later 7 * 8 */ 9 10namespace Piwik; 11 12use Exception; 13 14/** 15 * The ranking query class wraps an arbitrary SQL query with more SQL that limits 16 * the number of results while aggregating the rest in an a new "Others" row. It also 17 * allows for some more fancy things that can be configured via method calls of this 18 * class. The advanced use cases are explained in the doc comments of the methods. 19 * 20 * The general use case looks like this: 21 * 22 * // limit to 500 rows + "Others" 23 * $rankingQuery = new RankingQuery(); 24 * $rankingQuery->setLimit(500); 25 * 26 * // idaction_url will be "Others" in the row that contains the aggregated rest 27 * $rankingQuery->addLabelColumn('idaction_url'); 28 * 29 * // the actual query. it's important to sort it before the limit is applied 30 * $sql = 'SELECT idaction_url, COUNT(*) AS nb_hits 31 * FROM log_link_visit_action 32 * GROUP BY idaction_url 33 * ORDER BY nb_hits DESC'; 34 * 35 * // execute the query 36 * $rankingQuery->execute($sql); 37 * 38 * For more examples, see RankingQueryTest.php 39 * 40 * @api 41 */ 42class RankingQuery 43{ 44 // a special label used to mark the 'Others' row in a ranking query result set. this is mapped to the 45 // datatable summary row during archiving. 46 const LABEL_SUMMARY_ROW = '__mtm_ranking_query_others__'; 47 48 /** 49 * Contains the labels of the inner query. 50 * Format: "label" => true (to make sure labels don't appear twice) 51 * @var array 52 */ 53 private $labelColumns = array(); 54 55 /** 56 * The columns of the inner query that are not labels 57 * Format: "label" => "aggregation function" or false for no aggregation 58 * @var array 59 */ 60 private $additionalColumns = array(); 61 62 /** 63 * The limit for each group 64 * @var int 65 */ 66 private $limit = 5; 67 68 /** 69 * The name of the columns that marks rows to be excluded from the limit 70 * @var string 71 */ 72 private $columnToMarkExcludedRows = false; 73 74 /** 75 * The column that is used to partition the result 76 * @var bool|string 77 */ 78 private $partitionColumn = false; 79 80 /** 81 * The possible values for the column $this->partitionColumn 82 * @var array 83 */ 84 private $partitionColumnValues = array(); 85 86 /** 87 * The value to use in the label of the 'Others' row. 88 * @var string 89 */ 90 private $othersLabelValue = self::LABEL_SUMMARY_ROW; 91 92 /** 93 * Constructor. 94 * 95 * @param int|false $limit The result row limit. See {@link setLimit()}. 96 */ 97 public function __construct($limit = false) 98 { 99 if ($limit !== false) { 100 $this->setLimit($limit); 101 } 102 } 103 104 /** 105 * Set the limit after which everything is grouped to "Others". 106 * 107 * @param int $limit 108 */ 109 public function setLimit($limit) 110 { 111 $this->limit = $limit; 112 } 113 114 /** 115 * Set the value to use for the label in the 'Others' row. 116 * 117 * @param string $value 118 */ 119 public function setOthersLabel($value) 120 { 121 $this->othersLabelValue = $value; 122 } 123 124 /** 125 * Add a label column. 126 * Labels are the columns that are replaced with "Others" after the limit. 127 * 128 * @param string|array $labelColumn 129 */ 130 public function addLabelColumn($labelColumn) 131 { 132 if (is_array($labelColumn)) { 133 foreach ($labelColumn as $label) { 134 $this->addLabelColumn($label); 135 } 136 return; 137 } 138 $this->labelColumns[$labelColumn] = true; 139 } 140 141 /** 142 * @return array 143 */ 144 public function getLabelColumns() 145 { 146 return $this->labelColumns; 147 } 148 149 /** 150 * Add a column that has be added to the outer queries. 151 * 152 * @param $column 153 * @param string|bool $aggregationFunction If set, this function is used to aggregate the values of "Others", 154 * eg, `'min'`, `'max'` or `'sum'`. 155 */ 156 public function addColumn($column, $aggregationFunction = false) 157 { 158 if (is_array($column)) { 159 foreach ($column as $c) { 160 $this->addColumn($c, $aggregationFunction); 161 } 162 return; 163 } 164 $this->additionalColumns[$column] = $aggregationFunction; 165 } 166 167 /** 168 * Sets a column that will be used to filter the result into two categories. 169 * Rows where this column has a value > 0 will be removed from the result and put 170 * into another array. Both the result and the array of excluded rows are returned 171 * by {@link execute()}. 172 * 173 * @param $column string Name of the column. 174 * @throws Exception if method is used more than once. 175 */ 176 public function setColumnToMarkExcludedRows($column) 177 { 178 if ($this->columnToMarkExcludedRows !== false) { 179 throw new Exception("setColumnToMarkExcludedRows can only be used once"); 180 } 181 182 $this->columnToMarkExcludedRows = $column; 183 $this->addColumn($this->columnToMarkExcludedRows); 184 } 185 186 /** 187 * This method can be used to partition the result based on the possible values of one 188 * table column. This means the query will split the result set into other sets of rows 189 * for each possible value you provide (where the rows of each set have a column value 190 * that equals a possible value). Each of these new sets of rows will be individually 191 * limited resulting in several limited result sets. 192 * 193 * For example, you can run a query aggregating some data on the log_action table and 194 * partition by log_action.type with the possible values of {@link Piwik\Tracker\Action::TYPE_PAGE_URL}, 195 * {@link Piwik\Tracker\Action::TYPE_OUTLINK}, {@link Piwik\Tracker\Action::TYPE_DOWNLOAD}. 196 * The result will be three separate result sets that are aggregated the same ways, but for rows 197 * where `log_action.type = TYPE_OUTLINK`, for rows where `log_action.type = TYPE_ACTION_URL` and for 198 * rows `log_action.type = TYPE_DOWNLOAD`. 199 * 200 * @param $partitionColumn string The column name to partition by. 201 * @param $possibleValues Array of possible column values. 202 * @throws Exception if method is used more than once. 203 */ 204 public function partitionResultIntoMultipleGroups($partitionColumn, $possibleValues) 205 { 206 if ($this->partitionColumn !== false) { 207 throw new Exception("partitionResultIntoMultipleGroups can only be used once"); 208 } 209 210 $this->partitionColumn = $partitionColumn; 211 $this->partitionColumnValues = $possibleValues; 212 $this->addColumn($partitionColumn); 213 } 214 215 /** 216 * Executes the query. 217 * The object has to be configured first using the other methods. 218 * 219 * @param $innerQuery string The "payload" query that does the actual data aggregation. The ordering 220 * has to be specified in this query. {@link RankingQuery} cannot apply ordering 221 * itself. 222 * @param $bind array Bindings for the inner query. 223 * @param int $timeLimitInMs Adds a MAX_EXECUTION_TIME query hint to the query if $timeLimitInMs > 0 224 * @return array The format depends on which methods have been used 225 * to configure the ranking query. 226 */ 227 public function execute($innerQuery, $bind = array(), $timeLimitInMs = 0) 228 { 229 $query = $this->generateRankingQuery($innerQuery); 230 $query = DbHelper::addMaxExecutionTimeHintToQuery($query, $timeLimitInMs); 231 232 $data = Db::getReader()->fetchAll($query, $bind); 233 234 if ($this->columnToMarkExcludedRows !== false) { 235 // split the result into the regular result and the rows with special treatment 236 $excludedFromLimit = array(); 237 $result = array(); 238 foreach ($data as &$row) { 239 if ($row[$this->columnToMarkExcludedRows] != 0) { 240 $excludedFromLimit[] = $row; 241 } else { 242 $result[] = $row; 243 } 244 } 245 $data = array( 246 'result' => &$result, 247 'excludedFromLimit' => &$excludedFromLimit 248 ); 249 } 250 251 if ($this->partitionColumn !== false) { 252 if ($this->columnToMarkExcludedRows !== false) { 253 $data['result'] = $this->splitPartitions($data['result']); 254 } else { 255 $data = $this->splitPartitions($data); 256 } 257 } 258 259 return $data; 260 } 261 262 private function splitPartitions(&$data) 263 { 264 $result = array(); 265 foreach ($data as &$row) { 266 $partition = $row[$this->partitionColumn]; 267 if (!isset($result[$partition])) { 268 $result[$partition] = array(); 269 } 270 $result[$partition][] = & $row; 271 } 272 return $result; 273 } 274 275 /** 276 * Generate the SQL code that does the magic. 277 * If you want to get the result, use execute() instead. If you want to run the query 278 * yourself, use this method. 279 * 280 * @param $innerQuery string The "payload" query that does the actual data aggregation. The ordering 281 * has to be specified in this query. {@link RankingQuery} cannot apply ordering 282 * itself. 283 * @return string The entire ranking query SQL. 284 */ 285 public function generateRankingQuery($innerQuery) 286 { 287 // +1 to include "Others" 288 $limit = $this->limit + 1; 289 $counterExpression = $this->getCounterExpression($limit); 290 291 // generate select clauses for label columns 292 $labelColumnsString = '`' . implode('`, `', array_keys($this->labelColumns)) . '`'; 293 $labelColumnsOthersSwitch = array(); 294 foreach ($this->labelColumns as $column => $true) { 295 $labelColumnsOthersSwitch[] = " 296 CASE 297 WHEN counter = $limit THEN '" . $this->othersLabelValue . "' 298 ELSE `$column` 299 END AS `$column` 300 "; 301 } 302 $labelColumnsOthersSwitch = implode(', ', $labelColumnsOthersSwitch); 303 304 // generate select clauses for additional columns 305 $additionalColumnsString = ''; 306 $additionalColumnsAggregatedString = ''; 307 foreach ($this->additionalColumns as $additionalColumn => $aggregation) { 308 $additionalColumnsString .= ', `' . $additionalColumn . '`'; 309 if ($aggregation !== false) { 310 $additionalColumnsAggregatedString .= ', ' . $aggregation . '(`' . $additionalColumn . '`) AS `' . $additionalColumn . '`'; 311 } else { 312 $additionalColumnsAggregatedString .= ', `' . $additionalColumn . '`'; 313 } 314 } 315 316 // initialize the counters 317 if ($this->partitionColumn !== false) { 318 $initCounter = ''; 319 foreach ($this->partitionColumnValues as $value) { 320 $initCounter .= '( SELECT @counter' . intval($value) . ':=0 ) initCounter' . intval($value) . ', '; 321 } 322 } else { 323 $initCounter = '( SELECT @counter:=0 ) initCounter,'; 324 } 325 326 // add a counter to the query 327 // we rely on the sorting of the inner query 328 $withCounter = " 329 SELECT 330 $labelColumnsString, 331 $counterExpression AS counter 332 $additionalColumnsString 333 FROM 334 $initCounter 335 ( $innerQuery ) actualQuery 336 "; 337 338 // group by the counter - this groups "Others" because the counter stops at $limit 339 $groupBy = 'counter'; 340 if ($this->partitionColumn !== false) { 341 $groupBy .= ', `' . $this->partitionColumn . '`'; 342 } 343 $groupOthers = " 344 SELECT 345 $labelColumnsOthersSwitch 346 $additionalColumnsAggregatedString 347 FROM ( $withCounter ) AS withCounter 348 GROUP BY $groupBy 349 "; 350 return $groupOthers; 351 } 352 353 private function getCounterExpression($limit) 354 { 355 $whens = array(); 356 357 if ($this->columnToMarkExcludedRows !== false) { 358 // when a row has been specified that marks which records should be excluded 359 // from limiting, we don't give those rows the normal counter but -1 times the 360 // value they had before. this way, they have a separate number space (i.e. negative 361 // integers). 362 $whens[] = "WHEN {$this->columnToMarkExcludedRows} != 0 THEN -1 * {$this->columnToMarkExcludedRows}"; 363 } 364 365 if ($this->partitionColumn !== false) { 366 // partition: one counter per possible value 367 foreach ($this->partitionColumnValues as $value) { 368 $isValue = '`' . $this->partitionColumn . '` = ' . intval($value); 369 $counter = '@counter' . intval($value); 370 $whens[] = "WHEN $isValue AND $counter = $limit THEN $limit"; 371 $whens[] = "WHEN $isValue THEN $counter:=$counter+1"; 372 } 373 $whens[] = "ELSE 0"; 374 } else { 375 // no partitioning: add a single counter 376 $whens[] = "WHEN @counter = $limit THEN $limit"; 377 $whens[] = "ELSE @counter:=@counter+1"; 378 } 379 380 return " 381 CASE 382 " . implode(" 383 ", $whens) . " 384 END 385 "; 386 } 387} 388