<?php
/**
 * Matomo - free/libre analytics platform
 *
 * @link https://matomo.org
 * @license http://www.gnu.org/licenses/gpl-3.0.html GPL v3 or later
 *
 */
namespace Piwik\Archive;

use Piwik\ArchiveProcessor\Rules;
use Piwik\Config;
use Piwik\Container\StaticContainer;
use Piwik\DataAccess\ArchiveTableCreator;
use Piwik\DataAccess\Model;
use Piwik\Date;
use Piwik\Piwik;
use Psr\Log\LoggerInterface;
use Psr\Log\LogLevel;

/**
 * Service that purges temporary, error-ed, invalid and custom range archives from archive tables.
 *
 * Temporary archives are purged if they were archived before a specific time. The time is dependent
 * on whether browser triggered archiving is enabled or not.
 *
 * Error-ed archives are purged w/o constraint.
 *
 * Invalid archives are purged if a new, valid, archive exists w/ the same site, date, period combination.
 * Archives are marked as invalid via Piwik\Archive\ArchiveInvalidator.
 */
class ArchivePurger
{
    /**
     * Data access object used to look up and delete archive rows.
     *
     * @var Model
     */
    private $model;

    /**
     * Date threshold for purging custom range archives. Archives that are older than this date
     * are purged unconditionally from the requested archive table.
     *
     * Holds the Date passed to the constructor, or, when none is passed, the value returned by
     * Date::getDateTime() for the configured default.
     * NOTE(review): the default is presumably a datetime string rather than a Date instance;
     * confirm against Piwik\Date and Model::deleteArchivesWithPeriod().
     *
     * @var Date|string
     */
    private $purgeCustomRangesOlderThan;

    /**
     * Date to use for 'yesterday'. Exists so tests can override this value.
     *
     * @var Date
     */
    private $yesterday;

    /**
     * Date to use for 'today'. Exists so tests can override this value.
     *
     * @var Date
     */
    private $today;

    /**
     * Date to use for 'now'. Exists so tests can override this value.
     *
     * @var int
     */
    private $now;

    /**
     * @var LoggerInterface
     */
    private $logger;

    /**
     * Every dependency is optional; a missing one is replaced by its default.
     *
     * @param Model|null $model Defaults to a new Model instance.
     * @param Date|null $purgeCustomRangesOlderThan Defaults to the threshold derived from the
     *                                              `purge_date_range_archives_after_X_days` setting.
     * @param LoggerInterface|null $logger Defaults to the logger registered in the container.
     */
    public function __construct(Model $model = null, Date $purgeCustomRangesOlderThan = null, LoggerInterface $logger = null)
    {
        $this->model = $model ?: new Model();

        $this->purgeCustomRangesOlderThan = $purgeCustomRangesOlderThan ?: self::getDefaultCustomRangeToPurgeAgeThreshold();

        $this->yesterday = Date::factory('yesterday');
        $this->today = Date::factory('today');
        $this->now = time();
        $this->logger = $logger ?: StaticContainer::get('Psr\Log\LoggerInterface');
    }

    /**
     * Purges all invalidated archives for which there are newer, valid archives from the archive
     * table that stores data for `$date`.
     *
     * @param Date $date The date identifying the archive table.
     * @return int The total number of archive rows deleted (from both the blob & numeric tables).
     */
    public function purgeInvalidatedArchivesFrom(Date $date)
    {
        $numericTable = ArchiveTableCreator::getNumericTable($date);

        $archiveIds = $this->model->getInvalidatedArchiveIdsSafeToDelete($numericTable);
        if (empty($archiveIds)) {
            $this->logger->debug("No invalidated archives found in {table} with newer, valid archives.", array('table' => $numericTable));
            return 0;
        }

        // Placeholder archives are only collected (and purged) when at least one invalidated
        // archive was found above; the early return skips them otherwise.
        $emptyIdArchives = $this->model->getPlaceholderArchiveIds($numericTable);
        $archiveIds = array_merge($archiveIds, $emptyIdArchives);

        $this->logger->info("Found {countArchiveIds} invalidated archives safe to delete in {table}.", array(
            'table' => $numericTable, 'countArchiveIds' => count($archiveIds)
        ));

        $deletedRowCount = $this->deleteArchiveIds($date, $archiveIds);

        $this->logger->debug("Deleted {count} rows in {table} and its associated blob table.", array(
            'table' => $numericTable, 'count' => $deletedRowCount
        ));

        return $deletedRowCount;
    }

    /**
     * Removes the outdated archives for the given month.
     * (meaning they are marked with a done flag of ArchiveWriter::DONE_OK_TEMPORARY or ArchiveWriter::DONE_ERROR)
     *
     * @param Date $dateStart Only the month will be used
     * @return int Returns the total number of rows deleted.
     */
    public function purgeOutdatedArchives(Date $dateStart)
    {
        $purgeArchivesOlderThan = $this->getOldestTemporaryArchiveToKeepThreshold();
        $deletedRowCount = 0;

        $idArchivesToDelete = $this->getOutdatedArchiveIds($dateStart, $purgeArchivesOlderThan);
        if (!empty($idArchivesToDelete)) {
            $deletedRowCount = $this->deleteArchiveIds($dateStart, $idArchivesToDelete);

            $this->logger->info("Deleted {count} rows in archive tables (numeric + blob) for {date}.", array(
                'count' => $deletedRowCount,
                'date' => $dateStart
            ));
        } else {
            $this->logger->debug("No outdated archives found in archive numeric table for {date}.", array('date' => $dateStart));
        }

        // Summary line, logged whether or not anything was deleted.
        $this->logger->debug("Purging temporary archives: done [ purged archives older than {date} in {yearMonth} ] [Deleted IDs count: {deletedIds}]", array(
            'date' => $purgeArchivesOlderThan,
            'yearMonth' => $dateStart->toString('Y-m'),
            'deletedIds' => count($idArchivesToDelete),
        ));

        return $deletedRowCount;
    }

    /**
     * Purges the archives that the model reports as belonging to deleted sites from the archive
     * tables identified by `$dateStart`.
     *
     * @param Date $dateStart The date identifying the archive tables.
     * @return int The total number of rows deleted from both the numeric & blob table.
     */
    public function purgeDeletedSiteArchives(Date $dateStart)
    {
        $archiveTable = ArchiveTableCreator::getNumericTable($dateStart);
        $idArchivesToDelete = $this->model->getArchiveIdsForDeletedSites($archiveTable);

        return $this->purge($idArchivesToDelete, $dateStart, 'deleted sites');
    }

    /**
     * Purges the archives of the given deleted segments from the archive tables identified
     * by `$dateStart`.
     *
     * @param Date $dateStart The date identifying the archive tables.
     * @param array $deletedSegments List of segments whose archives should be purged
     * @return int The total number of rows deleted; 0 when no segments are given.
     */
    public function purgeDeletedSegmentArchives(Date $dateStart, array $deletedSegments)
    {
        // Nothing to look up: return an explicit 0 so the method always honours its int
        // contract instead of implicitly returning null.
        if (empty($deletedSegments)) {
            return 0;
        }

        $idArchivesToDelete = $this->getDeletedSegmentArchiveIds($dateStart, $deletedSegments);

        return $this->purge($idArchivesToDelete, $dateStart, 'deleted segments');
    }

    /**
     * Purge all numeric and blob archives with the given IDs from the database.
     *
     * @param array $idArchivesToDelete IDs of the archives to delete.
     * @param Date $dateStart The date identifying the archive tables.
     * @param string $reason Short description of why the archives are purged; only used in log messages.
     * @return int The total number of rows deleted; 0 when `$idArchivesToDelete` is empty.
     */
    protected function purge(array $idArchivesToDelete, Date $dateStart, $reason)
    {
        $deletedRowCount = 0;
        if (!empty($idArchivesToDelete)) {
            $deletedRowCount = $this->deleteArchiveIds($dateStart, $idArchivesToDelete);

            $this->logger->info(
                "Deleted {count} rows in archive tables (numeric + blob) for {reason} for {date}.",
                array(
                    'count' => $deletedRowCount,
                    'date' => $dateStart,
                    'reason' => $reason
                )
            );

            $this->logger->debug("[Deleted IDs count: {deletedIds}]", array(
                'deletedIds' => count($idArchivesToDelete),
            ));
        } else {
            $this->logger->debug(
                "No archives for {reason} found in archive numeric table for {date}.",
                array('date' => $dateStart, 'reason' => $reason)
            );
        }

        return $deletedRowCount;
    }

    /**
     * Asks the model for the archive IDs of the given segments in the numeric table for `$date`,
     * passing along the temporary archive threshold.
     *
     * @param Date $date The date identifying the archive table.
     * @param array $deletedSegments List of deleted segments.
     * @return array Whatever Model::getArchiveIdsForSegments() returns (expected: a list of archive IDs).
     */
    protected function getDeletedSegmentArchiveIds(Date $date, array $deletedSegments)
    {
        $archiveTable = ArchiveTableCreator::getNumericTable($date);
        return $this->model->getArchiveIdsForSegments(
            $archiveTable, $deletedSegments, $this->getOldestTemporaryArchiveToKeepThreshold()
        );
    }

    /**
     * Collects the `idarchive` values of the temporary archives in the numeric table for `$date`
     * that the model reports as older than `$purgeArchivesOlderThan`.
     *
     * @param Date $date The date identifying the archive table.
     * @param mixed $purgeArchivesOlderThan Threshold forwarded unchanged to the model.
     * @return array List of archive IDs; empty when the model returns nothing.
     */
    protected function getOutdatedArchiveIds(Date $date, $purgeArchivesOlderThan)
    {
        $archiveTable = ArchiveTableCreator::getNumericTable($date);

        $result = $this->model->getTemporaryArchivesOlderThan($archiveTable, $purgeArchivesOlderThan);

        $idArchivesToDelete = array();
        if (!empty($result)) {
            foreach ($result as $row) {
                $idArchivesToDelete[] = $row['idarchive'];
            }
        }

        return $idArchivesToDelete;
    }

    /**
     * Deletes "Custom Date Range" archives that are older than the configured threshold, since they
     * can be re-processed and would otherwise take up unnecessary space.
     *
     * @param Date $date The date identifying the archive tables.
     * @return int The total number of rows deleted from both the numeric & blob table.
     */
    public function purgeArchivesWithPeriodRange(Date $date)
    {
        $numericTable = ArchiveTableCreator::getNumericTable($date);
        $blobTable = ArchiveTableCreator::getBlobTable($date);

        $deletedCount = $this->model->deleteArchivesWithPeriod(
            $numericTable, $blobTable, Piwik::$idPeriods['range'], $this->purgeCustomRangesOlderThan);

        // Only surface the message at INFO level when something was actually removed.
        $level = $deletedCount == 0 ? LogLevel::DEBUG : LogLevel::INFO;
        $this->logger->log($level, "Purged {count} range archive rows from {numericTable} & {blobTable}.", array(
            'count' => $deletedCount,
            'numericTable' => $numericTable,
            'blobTable' => $blobTable
        ));

        $this->logger->debug(" [ purged archives older than {threshold} ]", array('threshold' => $this->purgeCustomRangesOlderThan));

        return $deletedCount;
    }

    /**
     * Deletes the given archive IDs, in batches of 1000, from the numeric and blob tables
     * identified by `$date`.
     *
     * @param Date $date The date identifying the archive tables.
     * @param array $idArchivesToDelete IDs of the archives to delete.
     * @return int Number of rows deleted from both numeric + blob table.
     */
    protected function deleteArchiveIds(Date $date, $idArchivesToDelete)
    {
        $batches = array_chunk($idArchivesToDelete, 1000);
        $numericTable = ArchiveTableCreator::getNumericTable($date);
        $blobTable = ArchiveTableCreator::getBlobTable($date);

        $deletedCount = 0;
        foreach ($batches as $idsToDelete) {
            $deletedCount += $this->model->deleteArchiveIds($numericTable, $blobTable, $idsToDelete);
        }
        return $deletedCount;
    }

    /**
     * Returns the threshold before which outdated (temporary) archives can be purged.
     *
     * With browser triggered archiving enabled the threshold is `now` minus twice the
     * "today archive" time to live; otherwise it is `yesterday`.
     *
     * @return string The value of Date::getDateTime() for the threshold (presumably a datetime
     *                string rather than a timestamp; verify against Piwik\Date).
     */
    protected function getOldestTemporaryArchiveToKeepThreshold()
    {
        $temporaryArchivingTimeout = Rules::getTodayArchiveTimeToLive();
        if (Rules::isBrowserTriggerEnabled()) {
            // If Browser Archiving is enabled, it is likely there are many more temporary archives
            // We delete more often which is safe, since reports are re-processed on demand
            return Date::factory($this->now - 2 * $temporaryArchivingTimeout)->getDateTime();
        }

        // If cron core:archive command is building the reports, we should keep all temporary reports from today
        return $this->yesterday->getDateTime();
    }

    /**
     * Computes the default custom range purge threshold: today minus the number of days configured
     * in the General `purge_date_range_archives_after_X_days` setting.
     *
     * @return string The value of Date::getDateTime() for that day (presumably a datetime string;
     *                verify against Piwik\Date).
     */
    private static function getDefaultCustomRangeToPurgeAgeThreshold()
    {
        $daysRangesValid = Config::getInstance()->General['purge_date_range_archives_after_X_days'];
        return Date::factory('today')->subDay($daysRangesValid)->getDateTime();
    }

    /**
     * For tests.
     *
     * @param Date $yesterday
     */
    public function setYesterdayDate(Date $yesterday)
    {
        $this->yesterday = $yesterday;
    }

    /**
     * For tests.
     *
     * @param Date $today
     */
    public function setTodayDate(Date $today)
    {
        $this->today = $today;
    }

    /**
     * For tests.
     *
     * @param int $now
     */
    public function setNow($now)
    {
        $this->now = $now;
    }
}