1<?php
2/**
3 * Matomo - free/libre analytics platform
4 *
5 * @link https://matomo.org
6 * @license http://www.gnu.org/licenses/gpl-3.0.html GPL v3 or later
7 *
8 */
9namespace Piwik\Archive;
10
11use Piwik\ArchiveProcessor\Rules;
12use Piwik\Config;
13use Piwik\Container\StaticContainer;
14use Piwik\DataAccess\ArchiveTableCreator;
15use Piwik\DataAccess\Model;
16use Piwik\Date;
17use Piwik\Piwik;
18use Psr\Log\LoggerInterface;
19use Psr\Log\LogLevel;
20
21/**
22 * Service that purges temporary, error-ed, invalid and custom range archives from archive tables.
23 *
24 * Temporary archives are purged if they were archived before a specific time. The time is dependent
25 * on whether browser triggered archiving is enabled or not.
26 *
27 * Error-ed archives are purged w/o constraint.
28 *
29 * Invalid archives are purged if a new, valid, archive exists w/ the same site, date, period combination.
30 * Archives are marked as invalid via Piwik\Archive\ArchiveInvalidator.
31 */
class ArchivePurger
{
    /**
     * Data access layer used to look up and delete archive rows.
     *
     * @var Model
     */
    private $model;

    /**
     * Threshold for purging custom range archives. Range archives older than this value
     * are purged unconditionally from the requested archive table.
     *
     * Either the Date injected through the constructor, or (by default) the result of
     * Date::getDateTime() computed from the `purge_date_range_archives_after_X_days` setting.
     *
     * @var Date|string
     */
    private $purgeCustomRangesOlderThan;

    /**
     * Date to use for 'yesterday'. Exists so tests can override this value.
     *
     * @var Date
     */
    private $yesterday;

    /**
     * Date to use for 'today'. Exists so tests can override this value.
     *
     * @var Date
     */
    private $today;

    /**
     * Timestamp to use for 'now'. Exists so tests can override this value.
     *
     * @var int
     */
    private $now;

    /**
     * @var LoggerInterface
     */
    private $logger;

    /**
     * @param Model|null $model Data access model; a new Model is created when omitted.
     * @param Date|null $purgeCustomRangesOlderThan Threshold for purging range archives; derived from
     *                                              the General config section when omitted.
     * @param LoggerInterface|null $logger Logger; resolved from the container when omitted.
     */
    public function __construct(Model $model = null, Date $purgeCustomRangesOlderThan = null, LoggerInterface $logger = null)
    {
        $this->model = $model ?: new Model();

        $this->purgeCustomRangesOlderThan = $purgeCustomRangesOlderThan ?: self::getDefaultCustomRangeToPurgeAgeThreshold();

        $this->yesterday = Date::factory('yesterday');
        $this->today = Date::factory('today');
        $this->now = time();
        $this->logger = $logger ?: StaticContainer::get('Psr\Log\LoggerInterface');
    }

    /**
     * Purges all invalidated archives for which there are newer, valid archives from the archive
     * table that stores data for `$date`.
     *
     * @param Date $date The date identifying the archive table.
     * @return int The total number of archive rows deleted (from both the blob & numeric tables).
     */
    public function purgeInvalidatedArchivesFrom(Date $date)
    {
        $numericTable = ArchiveTableCreator::getNumericTable($date);

        $archiveIds = $this->model->getInvalidatedArchiveIdsSafeToDelete($numericTable);
        if (empty($archiveIds)) {
            // NOTE(review): placeholder archives are only collected below, so they are left untouched
            // whenever no invalidated archive is found - confirm this is intended.
            $this->logger->debug("No invalidated archives found in {table} with newer, valid archives.", array('table' => $numericTable));
            return 0;
        }

        // placeholder archives are removed together with the invalidated ones
        $emptyIdArchives = $this->model->getPlaceholderArchiveIds($numericTable);
        $archiveIds = array_merge($archiveIds, $emptyIdArchives);

        $this->logger->info("Found {countArchiveIds} invalidated archives safe to delete in {table}.", array(
            'table' => $numericTable, 'countArchiveIds' => count($archiveIds)
        ));

        $deletedRowCount = $this->deleteArchiveIds($date, $archiveIds);

        $this->logger->debug("Deleted {count} rows in {table} and its associated blob table.", array(
            'table' => $numericTable, 'count' => $deletedRowCount
        ));

        return $deletedRowCount;
    }

    /**
     * Removes the outdated archives for the given month.
     * (meaning they are marked with a done flag of ArchiveWriter::DONE_OK_TEMPORARY or ArchiveWriter::DONE_ERROR)
     *
     * @param Date $dateStart Only the month will be used
     * @return int Returns the total number of rows deleted.
     */
    public function purgeOutdatedArchives(Date $dateStart)
    {
        $purgeArchivesOlderThan = $this->getOldestTemporaryArchiveToKeepThreshold();
        $deletedRowCount = 0;

        $idArchivesToDelete = $this->getOutdatedArchiveIds($dateStart, $purgeArchivesOlderThan);
        if (!empty($idArchivesToDelete)) {
            $deletedRowCount = $this->deleteArchiveIds($dateStart, $idArchivesToDelete);

            $this->logger->info("Deleted {count} rows in archive tables (numeric + blob) for {date}.", array(
                'count' => $deletedRowCount,
                'date' => $dateStart
            ));
        } else {
            $this->logger->debug("No outdated archives found in archive numeric table for {date}.", array('date' => $dateStart));
        }

        // summary line is logged whether or not anything was deleted
        $this->logger->debug("Purging temporary archives: done [ purged archives older than {date} in {yearMonth} ] [Deleted IDs count: {deletedIds}]", array(
            'date' => $purgeArchivesOlderThan,
            'yearMonth' => $dateStart->toString('Y-m'),
            'deletedIds' => count($idArchivesToDelete),
        ));

        return $deletedRowCount;
    }

    /**
     * Purges the archives the model reports as belonging to deleted sites from the archive
     * tables that store data for `$dateStart`.
     *
     * @param Date $dateStart The date identifying the archive tables.
     * @return int The total number of rows deleted from both the numeric & blob table.
     */
    public function purgeDeletedSiteArchives(Date $dateStart)
    {
        $archiveTable = ArchiveTableCreator::getNumericTable($dateStart);
        $idArchivesToDelete = $this->model->getArchiveIdsForDeletedSites($archiveTable);

        return $this->purge($idArchivesToDelete, $dateStart, 'deleted sites');
    }

    /**
     * Purges the archives of the given deleted segments from the archive tables that store
     * data for `$dateStart`.
     *
     * @param Date $dateStart The date identifying the archive tables.
     * @param array $deletedSegments List of segments whose archives should be purged
     * @return int The total number of rows deleted; 0 when no segments are given.
     */
    public function purgeDeletedSegmentArchives(Date $dateStart, array $deletedSegments)
    {
        if (empty($deletedSegments)) {
            // nothing to look up; return an explicit count instead of falling through with null
            return 0;
        }

        $idArchivesToDelete = $this->getDeletedSegmentArchiveIds($dateStart, $deletedSegments);

        return $this->purge($idArchivesToDelete, $dateStart, 'deleted segments');
    }

    /**
     * Purge all numeric and blob archives with the given IDs from the database.
     *
     * @param array $idArchivesToDelete IDs of the archives to delete.
     * @param Date $dateStart The date identifying the archive tables.
     * @param string $reason Short description of why the archives are purged; only used in log messages.
     * @return int The total number of rows deleted; 0 when no IDs are given.
     */
    protected function purge(array $idArchivesToDelete, Date $dateStart, $reason)
    {
        $deletedRowCount = 0;
        if (!empty($idArchivesToDelete)) {
            $deletedRowCount = $this->deleteArchiveIds($dateStart, $idArchivesToDelete);

            $this->logger->info(
                "Deleted {count} rows in archive tables (numeric + blob) for {reason} for {date}.",
                array(
                    'count' => $deletedRowCount,
                    'date' => $dateStart,
                    'reason' => $reason
                )
            );

            $this->logger->debug("[Deleted IDs count: {deletedIds}]", array(
                'deletedIds' => count($idArchivesToDelete),
            ));
        } else {
            $this->logger->debug(
                "No archives for {reason} found in archive numeric table for {date}.",
                array('date' => $dateStart, 'reason' => $reason)
            );
        }

        return $deletedRowCount;
    }

    /**
     * Looks up the IDs of the archives belonging to the given deleted segments, passing the
     * temporary archive threshold on to the model.
     *
     * @param Date $date The date identifying the archive table.
     * @param array $deletedSegments List of segments whose archives should be purged
     * @return mixed Whatever Model::getArchiveIdsForSegments() returns (an array of archive IDs is
     *               expected, since the result is handed to purge()).
     */
    protected function getDeletedSegmentArchiveIds(Date $date, array $deletedSegments)
    {
        $archiveTable = ArchiveTableCreator::getNumericTable($date);
        return $this->model->getArchiveIdsForSegments(
            $archiveTable, $deletedSegments, $this->getOldestTemporaryArchiveToKeepThreshold()
        );
    }

    /**
     * Collects the `idarchive` values of the temporary archives the model reports as older than
     * the given threshold.
     *
     * @param Date $date The date identifying the archive table.
     * @param mixed $purgeArchivesOlderThan Threshold as returned by getOldestTemporaryArchiveToKeepThreshold().
     * @return array List of archive IDs; empty when nothing was found.
     */
    protected function getOutdatedArchiveIds(Date $date, $purgeArchivesOlderThan)
    {
        $archiveTable = ArchiveTableCreator::getNumericTable($date);

        $result = $this->model->getTemporaryArchivesOlderThan($archiveTable, $purgeArchivesOlderThan);

        $idArchivesToDelete = array();
        if (!empty($result)) {
            foreach ($result as $row) {
                $idArchivesToDelete[] = $row['idarchive'];
            }
        }

        return $idArchivesToDelete;
    }

    /**
     * Deletes "Custom Date Range" archives older than the configured threshold, since they can be
     * re-processed and would take up unnecessary space.
     *
     * @param Date $date The date identifying the archive tables.
     * @return int The total number of rows deleted from both the numeric & blob table.
     */
    public function purgeArchivesWithPeriodRange(Date $date)
    {
        $numericTable = ArchiveTableCreator::getNumericTable($date);
        $blobTable    = ArchiveTableCreator::getBlobTable($date);

        $deletedCount = $this->model->deleteArchivesWithPeriod(
            $numericTable, $blobTable, Piwik::$idPeriods['range'], $this->purgeCustomRangesOlderThan);

        // only surface the message at INFO level when something was actually removed
        $level = $deletedCount == 0 ? LogLevel::DEBUG : LogLevel::INFO;
        $this->logger->log($level, "Purged {count} range archive rows from {numericTable} & {blobTable}.", array(
            'count' => $deletedCount,
            'numericTable' => $numericTable,
            'blobTable' => $blobTable
        ));

        $this->logger->debug("  [ purged archives older than {threshold} ]", array('threshold' => $this->purgeCustomRangesOlderThan));

        return $deletedCount;
    }

    /**
     * Deletes the given archive IDs from the numeric and blob tables of the specified month,
     * in batches of 1000 IDs.
     *
     * @param Date $date The date identifying the archive tables.
     * @param array $idArchivesToDelete IDs of the archives to delete.
     * @return int Number of rows deleted from both numeric + blob table.
     */
    protected function deleteArchiveIds(Date $date, $idArchivesToDelete)
    {
        $batches      = array_chunk($idArchivesToDelete, 1000);
        $numericTable = ArchiveTableCreator::getNumericTable($date);
        $blobTable    = ArchiveTableCreator::getBlobTable($date);

        $deletedCount = 0;
        foreach ($batches as $idsToDelete) {
            $deletedCount += $this->model->deleteArchiveIds($numericTable, $blobTable, $idsToDelete);
        }
        return $deletedCount;
    }

    /**
     * Returns the threshold before which outdated (temporary) archives can be purged.
     *
     * @return string The value of Date::getDateTime() for the threshold (assumed to be a formatted
     *                datetime string rather than a timestamp - confirm against Date::getDateTime()).
     */
    protected function getOldestTemporaryArchiveToKeepThreshold()
    {
        $temporaryArchivingTimeout = Rules::getTodayArchiveTimeToLive();
        if (Rules::isBrowserTriggerEnabled()) {
            // If Browser Archiving is enabled, it is likely there are many more temporary archives
            // We delete more often which is safe, since reports are re-processed on demand
            return Date::factory($this->now - 2 * $temporaryArchivingTimeout)->getDateTime();
        }

        // If cron core:archive command is building the reports, we should keep all temporary reports from today
        return $this->yesterday->getDateTime();
    }

    /**
     * Computes the default threshold for purging custom range archives: today minus the number of
     * days configured in `purge_date_range_archives_after_X_days` (General config section).
     *
     * @return string The value of Date::getDateTime() for that day (assumed to be a formatted
     *                datetime string - confirm against Date::getDateTime()).
     */
    private static function getDefaultCustomRangeToPurgeAgeThreshold()
    {
        $daysRangesValid = Config::getInstance()->General['purge_date_range_archives_after_X_days'];
        return Date::factory('today')->subDay($daysRangesValid)->getDateTime();
    }

    /**
     * For tests.
     *
     * @param Date $yesterday
     */
    public function setYesterdayDate(Date $yesterday)
    {
        $this->yesterday = $yesterday;
    }

    /**
     * For tests.
     *
     * @param Date $today
     */
    public function setTodayDate(Date $today)
    {
        $this->today = $today;
    }

    /**
     * For tests.
     *
     * @param int $now
     */
    public function setNow($now)
    {
        $this->now = $now;
    }
}
331