1<?php
2/**
3 * Send purge requests for listed pages to CDN
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License along
16 * with this program; if not, write to the Free Software Foundation, Inc.,
17 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18 * http://www.gnu.org/copyleft/gpl.html
19 *
20 * @file
21 * @ingroup Maintenance
22 */
23
24use MediaWiki\MediaWikiServices;
25
26require_once __DIR__ . '/Maintenance.php';
27
28/**
29 * Maintenance script that sends purge requests for listed pages to CDN.
30 *
31 * @ingroup Maintenance
32 */
class PurgeList extends Maintenance {
	/** @var string|null Raw value of --namespace, or null when the option was not given */
	private $namespaceId;
	/** @var bool Whether --all-namespaces was given */
	private $allNamespaces;
	/** @var bool Whether --db-touch was given */
	private $doDbTouch;
	/** @var float Seconds to wait after each single-URL purge; values <= 0 disable the delay */
	private $delay;

	/**
	 * Declare the script description, its command line options and the
	 * default batch size used when reading pages from the database.
	 */
	public function __construct() {
		parent::__construct();
		$this->addDescription( "Send purge requests for listed pages to CDN.\n"
			. "By default this expects a list of URLs or page names from STDIN. "
			. "To query the database for input, use --namespace or --all-namespaces instead."
		);
		$this->addOption( 'namespace', 'Purge pages with this namespace number', false, true );
		$this->addOption( 'all-namespaces', 'Purge pages in all namespaces', false, false );
		$this->addOption( 'db-touch', 'Update the page.page_touched database field', false, false );
		$this->addOption( 'delay', 'Number of seconds to delay between each purge', false, true );
		$this->addOption( 'verbose', 'Show more output', false, false, 'v' );
		$this->setBatchSize( 100 );
	}

	/**
	 * Read the options, validate them, and dispatch to the matching purge mode:
	 * all namespaces, a single namespace, or a list read from STDIN.
	 */
	public function execute() {
		$this->namespaceId = $this->getOption( 'namespace' );
		$this->allNamespaces = $this->hasOption( 'all-namespaces' );
		$this->doDbTouch = $this->hasOption( 'db-touch' );
		$this->delay = floatval( $this->getOption( 'delay', '0' ) );

		$useDatabaseInput = $this->allNamespaces || $this->namespaceId !== null;

		if ( $useDatabaseInput && $this->doDbTouch ) {
			if ( $this->getConfig()->get( 'MiserMode' ) ) {
				$this->fatalError( 'Prevented mass db-invalidation (MiserMode is enabled).' );
			}
			// purgeNamespace() only sends CDN purges and never updates
			// page_touched. Refuse the combination instead of silently
			// ignoring --db-touch.
			$this->fatalError(
				'The --db-touch option is not supported with --namespace or --all-namespaces.'
			);
		}

		if ( $this->allNamespaces ) {
			$this->purgeNamespace( false );
		} elseif ( $this->namespaceId !== null ) {
			$rawNamespace = trim( (string)$this->namespaceId );
			// intval() would turn a non-numeric value such as "Talk" into 0
			// and purge the main namespace by accident.
			if ( !preg_match( '/^-?[0-9]+$/', $rawNamespace ) ) {
				$this->fatalError(
					"Invalid --namespace value '{$this->namespaceId}': expected a namespace number."
				);
			}
			$this->purgeNamespace( intval( $rawNamespace ) );
		} else {
			$this->doPurge();
		}
		$this->output( "Done!\n" );
	}

	/**
	 * Purge the URLs and page names read from STDIN, one entry per line.
	 *
	 * Lines starting with http:// or https:// are purged as-is. Any other
	 * non-empty line is treated as a page title and expanded to its URLs via
	 * the HtmlCacheUpdater service; when --db-touch is set the title's cache
	 * is also invalidated. Unparsable titles are reported and skipped.
	 */
	private function doPurge() {
		$stdin = $this->getStdin();
		$urls = [];
		$htmlCacheUpdater = MediaWikiServices::getInstance()->getHtmlCacheUpdater();

		// fgets() returns false once the input is exhausted, which ends the
		// loop without ever handing a non-string value to trim().
		while ( ( $line = fgets( $stdin ) ) !== false ) {
			$page = trim( $line );
			if ( $page === '' ) {
				continue;
			}

			if ( preg_match( '%^https?://%', $page ) ) {
				$urls[] = $page;
				continue;
			}

			$title = Title::newFromText( $page );
			if ( !$title ) {
				$this->output( "(Invalid title '$page')\n" );
				continue;
			}

			$newUrls = $htmlCacheUpdater->getUrls( $title );
			foreach ( $newUrls as $url ) {
				$this->output( "$url\n" );
			}
			$urls = array_merge( $urls, $newUrls );

			if ( $this->doDbTouch ) {
				$title->invalidateCache();
			}
		}

		$this->output( "Purging " . count( $urls ) . " urls\n" );
		$this->sendPurgeRequest( $urls );
	}

	/**
	 * Purge every page of one namespace, or of all namespaces.
	 *
	 * Pages are read from the replica database in batches of getBatchSize()
	 * rows, ordered by page_id, and one purge request is sent per batch.
	 *
	 * @param int|bool $namespace Namespace number, or false for all namespaces
	 */
	private function purgeNamespace( $namespace = false ) {
		$dbr = $this->getDB( DB_REPLICA );
		$htmlCacheUpdater = MediaWikiServices::getInstance()->getHtmlCacheUpdater();
		$conds = $namespace === false ? [] : [ 'page_namespace' => $namespace ];
		// Highest page_id handled so far; the next batch continues after it.
		$startId = 0;

		while ( true ) {
			$res = $dbr->select(
				'page',
				[ 'page_id', 'page_namespace', 'page_title' ],
				// Array union is safe here: $conds only has string keys, the
				// range condition uses the integer key 0.
				$conds + [ 'page_id > ' . $dbr->addQuotes( $startId ) ],
				__METHOD__,
				[
					'LIMIT' => $this->getBatchSize(),
					'ORDER BY' => 'page_id'
				]
			);
			if ( !$res->numRows() ) {
				break;
			}

			$urls = [];
			foreach ( $res as $row ) {
				$title = Title::makeTitle( $row->page_namespace, $row->page_title );
				$urls = array_merge( $urls, $htmlCacheUpdater->getUrls( $title ) );
				$startId = $row->page_id;
			}
			$this->sendPurgeRequest( $urls );
		}
	}

	/**
	 * Helper to purge an array of $urls.
	 *
	 * With a positive --delay each URL is purged on its own, followed by a
	 * pause; otherwise the whole list is purged in a single call. With
	 * --verbose every URL is printed as well.
	 *
	 * @param array $urls List of URLs to purge from CDNs
	 */
	private function sendPurgeRequest( $urls ) {
		$hcu = MediaWikiServices::getInstance()->getHtmlCacheUpdater();
		$verbose = $this->hasOption( 'verbose' );

		if ( $this->delay > 0 ) {
			foreach ( $urls as $url ) {
				if ( $verbose ) {
					$this->output( $url . "\n" );
				}
				$hcu->purgeUrls( $url, $hcu::PURGE_NAIVE );
				$this->pauseBetweenPurges();
			}
			return;
		}

		if ( $verbose ) {
			$this->output( implode( "\n", $urls ) . "\n" );
		}
		$hcu->purgeUrls( $urls, $hcu::PURGE_NAIVE );
	}

	/**
	 * Sleep for $this->delay seconds.
	 *
	 * The delay is rounded to whole microseconds so that only integers are
	 * passed to sleep()/usleep(); whole seconds go through sleep() because
	 * usleep() values above one second may not be supported by the
	 * operating system.
	 */
	private function pauseBetweenPurges() {
		$totalMicros = (int)round( $this->delay * 1e6 );
		$seconds = (int)floor( $totalMicros / 1000000 );
		$micros = $totalMicros % 1000000;

		if ( $seconds > 0 ) {
			sleep( $seconds );
		}
		if ( $micros > 0 ) {
			usleep( $micros );
		}
	}
}
176
177$maintClass = PurgeList::class;
178require_once RUN_MAINTENANCE_IF_MAIN;
179