1<?php
2/**
3 * Import pages from text files
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License along
16 * with this program; if not, write to the Free Software Foundation, Inc.,
17 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18 * http://www.gnu.org/copyleft/gpl.html
19 *
20 * @file
21 * @ingroup Maintenance
22 */
23
24use MediaWiki\MediaWikiServices;
25use MediaWiki\Revision\SlotRecord;
26
27require_once __DIR__ . '/Maintenance.php';
28
29/**
30 * Maintenance script which reads in text files
31 * and imports their content to a page of the wiki.
32 *
33 * @ingroup Maintenance
34 */
class ImportTextFiles extends Maintenance {
	/**
	 * Register the script description, its command line options and the
	 * positional "files" argument.
	 */
	public function __construct() {
		parent::__construct();
		$this->addDescription( 'Reads in text files and imports their content to pages of the wiki' );
		$this->addOption( 'user', 'Username to which edits should be attributed. ' .
			'Default: "Maintenance script"', false, true, 'u' );
		$this->addOption( 'summary', 'Specify edit summary for the edits', false, true, 's' );
		$this->addOption( 'use-timestamp', 'Use the modification date of the text file ' .
			'as the timestamp for the edit' );
		$this->addOption( 'overwrite', 'Overwrite existing pages. If --use-timestamp is passed, this ' .
			'will only overwrite pages if the file has been modified since the page was last modified.' );
		$this->addOption( 'prefix', 'A string to place in front of the file name', false, true, 'p' );
		$this->addOption( 'bot', 'Mark edits as bot edits in the recent changes list.' );
		$this->addOption( 'rc', 'Place revisions in RecentChanges.' );
		$this->addArg( 'files', 'Files to import' );
	}

	/**
	 * Import every file named on the command line as a wiki page.
	 *
	 * The page name is the --prefix value followed by the file name without
	 * its extension. Existing pages are skipped unless --overwrite is given;
	 * with --use-timestamp they are additionally skipped when the page was
	 * touched at or after the file's modification time. Pages whose current
	 * main slot content equals the file content are skipped as well.
	 *
	 * Prints a per-page result line and a final summary, and ends with
	 * fatalError() if at least one page could not be imported.
	 */
	public function execute() {
		$userName = $this->getOption( 'user', false );
		$summary = $this->getOption( 'summary', 'Imported from text file' );
		$useTimestamp = $this->hasOption( 'use-timestamp' );
		$rc = $this->hasOption( 'rc' );
		$bot = $this->hasOption( 'bot' );
		$overwrite = $this->hasOption( 'overwrite' );
		$prefix = $this->getOption( 'prefix', '' );

		$files = $this->collectFiles();

		$count = count( $files );
		$this->output( "Importing $count pages...\n" );

		if ( $userName === false ) {
			$user = User::newSystemUser( 'Maintenance script', [ 'steal' => true ] );
		} else {
			$user = User::newFromName( $userName );
		}

		if ( !$user ) {
			$this->fatalError( "Invalid username\n" );
		}
		if ( $user->isAnon() ) {
			$user->addToDatabase();
		}

		$successCount = 0;
		$failCount = 0;
		$skipCount = 0;

		// Loop-invariant service lookups, fetched once.
		$services = MediaWikiServices::getInstance();
		$revLookup = $services->getRevisionLookup();
		$config = $services->getMainConfig();

		foreach ( $files as $file => $text ) {
			$pageName = $prefix . pathinfo( $file, PATHINFO_FILENAME );
			$timestamp = $useTimestamp ? wfTimestamp( TS_UNIX, filemtime( $file ) ) : wfTimestampNow();

			$title = Title::newFromText( $pageName );
			// Have to check for # manually, since it gets interpreted as a fragment
			if ( !$title || $title->hasFragment() ) {
				$this->error( "Invalid title $pageName. Skipping.\n" );
				$skipCount++;
				continue;
			}

			$exists = $title->exists();
			$oldRevID = $title->getLatestRevID();
			$oldRevRecord = $oldRevID ? $revLookup->getRevisionById( $oldRevID ) : null;
			$actualTitle = $title->getPrefixedText();

			if ( $exists ) {
				$touched = wfTimestamp( TS_UNIX, $title->getTouched() );
				if ( !$overwrite ) {
					$this->output( "Title $actualTitle already exists. Skipping.\n" );
					$skipCount++;
					continue;
				} elseif ( $useTimestamp && intval( $touched ) >= intval( $timestamp ) ) {
					$this->output( "File for title $actualTitle has not been modified since the " .
						"destination page was touched. Skipping.\n" );
					$skipCount++;
					continue;
				}
			}

			$content = ContentHandler::makeContent( rtrim( $text ), $title );
			$rev = new WikiRevision( $config );
			$rev->setContent( SlotRecord::MAIN, $content );
			$rev->setTitle( $title );
			$rev->setUserObj( $user );
			$rev->setComment( $summary );
			$rev->setTimestamp( $timestamp );

			// The revision lookup above may have produced no record even though
			// the title exists, so only compare when there is old content to
			// compare against instead of dereferencing null.
			$oldContent = $oldRevRecord ? $oldRevRecord->getContent( SlotRecord::MAIN ) : null;
			if ( $exists &&
				$overwrite &&
				$oldContent &&
				$rev->getContent()->equals( $oldContent )
			) {
				$this->output( "File for title $actualTitle contains no changes from the current " .
					"revision. Skipping.\n" );
				$skipCount++;
				continue;
			}

			$status = $rev->importOldRevision();
			$newId = $title->getLatestRevID();

			if ( $status ) {
				$action = $exists ? 'updated' : 'created';
				$this->output( "Successfully $action $actualTitle\n" );
				$successCount++;
			} else {
				$action = $exists ? 'update' : 'create';
				$this->output( "Failed to $action $actualTitle\n" );
				$failCount++;
			}

			// Create the RecentChanges entry if necessary
			if ( $rc && $status ) {
				if ( $exists ) {
					// Without the previous revision there is nothing to diff
					// against, so no edit entry is written in that case.
					if ( is_object( $oldRevRecord ) ) {
						RecentChange::notifyEdit(
							$timestamp,
							$title,
							$rev->getMinor(),
							$user,
							$summary,
							$oldRevID,
							$oldRevRecord->getTimestamp(),
							$bot,
							'',
							$oldRevRecord->getSize(),
							$rev->getSize(),
							$newId,
							// the pages don't need to be patrolled
							1
						);
					}
				} else {
					RecentChange::notifyNew(
						$timestamp,
						$title,
						$rev->getMinor(),
						$user,
						$summary,
						$bot,
						'',
						$rev->getSize(),
						$newId,
						1
					);
				}
			}
		}

		$this->output( "Done! $successCount succeeded, $skipCount skipped.\n" );
		if ( $failCount > 0 ) {
			$this->fatalError( "Import failed with $failCount failed pages.\n", 1 );
		}
	}

	/**
	 * Read every file named by the positional arguments.
	 *
	 * An argument that is not an existing path is treated as a glob pattern,
	 * to support the Windows shell, which doesn't automatically expand
	 * wildcards. The script ends with fatalError() when an argument matches
	 * nothing or when a matched path cannot be read as a regular file.
	 *
	 * @return string[] File contents keyed by file path, in argument order
	 */
	private function collectFiles() {
		$files = [];

		// A loop is required since Maintenance doesn't support an arbitrary
		// number of arguments. The comparison against null (the default passed
		// to getArg) is explicit so that an argument such as "0" does not end
		// the loop early and silently drop the remaining arguments.
		for ( $i = 0; ( $arg = $this->getArg( $i, null ) ) !== null; $i++ ) {
			if ( file_exists( $arg ) ) {
				$paths = [ $arg ];
			} else {
				// glob() returns false on error; treat that like "no match".
				$paths = glob( $arg ) ?: [];
				if ( !$paths ) {
					$this->fatalError( "Fatal error: The file '$arg' does not exist!" );
				}
			}

			foreach ( $paths as $path ) {
				// file_exists() and glob() also accept directories, and reading
				// can fail; never import a failed read as an empty page.
				$text = is_file( $path ) ? file_get_contents( $path ) : false;
				if ( $text === false ) {
					$this->fatalError( "Fatal error: The file '$path' could not be read!" );
				}
				$files[$path] = $text;
			}
		}

		return $files;
	}
}
212
// Name the class defined in this file in $maintClass, then include the file
// named by the RUN_MAINTENANCE_IF_MAIN constant.
// NOTE(review): presumably that constant is defined by Maintenance.php and the
// included file instantiates and runs $maintClass when this script is the
// entry point — confirm against Maintenance.php.
$maintClass = ImportTextFiles::class;
require_once RUN_MAINTENANCE_IF_MAIN;
215