1<?php
2// (c) Copyright by authors of the Tiki Wiki CMS Groupware Project
3//
4// All Rights Reserved. See copyright.txt for details and a complete list of authors.
5// Licensed under the GNU LESSER GENERAL PUBLIC LICENSE. See license.txt for details.
6// $Id$
7
8namespace Tiki\Command;
9
10use Symfony\Component\Console\Command\Command;
11use Symfony\Component\Console\Helper\ProgressBar;
12use Symfony\Component\Console\Input\InputInterface;
13use Symfony\Component\Console\Input\InputOption;
14use Symfony\Component\Console\Output\OutputInterface;
15use Symfony\Component\Console\Command\HelpCommand;
16use Language;
17use Language_FileType_Php;
18
19/**
20 * Add a singleton command "englishupdate" using the Symfony console component for this script
21 *
22 * Class EnglishUpdateCommand
23 * @package Tiki\Command
24 */
25
26class EnglishUpdateCommand extends Command
27{
28	protected function configure()
29	{
30		$this
31			->setName('translation:englishupdate')
32			->setDescription('Fix English strings after modifying them.')
33			->setHelp('Update translation files with updates made to English strings. Will compare working copy by default.')
34			->addOption(
35				'scm',
36				null,
37				InputOption::VALUE_REQUIRED,
38				'Source code management type: svn or git'
39			)
40			->addOption(
41				'revision',
42				'r',
43				InputOption::VALUE_REQUIRED,
44				'Revision numbers may be selected eg. 63000:63010, or simply 63000 to update strings from 63000 onward.'
45			)
46			->addOption(
47				'lag',
48				'l',
49				InputOption::VALUE_REQUIRED,
50				'Search through previous commits by X number of days, for updated translation strings. Working copy will be ignored.'
51			)
52			->addOption(
53				'audit',
54				'a',
55				InputOption::VALUE_NONE,
56				'Reports any translation strings that have been broken. Will not change repository. '
57			)
58			->addOption(
59				'email',
60				'e',
61				InputOption::VALUE_REQUIRED,
62				'Email address to send a message to if untranslated strings are found. Must be used in conjunction with "audit".'
63			)
64			->addOption(
65				'diff-command',
66				null,
67				InputOption::VALUE_REQUIRED,
68				'Set a shell command to return the diff (ex. in case of a based git repository) override svn default diff. Options revision and lag will be ignored.'
69			)
70			->addOption(
71				'git',
72				null,
73				InputOption::VALUE_NONE,
74				'Set thi if diff-command is based on git'
75			);
76	}
77
78	/**
79	 * The total number of changed strings
80	 * @var int
81	 */
82	private $stringCount = 0;
83
84
85	/**
86	 * The number of identical original & changed pairs found.
87	 * @var int
88	 */
89	private $duplicates = 0;
90
91	/**
92	 * An array of all the language directories in Tiki
93	 * @var array
94	 */
95	private $languages;
96	/**
97	 *
98	 * Seperates svn diff output into changes made in PHP and TPL files
99	 *
100	 * @param $content string raw svn diff output
101	 * @param string $diff git or svn depending on the version control used to generate the diff.
102	 *
103	 * @return array with [0] containing PHP and [1] containing TPL strings
104	 */
105
106	/**
107	 * Run svn diff command
108	 * @param array $revisions revisions to use in diff
109	 * @param int $lag number of days to search previously
110	 * @return mixed diff result
111	 */
112	private function getSvnDiff($revisions, $lag = 0)
113	{
114		$rev = '';
115		if ($lag > 0) {
116			// current time minus number of days specified through lag
117			$rev = date('{"Y-m-d H:i"}', time() - $lag * 60 * 60 * 24);
118			$rev = '-r ' . $rev;
119		} elseif ($revisions) {
120			$rev = '-r ' . implode(":", $revisions);
121		}
122
123		$raw = shell_exec("svn diff $rev 2>&1");
124
125		// strip any empty translation strings now to avoid complexities later
126		$raw = preg_replace('/tra?\(["\'](\s*?)[\'"]\)/m', '', $raw);
127
128//		$output->writeln($raw, OutputInterface::VERBOSITY_DEBUG);
129
130		return $this->separatePhpTpl($raw);
131	}
132
133	/**
134	 * Run git diff command
135	 * @param array $revisions revisions to use in diff
136	 * @param int $lag number of days to search previously
137	 * @return mixed diff result
138	 */
139	private function getGitDiff($revisions, $lag = 0)
140	{
141		if ($lag > 0) {
142			// current time minus number of days specified through lag
143			$rev = 'HEAD \'HEAD@{' . $lag . ' weeks ago}\'';
144		} else {
145			$rev = implode(' ', $revisions);
146		}
147
148		$raw = shell_exec("git diff $rev 2>&1");
149
150
151		// strip any empty translation strings now to avoid complexities later
152		$raw = preg_replace('/tra?\(["\'](\s*?)[\'"]\)/m', '', $raw);
153
154//		$output->writeln($raw, OutputInterface::VERBOSITY_DEBUG);
155
156		return $this->separatePhpTpl($raw, 'git');
157	}
158
159
160	private function separatePhpTpl($content, $diff = 'svn')
161	{
162
163		if ($diff === 'git') {
164			preg_match_all('/^diff --git .+(php|tpl)$\nindex .+\n([\w\W]+?)(?=\n^diff --git.+\n|\n$)/m', $content, $phpTpl);
165		} else {
166			$content .= "\nIndex:  \n=";                            // used as a dummy to match the last entry
167
168			// Separate php and tpl content
169			preg_match_all('/^Index:\s.+(php|tpl)$\n={10}([\w\W]+?)(?=^Index:.+\n=)/m', $content, $phpTpl);
170		}
171
172
173		$changes['php'] = '';
174		$changes['tpl'] = '';
175		$count = 0;
176		while ($count < count($phpTpl[1])) {
177			if ($phpTpl[1][$count] === 'php') {
178				$changes['php'] .= $phpTpl[2][$count];
179			} elseif ($phpTpl[1][$count] === 'tpl') {
180				$changes['tpl'] .= $phpTpl[2][$count];
181			}
182			$count++;
183		}
184
185		return $changes;
186	}
187
188	/**
189	 * @param $content string diff content to split into pairs of removed and added content
190	 *
191	 * @return array equal pairs of added and removed diff content
192	 */
193
194	private function pairMatches($content)
195	{
196
197		/**
198		 * @var $pairedMatches array any changes that took away and added lines.
199		 */
200
201		// strip some diff verbiage to prevent conflict in next match
202		$content = preg_replace('/(?>---|\+\+\+)\s.*\)$/m', '', $content);
203		// place in an array changes that have multiple lines changes
204		preg_match_all('/(\n[-+].*){2,}/m', $content, $diffs);
205
206		$content = $diffs[0];
207		unset($diffs);
208
209		$pairs = [];
210		foreach ($content as $diff) {
211			//now trim it down so its a - then + pair
212			if (preg_match('/^-[\s\S]*^\+.*/m', $diff, $pair)) {
213				// now extract a equally paired sets
214				$count = min(preg_match_all('/^-/m', $pair[0]), preg_match_all('/^\+/m', $pair[0]));
215				if ($count) {
216					preg_match('/(?>\n-.*){' . $count . '}(?>\n\+.*){' . $count . '}/', "\n" . $pair[0], $equilPair);
217					$pairs[] = $equilPair[0];
218				}
219			}
220		}
221
222		unset($content);
223		$count = 0;
224		$pairedMatches = [];
225
226		foreach ($pairs as $pair) {
227			if (preg_match_all('/^-(.*)/m', $pair, $negativeMatch)) {
228				if (preg_match_all('/^\+(.*)/m', $pair, $positiveMatch)) {
229					$pairedMatches[$count]['-'] = implode(' ', $negativeMatch[1]);
230					$pairedMatches[$count]['+'] = implode(' ', $positiveMatch[1]);
231					$count++;
232				}
233			}
234		}
235
236		return $pairedMatches;
237	}
238
239	/**
240	 * Takes a semi-prepared list of commit changes (from a diff) and extracts pairs of original and changed translatoion strings
241	 *
242	 * @param $content array of equally paired diff content pairs of removed and added, previously precessed by pairMatches()
243	 * @param $file string can be 'php' or 'tpl'. Will determine how strings are extracted.
244	 *
245	 * @return array extracted strings
246	 */
247	private function pairStrings($content, $file)
248	{
249
250		$count = 0;
251		$pairedStrings = [];
252
253		// set what regex to use depending on file type.
254		if ($file === 'php') {
255			$regex = '/\Wtra?\s*\(\s*([\'"])(.+?)\1\s*[\),]/';
256			$php = new Language_FileType_Php;
257		} else {
258			$regex = '/\{(t)r(?:\s+[^\}]*)?\}(.+?)\{\/tr\}/';
259		}
260
261		foreach ($content as $pair) {
262			if (preg_match_all($regex, $pair['-'], $negativeMatch)) {
263				if (preg_match_all($regex, $pair['+'], $positiveMatch)) {
264					// strip out any changes that have a dissimilar number of translation strings. No way to match them properly :(
265					if (count($negativeMatch[1]) === count($positiveMatch[1])) {
266						// content needs post processing based on single or double quote matches
267						if (isset($negativeMatch[1][0])) {
268							if ($negativeMatch[1][0] == "'") {
269								$negativeMatch[2] = $php->singleQuoted($negativeMatch[2]);
270							} elseif ($negativeMatch[1][0] == '"') {
271								$negativeMatch[2] = $php->doubleQuoted($negativeMatch[2]);
272							}
273							if ($positiveMatch[1][0] == "'") {
274								$positiveMatch[2] = $php->singleQuoted($positiveMatch[2]);
275							} elseif ($positiveMatch[1][0] == '"') {
276								$positiveMatch[2] = $php->doubleQuoted($positiveMatch[2]);
277							}
278						}
279						$pairedStrings[$count]['-'] = $negativeMatch[2];
280						$pairedStrings[$count]['+'] = $positiveMatch[2];
281						$count++;
282					}
283				}
284			}
285		}
286
287		return $pairedStrings;
288	}
289
290	/**
291	 * Filters, formats & escapes paired translation strings to produce a final list of translation changes.
292	 *
293	 * @param $content array paired strings previously processed by pairStrings()
294	 *
295	 * @return array A final list of before and after translation strings to update.
296	 */
297
298	private function filterStrings($content)
299	{
300
301		$updateStrings = [];
302		foreach ($content as $strings) {
303			$count = 0;
304			while (isset($strings['-'][$count])) {
305				// strip any end punctuation from both strings to support tikis punctuations translation functionality.
306				if (in_array(substr($strings['-'][$count], -1), Language::punctuations)) {
307					$strings['-'][$count] = substr($strings['-'][$count], 0, -1);
308				}
309				if (in_array(substr($strings['+'][$count], -1), Language::punctuations)) {
310					$strings['+'][$count] = substr($strings['+'][$count], 0, -1);
311				}
312
313				if ($strings['-'][$count] !== $strings['+'][$count]) {
314					$updateStrings[$this->stringCount]['-'] = Language::addPhpSlashes($strings['-'][$count]);
315					$updateStrings[$this->stringCount]['+'] = Language::addPhpSlashes($strings['+'][$count]);
316					$this->stringCount++;
317				}
318				$count++;
319			}
320		}
321
322		return $updateStrings;
323	}
324
325	/**
326	 * Takes a paired list of original and replacement strings and checks if they are identical
327	 *
328	 * @param $content array paired string, that has previously been processed by filterStrings()
329	 *
330	 * @return array return an array of paired strings with duplicate entries omitted
331	 */
332
333	private function removeIdentical($content)
334	{
335
336		$filtered = [];
337		foreach ($content as $array) {
338			if (! in_array($array, $filtered)) {
339				$filtered[] = $array;
340			}
341		}
342		$this->duplicates = $this->stringCount - count($filtered);
343		$this->stringCount -= $this->duplicates;
344
345		return $filtered;
346	}
347
348	protected function execute(InputInterface $input, OutputInterface $output)
349	{
350		$output->writeln('*******************************************************');
351		$output->writeln('*                     <info>Limitations</info>                     *');
352		$output->writeln('* Will not find strings if they span multiple lines.  *');
353		$output->writeln('*                                                     *');
354		$output->writeln('* Will not match strings if a translation string has  *');
355		$output->writeln('* been added or removed on the line above or below.   *');
356		$output->writeln('*******************************************************');
357		$output->writeln('');
358
359		// check that email is being used in audit mode
360		if ($input->getOption('email') && ! $input->getOption('audit')) {
361			$help = new HelpCommand();
362			$help->setCommand($this);
363			$help->run($input, $output);
364
365			return $output->writeln(' --email, only available when running in --audit mode.');
366		}
367		// check that scm is being used and validate
368		$scm = $input->getOption('scm');
369		if (! empty($scm) && ! in_array($scm, ['svn', 'git'])) {
370			$help = new HelpCommand();
371			$help->setCommand($this);
372			$help->run($input, $output);
373
374			return $output->writeln('<error> --scm, invalid value. ex: svn or git. </error>');
375		}
376
377		if (empty($scm)) {//detect if is svn or git repo
378			if (file_exists(TIKI_PATH . DIRECTORY_SEPARATOR . '.git')) {
379				$scm = 'git';
380			} elseif (file_exists(TIKI_PATH . DIRECTORY_SEPARATOR . '.svn')) {
381				$scm = 'svn';
382			} else {
383				return $output->writeln('<error>SCM not found in this tiki installation</error>');
384			}
385		}
386
387		$lag = $input->getOption('lag');
388		$revision = $input->getOption('revision');
389		$revisions = [];
390		// check that the --lag option is valid, and complain if its not.
391		if ($lag) {
392			if ($input->getOption('lag') < 0 || ! is_numeric($lag)) {
393				$help = new HelpCommand();
394				$help->setCommand($this);
395				$help->run($input, $output);
396
397				return $output->writeln('<error>Invalid option for --lag, must be a positive integer.</error>');
398			}
399		} elseif ($revision) {
400			$revisions = explode(':', $revision);
401			if (count($revisions) > 2) {
402				return $output->writeln('<error>Invalid amount of revisions</error>');
403			}
404		}
405
406		$this->languages = glob(TIKI_PATH . DIRECTORY_SEPARATOR . 'lang' . DIRECTORY_SEPARATOR . '*', GLOB_ONLYDIR);
407
408		$progress = new ProgressBar($output, count($this->languages) + 7);
409		if ($output->getVerbosity() >= OutputInterface::VERBOSITY_VERBOSE) {
410			$progress->setOverwrite(false);
411		}
412		$progress->setFormatDefinition('custom', ' %current%/%max% [%bar%] -- %message%');
413		$progress->setFormat('custom');
414
415		$progress->setMessage('Checking System');
416		$progress->start();
417
418		// die gracefully if shell_exec is not enabled;
419		if (! is_callable('shell_exec')) {
420			$progress->setMessage('<error>Translation string update Failed. Could not execute shell_exec()</error>');
421			$progress->finish();
422
423			return false;
424		}
425
426		$progress->setMessage('Getting String Changes');
427		$progress->advance();
428
429		if ($scm === 'git') {
430			$diffs = $this->getGitDiff($revisions, $lag);
431		} else {
432			$diffs = $this->getSvnDiff($revisions, $lag);
433		}
434
435		$progress->setMessage('Finding Updated Strings');
436		$progress->advance();
437
438//		$output->writeln(var_export($diffs, true), OutputInterface::VERBOSITY_DEBUG);
439
440		$diffs['php'] = $this->pairMatches($diffs['php']);
441		$diffs['tpl'] = $this->pairMatches($diffs['tpl']);
442
443		$progress->setMessage('Found ' . count($diffs['php']) . ' PHP and ' . count($diffs['tpl']) . ' TPL changes');
444		$progress->advance();
445
446//		$output->writeln(var_export($diffs, true), OutputInterface::VERBOSITY_DEBUG);
447
448		$diffs['php'] = $this->pairStrings($diffs['php'], 'php');
449		$diffs['tpl'] = $this->pairStrings($diffs['tpl'], 'tpl');
450		$diffs = array_merge($diffs['php'], $diffs['tpl']);
451
452		$progress->setMessage('Found ' . count($diffs) . ' String pairs');
453		$progress->advance();
454
455//		$output->writeln(var_export($diffs, true), OutputInterface::VERBOSITY_DEBUG);
456
457		$diffs = $this->filterStrings($diffs);
458
459		$progress->setMessage("Found $this->stringCount translation strings");
460		$progress->advance();
461
462		$diffs = $this->removeIdentical($diffs);
463
464		$progress->setMessage('Found ' . $this->duplicates . ' duplicate translation strings');
465		$progress->advance();
466
467		if ($output->getVerbosity() >= OutputInterface::VERBOSITY_VERY_VERBOSE) {
468			$output->writeln("\n\n<info>Strings Being Updated</info>\n");
469			foreach ($diffs as $diff) {
470				$output->writeln('* ' . $diff['-']);
471				$output->writeln('* ' . $diff['+'] . "\n");
472			}
473		}
474
475		/**
476		 * Tokens indicating that the replacement sting was found and replaced in the language file
477		 * @ver array
478		 */
479		$string = [];
480
481		/**
482		 * Tokens indicating that the replacement string was already present in the language file, so was skipped
483		 * @var array
484		 */
485		$skipped = [];
486
487		/**
488		 * Tokens indicating what language files have had changes made to them
489		 * @var array
490		 */
491		$lang = [];
492
493		// update the language files with the new strings
494
495		if ($this->stringCount) {
496			foreach ($this->languages as $directory) {
497				$langNow = substr($directory, strrpos($directory, "/") + 1);
498				if (is_writable($directory . '/language.php')) {
499					$file = file_get_contents($directory . '/language.php');
500					foreach ($diffs as $key => $entry) {
501						// if the original string is in the language file
502						if (preg_match('/^"' . preg_quote($entry['-'], '/') . '[' . implode('', Language::punctuations) . ']?".*/m', $file, $match)) {
503							// if the replacement string does not already exist
504							if (! strpos($file, "\n\"" . $entry['+'] . '"')) {
505								// then replace the original string with an exact copy and a 'updated' copy on the next line
506								$replace = preg_replace('/"' . preg_quote($entry['-'], '/') . '[' . implode('', Language::punctuations) . ']?"/', '"' . $entry['+'] . '"', $match[0], 1);
507								$file = str_replace($match[0], $match[0] . "\n" . $replace, $file);
508
509								// keep track of overall numbers
510								$string[$key] = true;
511								$lang[$langNow] = true;
512							} else {
513								$skipped[$key] = true;
514							}
515						}
516					}
517					if (isset($lang[$langNow])) {
518						$progress->setMessage($langNow . "\tStrings to update");
519						$progress->advance();
520						if (! $input->getOption('audit')) {
521							file_put_contents($directory . '/language.php', $file);
522						}
523					} else {
524						$progress->setMessage($langNow . "\tNo changes to make");
525						$progress->advance();
526					}
527				} else {
528					$progress->setMessage($langNow . "\tSkipping <info>language.php not writable</info>");
529					$progress->advance();
530				}
531			}
532		}
533		$skippedMessage = '';
534		if ($this->duplicates) {
535			$skippedMessage = ' Skipped ' . $this->duplicates . ' duplicate strings.';
536		}
537
538		if ($input->getOption('audit')) {
539			$updateMessage = 'Out of Sync';
540		} else {
541			$updateMessage = 'Updated';
542		}
543		$progress->setMessage(count($string) . " of $this->stringCount strings $updateMessage in " . count($lang) . ' of ' . count($this->languages) . ' language files.' . $skippedMessage);
544		$progress->finish();
545
546		if ($input->getOption('audit')) {
547			if (count($string)) {
548				$syncMessage = "\n";
549				$output->writeln("\n\n<info>Updated Strings not found in Language Files</info>");
550				foreach ($diffs as $key => $entry) {
551					if (isset($string[$key])) {
552						$syncMessage .= '* ' . $entry['-'] . "\n";
553					}
554				}
555				$output->writeln($syncMessage);
556				if ($input->getOption('email')) {
557					mail($input->getOption('email'), 'Updated Strings not found in Language Files', wordwrap(TIKI_PATH . "\n" . $syncMessage, 70, "\r\n"));
558				}
559				exit(1);
560			}
561			$output->writeln("\n\n<info>English and Translations are in Sync</info>\n");
562			// if were not in audit mode
563		} else {
564			if (count($string) < $this->stringCount) {
565				$output->writeln("\n\n<info>Strings Not Translated</info>");
566				foreach ($diffs as $key => $entry) {
567					if (! isset($string[$key]) && ! isset($skipped[$key])) {
568						$output->writeln('* ' . $entry['-']);
569					}
570				}
571			}
572			$output->writeln("\n\nOptionally run php get_strings.php to remove any unused translation strings.");
573			$output->writeln("Verify before committing.\n");
574		}
575		exit(0);
576	}
577}