1<?php
2
3/**
4 * Manages lint execution. When you run 'arc lint' or 'arc diff', Arcanist
5 * attempts to run lint rules using a lint engine.
6 *
7 * Lint engines are high-level strategic classes which do not contain any
8 * actual linting rules. Linting rules live in `Linter` classes. The lint
9 * engine builds and configures linters.
10 *
11 * Most modern linters can be configured with an `.arclint` file, which is
12 * managed by the builtin @{class:ArcanistConfigurationDrivenLintEngine}.
13 * Consult the documentation for more information on these files.
14 *
15 * In the majority of cases, you do not need to write a custom lint engine.
16 * For example, to add new rules for a new language, write a linter instead.
17 * However, if you have a very advanced or specialized use case, you can write
18 * a custom lint engine by extending this class; custom lint engines are more
19 * powerful but much more complex than the builtin engines.
20 *
21 * The lint engine is given a list of paths (generally, the paths that you
22 * modified in your change) and determines which linters to run on them. The
23 * linters themselves are responsible for actually analyzing file text and
24 * finding warnings and errors. For example, if the modified paths include some
25 * JS files and some Python files, you might want to run JSLint on the JS files
26 * and PyLint on the Python files.
27 *
28 * You can also run multiple linters on a single file. For instance, you might
29 * run one linter on all text files to make sure they don't have trailing
30 * whitespace, or enforce tab vs space rules, or make sure there are enough
31 * curse words in them.
32 *
33 * You can test an engine like this:
34 *
35 *   arc lint --engine YourLintEngineClassName --lintall some_file.py
36 *
37 * ...which will show you all the lint issues raised in the file.
38 *
39 * See @{article@phabricator:Arcanist User Guide: Customizing Lint, Unit Tests
40 * and Workflows} for more information about configuring lint engines.
41 */
abstract class ArcanistLintEngine extends Phobject {

  /** @var ArcanistWorkingCopyIdentity Working copy being linted. */
  protected $workingCopy;

  /** @var list<string> Paths the engine was asked to lint. */
  protected $paths = array();

  /** @var map<string, string> Map from path to file content. */
  protected $fileData = array();

  /**
   * @var map<string, list<int>> Per-path list mapping a byte offset to a
   *   zero-based line index. Built lazily by getLineAndCharFromOffset().
   */
  protected $charToLine = array();

  /**
   * @var map<string, list<int>> Per-path list mapping a zero-based line
   *   index to the byte offset of the first character on that line.
   */
  protected $lineToFirstChar = array();

  /** @var map|null Cached results provided via setCachedResults(). */
  private $cachedResults;

  /** @var int Checksum of engine and linter versions, computed by run(). */
  private $cacheVersion;

  /** @var wild Repository version provided via setRepositoryVersion(). */
  private $repositoryVersion;

  /** @var map<string, ArcanistLintResult> Map from path to result. */
  private $results = array();

  /** @var map<string, wild> Map from path to the ID of the stopping linter. */
  private $stopped = array();

  /** @var wild Messages less severe than this are dropped by run(). */
  private $minimumSeverity = ArcanistLintSeverity::SEVERITY_DISABLED;

  /**
   * @var map<string, map<int, bool>|null> Map from path to a set of changed
   *   line numbers, or to null when no line filtering applies to the path.
   */
  private $changedLines = array();

  /** @var ArcanistConfigurationManager */
  private $configurationManager;

  /** @var map<string, wild> Named resources shared between linters. */
  private $linterResources = array();

  public function __construct() {}

  /**
   * Set the configuration manager available to this engine.
   *
   * @param ArcanistConfigurationManager Configuration manager.
   * @return this
   */
  final public function setConfigurationManager(
    ArcanistConfigurationManager $configuration_manager) {
    $this->configurationManager = $configuration_manager;
    return $this;
  }

  /**
   * @return ArcanistConfigurationManager|null Configuration manager, if set.
   */
  final public function getConfigurationManager() {
    return $this->configurationManager;
  }

  /**
   * Set the working copy. Its project root is used to resolve paths on disk.
   *
   * @param ArcanistWorkingCopyIdentity Working copy.
   * @return this
   */
  final public function setWorkingCopy(
    ArcanistWorkingCopyIdentity $working_copy) {
    $this->workingCopy = $working_copy;
    return $this;
  }

  /**
   * @return ArcanistWorkingCopyIdentity|null Working copy, if set.
   */
  final public function getWorkingCopy() {
    return $this->workingCopy;
  }

  /**
   * Set the list of paths to lint.
   *
   * @param list<string> Paths.
   * @return this
   */
  final public function setPaths($paths) {
    $this->paths = $paths;
    return $this;
  }

  /**
   * @return list<string> Paths provided via @{method:setPaths}.
   */
  public function getPaths() {
    return $this->paths;
  }

  /**
   * Record which lines of a path were changed.
   *
   * @param string          Path.
   * @param list<int>|null  Changed line numbers, or null to indicate that
   *                        messages on this path should not be filtered by
   *                        line.
   * @return this
   */
  final public function setPathChangedLines($path, $changed) {
    if ($changed === null) {
      $this->changedLines[$path] = null;
    } else {
      // Store the lines as a set so membership tests are constant-time.
      $this->changedLines[$path] = array_fill_keys($changed, true);
    }
    return $this;
  }

  /**
   * @param string Path.
   * @return map<int, bool>|null Set of changed lines, or null if no lines
   *   are recorded for the path (or null was recorded explicitly).
   */
  final public function getPathChangedLines($path) {
    return idx($this->changedLines, $path);
  }

  /**
   * Provide file content for paths, so it does not need to be read from disk.
   *
   * Newly provided content takes precedence over content which is already
   * known for the same path.
   *
   * @param map<string, string> Map from path to file content.
   * @return this
   */
  final public function setFileData($data) {
    $this->fileData = $data + $this->fileData;
    return $this;
  }

  /**
   * Get the content of a path, reading it from disk on first access.
   *
   * @param string Path.
   * @return string File content.
   */
  final public function loadData($path) {
    if (!isset($this->fileData[$path])) {
      $disk_path = $this->getFilePathOnDisk($path);
      $this->fileData[$path] = Filesystem::readFile($disk_path);
    }
    return $this->fileData[$path];
  }

  /**
   * @param string Path.
   * @return bool True if the path exists on disk.
   */
  public function pathExists($path) {
    $disk_path = $this->getFilePathOnDisk($path);
    return Filesystem::pathExists($disk_path);
  }

  /**
   * @param string Path.
   * @return bool True if the path is a directory on disk.
   */
  final public function isDirectory($path) {
    $disk_path = $this->getFilePathOnDisk($path);
    return is_dir($disk_path);
  }

  /**
   * Test if a path looks like a binary file.
   *
   * @param string Path.
   * @return bool True if the content is heuristically binary. Paths which
   *   can not be loaded are reported as not binary.
   */
  final public function isBinaryFile($path) {
    try {
      $data = $this->loadData($path);
    } catch (Exception $ex) {
      // Deliberately best-effort: an unreadable path is treated as text.
      return false;
    }

    return ArcanistDiffUtils::isHeuristicBinaryFile($data);
  }

  /**
   * @param string Path.
   * @return bool True if the path is a symbolic link on disk.
   */
  final public function isSymbolicLink($path) {
    return is_link($this->getFilePathOnDisk($path));
  }

  /**
   * Resolve a path against the project root of the working copy.
   *
   * @param string Path.
   * @return string Resolved path on disk.
   */
  final public function getFilePathOnDisk($path) {
    return Filesystem::resolvePath(
      $path,
      $this->getWorkingCopy()->getProjectRoot());
  }

  /**
   * Set the minimum severity a message must have to appear in results.
   *
   * @param wild Severity constant understood by ArcanistLintSeverity.
   * @return this
   */
  final public function setMinimumSeverity($severity) {
    $this->minimumSeverity = $severity;
    return $this;
  }

  /**
   * Build the linters, run them on the configured paths, and collect
   * results.
   *
   * @return map<string, ArcanistLintResult> Map from path to lint result.
   *
   * @throws ArcanistNoEffectException If there are no linters, or no linter
   *   has any paths.
   * @throws PhutilAggregateException If any linter raised an exception.
   *   Results are still collected (and available via @{method:getResults})
   *   before this is thrown.
   */
  final public function run() {
    $linters = $this->buildLinters();
    if (!$linters) {
      throw new ArcanistNoEffectException(pht('No linters to run.'));
    }

    // Linter IDs are the keys returned by buildLinters().
    foreach ($linters as $key => $linter) {
      $linter->setLinterID($key);
    }

    $linters = msort($linters, 'getLinterPriority');
    foreach ($linters as $linter) {
      $linter->setEngine($this);
    }

    $have_paths = false;
    foreach ($linters as $linter) {
      if ($linter->getPaths()) {
        $have_paths = true;
        break;
      }
    }

    if (!$have_paths) {
      throw new ArcanistNoEffectException(pht('No paths are lintable.'));
    }

    // The cache version must be known before linters execute, because
    // execution consults cached results and new results are stamped with it.
    $this->cacheVersion = $this->computeCacheVersion($linters);

    $runnable = $this->getRunnableLinters($linters);

    $this->stopped = array();

    $exceptions = $this->executeLinters($runnable);

    $this->collectLintMessages($runnable);
    $this->addCachedMessagesToResults();
    $this->attachFileDataToResults();

    if ($exceptions) {
      throw new PhutilAggregateException(
        pht('Some linters failed:'),
        $exceptions);
    }

    return $this->results;
  }

  /**
   * Compute a checksum covering the engine cache version and, for each
   * linter, its class name, its cache version and (when the class can be
   * located by the symbol loader) a hash of its source file.
   *
   * @param list<ArcanistLinter> Linters, in execution order.
   * @return int Checksum identifying this combination of versions.
   */
  private function computeCacheVersion(array $linters) {
    $versions = array($this->getCacheVersion());

    foreach ($linters as $linter) {
      $class = get_class($linter);
      $version = $class.':'.$linter->getCacheVersion();

      $symbols = id(new PhutilSymbolLoader())
        ->setType('class')
        ->setName($class)
        ->selectSymbolsWithoutLoading();
      $symbol = idx($symbols, 'class$'.$class);
      if ($symbol) {
        $version .= ':'.md5_file(
          phutil_get_library_root($symbol['library']).'/'.$symbol['where']);
      }

      $versions[] = $version;
    }

    return crc32(implode("\n", $versions));
  }

  /**
   * Validate the messages raised by linters and add the enabled, relevant
   * ones to the per-path results.
   *
   * @param list<ArcanistLinter> Linters which were executed.
   * @return void
   */
  private function collectLintMessages(array $runnable) {
    foreach ($runnable as $linter) {
      foreach ($linter->getLintMessages() as $message) {
        $this->validateLintMessage($linter, $message);

        if (!$this->isSeverityEnabled($message->getSeverity())) {
          continue;
        }
        if (!$this->isRelevantMessage($message)) {
          continue;
        }
        $message->setGranularity($linter->getCacheGranularity());
        $result = $this->getResultForPath($message->getPath());
        $result->addMessage($message);
      }
    }
  }

  /**
   * Add still-valid cached messages for the current cache version to the
   * per-path results.
   *
   * @return void
   */
  private function addCachedMessagesToResults() {
    if (!$this->cachedResults) {
      return;
    }

    foreach ($this->cachedResults as $path => $messages) {
      $messages = idx($messages, $this->cacheVersion, array());
      $repository_version = idx($messages, 'repository_version');

      // These two keys are metadata stored alongside the messages, not
      // messages themselves.
      unset($messages['stopped']);
      unset($messages['repository_version']);

      foreach ($messages as $message) {
        $use_cache = $this->shouldUseCache(
          idx($message, 'granularity'),
          $repository_version);
        if ($use_cache) {
          $this->getResultForPath($path)->addMessage(
            ArcanistLintMessage::newFromDictionary($message));
        }
      }
    }
  }

  /**
   * Attach the on-disk path and, when available, the file content to each
   * result.
   *
   * @return void
   */
  private function attachFileDataToResults() {
    foreach ($this->results as $path => $result) {
      $disk_path = $this->getFilePathOnDisk($path);
      $result->setFilePathOnDisk($disk_path);
      if (isset($this->fileData[$path])) {
        $result->setData($this->fileData[$path]);
      } else if ($disk_path && Filesystem::pathExists($disk_path)) {
        // TODO: this may cause us to, e.g., load a large binary when we only
        // raised an error about its filename. We could refine this by looking
        // through the lint messages and doing this load only if any of them
        // have original/replacement text or something like that.
        try {
          $this->fileData[$path] = Filesystem::readFile($disk_path);
          $result->setData($this->fileData[$path]);
        } catch (FilesystemException $ex) {
          // Ignore this, it's noncritical that we access this data and it
          // might be unreadable or a directory or whatever else for plenty
          // of legitimate reasons.
        }
      }
    }
  }

  /**
   * @param wild Severity constant.
   * @return bool True if the severity is at least as severe as the minimum
   *   configured via @{method:setMinimumSeverity}.
   */
  final public function isSeverityEnabled($severity) {
    $minimum = $this->minimumSeverity;
    return ArcanistLintSeverity::isAtLeastAsSevere($severity, $minimum);
  }

  /**
   * Decide whether a cached result with a given granularity is still usable.
   *
   * File-granularity results are always usable. Directory- and
   * repository-granularity results are usable only if they were produced at
   * the current repository version. Results with any other granularity are
   * never usable.
   *
   * @param wild Cache granularity constant from ArcanistLinter.
   * @param wild Repository version the cached result was produced at.
   * @return bool True if the cached result may be used.
   */
  private function shouldUseCache(
    $cache_granularity,
    $repository_version) {

    switch ($cache_granularity) {
      case ArcanistLinter::GRANULARITY_FILE:
        return true;
      case ArcanistLinter::GRANULARITY_DIRECTORY:
      case ArcanistLinter::GRANULARITY_REPOSITORY:
        return ($this->repositoryVersion == $repository_version);
      default:
        return false;
    }
  }

  /**
   * @param dict<string path, dict<string version, list<dict message>>>
   * @return this
   */
  final public function setCachedResults(array $results) {
    $this->cachedResults = $results;
    return $this;
  }

  /**
   * @return map<string, ArcanistLintResult> Map from path to lint result.
   */
  final public function getResults() {
    return $this->results;
  }

  /**
   * @return map<string, wild> Map from path to the ID of the linter which
   *   stopped all other linters on that path.
   */
  final public function getStoppedPaths() {
    return $this->stopped;
  }

  /**
   * Build the linters this engine should run.
   *
   * The key of each linter in the returned array is used as its linter ID.
   *
   * @return map<wild, ArcanistLinter> Linters to run.
   */
  abstract public function buildLinters();

  /**
   * Set the current repository version, which is compared against the
   * version stored with cached results.
   *
   * @param wild Repository version.
   * @return this
   */
  final public function setRepositoryVersion($version) {
    $this->repositoryVersion = $version;
    return $this;
  }

  /**
   * Decide whether a message should be reported, given the changed lines.
   *
   * @param ArcanistLintMessage Message to test.
   * @return bool True if the message should be included in results.
   */
  private function isRelevantMessage(ArcanistLintMessage $message) {
    // When a user runs "arc lint", we default to raising only warnings on
    // lines they have changed (errors are still raised anywhere in the
    // file). The list of $changed lines may be null, to indicate that the
    // path is a directory or a binary file so we should not exclude
    // warnings.

    if (!$this->changedLines ||
        $message->isError() ||
        $message->shouldBypassChangedLineFiltering()) {
      return true;
    }

    $locations = $message->getOtherLocations();
    $locations[] = $message->toDictionary();

    foreach ($locations as $location) {
      $path = idx($location, 'path', $message->getPath());

      if (!array_key_exists($path, $this->changedLines)) {
        if (phutil_is_windows()) {
          // We try checking the UNIX path form as well, on Windows.  Linters
          // store normalized paths, which use the Windows-style "\" as a
          // delimiter; as such, they don't match the UNIX-style paths stored
          // in changedLines, which come from the VCS.
          $path = str_replace('\\', '/', $path);
          if (!array_key_exists($path, $this->changedLines)) {
            continue;
          }
        } else {
          continue;
        }
      }

      $changed = $this->getPathChangedLines($path);

      // Read optional keys with idx(): a location is not guaranteed to
      // carry "line" or "original", and a missing line number means the
      // message can not be filtered by line.
      $first_line = idx($location, 'line');
      if ($changed === null || !$first_line) {
        return true;
      }

      // A message with multi-line original text covers every line that the
      // text spans.
      $last_line = $first_line;
      $original = idx($location, 'original');
      if ($original !== null) {
        $last_line += substr_count($original, "\n");
      }

      for ($l = $first_line; $l <= $last_line; $l++) {
        if (!empty($changed[$l])) {
          return true;
        }
      }
    }

    return false;
  }

  /**
   * Get the result object for a path, creating an empty one if necessary.
   *
   * @param string Path.
   * @return ArcanistLintResult Result for the path.
   */
  final protected function getResultForPath($path) {
    if (empty($this->results[$path])) {
      $result = new ArcanistLintResult();
      $result->setPath($path);
      $result->setCacheVersion($this->cacheVersion);
      $this->results[$path] = $result;
    }
    return $this->results[$path];
  }

  /**
   * Convert a byte offset into a file to a line and character position.
   *
   * Lines are split on "\n" only and lengths are measured in bytes. The
   * lookup tables for a path are built on first use and then reused.
   *
   * NOTE: The offset is not range-checked. Offsets beyond the end of the
   * content are not present in the lookup table.
   *
   * @param string Path.
   * @param int    Byte offset into the content of the path.
   * @return pair<int, int> Zero-based line index, and byte offset of the
   *   position within that line.
   */
  final public function getLineAndCharFromOffset($path, $offset) {
    if (!isset($this->charToLine[$path])) {
      $char_to_line = array();
      $line_to_first_char = array();

      $lines = explode("\n", $this->loadData($path));
      $line_number = 0;
      $line_start = 0;
      foreach ($lines as $line) {
        $len = strlen($line) + 1; // Account for "\n".
        $line_to_first_char[] = $line_start;
        $line_start += $len;
        for ($ii = 0; $ii < $len; $ii++) {
          $char_to_line[] = $line_number;
        }
        $line_number++;
      }
      $this->charToLine[$path] = $char_to_line;
      $this->lineToFirstChar[$path] = $line_to_first_char;
    }

    $line = $this->charToLine[$path][$offset];
    $char = $offset - $this->lineToFirstChar[$path][$line];

    return array($line, $char);
  }

  /**
   * Get the cache version of the engine itself. This value is folded into
   * the checksum which keys cached results.
   *
   * @return wild Engine cache version.
   */
  protected function getCacheVersion() {
    return 1;
  }


  /**
   * Get a named linter resource shared by another linter.
   *
   * This mechanism allows linters to share arbitrary resources, like the
   * results of computation. If several linters need to perform the same
   * expensive computation step, they can use a named resource to synchronize
   * construction of the result so it doesn't need to be built multiple
   * times.
   *
   * @param string  Resource identifier.
   * @param wild    Optionally, default value to return if resource does not
   *                exist.
   * @return wild   Resource, or default value if not present.
   */
  public function getLinterResource($key, $default = null) {
    return idx($this->linterResources, $key, $default);
  }


  /**
   * Set a linter resource that other linters can access.
   *
   * See @{method:getLinterResource} for a description of this mechanism.
   *
   * @param string Resource identifier.
   * @param wild   Resource.
   * @return this
   */
  public function setLinterResource($key, $value) {
    $this->linterResources[$key] = $value;
    return $this;
  }


  /**
   * Select the linters which report that they can run.
   *
   * @param map<wild, ArcanistLinter> Linters.
   * @return map<wild, ArcanistLinter> Runnable linters, with keys preserved.
   */
  private function getRunnableLinters(array $linters) {
    assert_instances_of($linters, 'ArcanistLinter');

    // TODO: The canRun() mechanism is only used by one linter, and just
    // silently disables the linter. Almost every other linter handles this
    // by throwing `ArcanistMissingLinterException`. Both mechanisms are not
    // ideal; linters which can not run should emit a message, get marked as
    // "skipped", and allow execution to continue. See T7045.

    $runnable = array();
    foreach ($linters as $key => $linter) {
      if ($linter->canRun()) {
        $runnable[$key] = $linter;
      }
    }

    return $runnable;
  }

  /**
   * Execute linters over all engine paths, in chunks of 32 paths.
   *
   * @param map<wild, ArcanistLinter> Runnable linters.
   * @return list<Exception> Exceptions raised by linters.
   */
  private function executeLinters(array $runnable) {
    assert_instances_of($runnable, 'ArcanistLinter');

    $all_paths = $this->getPaths();
    $path_chunks = array_chunk($all_paths, 32, $preserve_keys = true);

    $exception_lists = array();
    foreach ($path_chunks as $chunk) {
      $exception_lists[] = $this->executeLintersOnChunk($runnable, $chunk);
    }

    return array_mergev($exception_lists);
  }


  /**
   * Execute linters on one chunk of paths.
   *
   * For each linter, paths are skipped if they are outside the chunk, have
   * been stopped by an earlier linter, or have a usable cached result.
   * After every linter has run, the "did lint" hook is invoked for each
   * linter which completed without raising an exception.
   *
   * @param map<wild, ArcanistLinter> Runnable linters.
   * @param list<string>              Paths in this chunk.
   * @return list<Exception> Exceptions raised by linters.
   */
  private function executeLintersOnChunk(array $runnable, array $path_list) {
    assert_instances_of($runnable, 'ArcanistLinter');

    $path_map = array_fuse($path_list);

    $exceptions = array();
    $did_lint = array();
    foreach ($runnable as $linter) {
      $linter_id = $linter->getLinterID();
      $paths = $linter->getPaths();

      foreach ($paths as $key => $path) {
        // If we aren't running this path in the current chunk of paths,
        // skip it completely.
        if (empty($path_map[$path])) {
          unset($paths[$key]);
          continue;
        }

        // Make sure each path has a result generated, even if it is empty
        // (i.e., the file has no lint messages).
        $this->getResultForPath($path);

        // If a linter has stopped all other linters for this path, don't
        // actually run the linter.
        if (isset($this->stopped[$path])) {
          unset($paths[$key]);
          continue;
        }

        // If we have a cached result for this path, don't actually run the
        // linter.
        if (isset($this->cachedResults[$path][$this->cacheVersion])) {
          $cached_result = $this->cachedResults[$path][$this->cacheVersion];

          $use_cache = $this->shouldUseCache(
            $linter->getCacheGranularity(),
            idx($cached_result, 'repository_version'));

          if ($use_cache) {
            unset($paths[$key]);

            // A cached result with no "stopped" value means no linter
            // stopped this path. Check for null explicitly: linter IDs are
            // array keys and may be the integer 0, and a loose comparison
            // would treat null as equal to 0, wrongly marking the path as
            // stopped by the first linter.
            $stopped_by = idx($cached_result, 'stopped');
            if ($stopped_by !== null &&
                (string)$stopped_by === (string)$linter_id) {
              $this->stopped[$path] = $linter_id;
            }
          }
        }
      }

      $paths = array_values($paths);

      if (!$paths) {
        continue;
      }

      try {
        $this->executeLinterOnPaths($linter, $paths);
        $did_lint[] = array($linter, $paths);
      } catch (Exception $ex) {
        $exceptions[] = $ex;
      }
    }

    foreach ($did_lint as $info) {
      list($linter, $paths) = $info;
      try {
        $this->executeDidLintOnPaths($linter, $paths);
      } catch (Exception $ex) {
        $exceptions[] = $ex;
      }
    }

    return $exceptions;
  }

  /**
   * Begin a "lint" service call in the service profiler.
   *
   * @param ArcanistLinter Linter about to run.
   * @param list<string>   Paths the linter will run on.
   * @return wild Call ID to pass to @{method:endLintServiceCall}.
   */
  private function beginLintServiceCall(ArcanistLinter $linter, array $paths) {
    $profiler = PhutilServiceProfiler::getInstance();

    return $profiler->beginServiceCall(
      array(
        'type' => 'lint',
        'linter' => $linter->getInfoName(),
        'paths' => $paths,
      ));
  }

  /**
   * End a service call started by @{method:beginLintServiceCall}.
   *
   * @param wild Call ID.
   * @return void
   */
  private function endLintServiceCall($call_id) {
    $profiler = PhutilServiceProfiler::getInstance();
    $profiler->endServiceCall($call_id, array());
  }

  /**
   * Run one linter on a list of paths, recording any path on which the
   * linter stopped all other linters.
   *
   * The profiler call is ended on both the success and failure paths; any
   * exception raised by the linter is rethrown.
   *
   * @param ArcanistLinter Linter to run.
   * @param list<string>   Paths to lint.
   * @return void
   */
  private function executeLinterOnPaths(ArcanistLinter $linter, array $paths) {
    $call_id = $this->beginLintServiceCall($linter, $paths);

    try {
      $linter->willLintPaths($paths);
      foreach ($paths as $path) {
        $linter->setActivePath($path);
        $linter->lintPath($path);
        if ($linter->didStopAllLinters()) {
          $this->stopped[$path] = $linter->getLinterID();
        }
      }
    } catch (Exception $ex) {
      $this->endLintServiceCall($call_id);
      throw $ex;
    }

    $this->endLintServiceCall($call_id);
  }

  /**
   * Invoke the "did lint" hook of one linter for a list of paths.
   *
   * The profiler call is ended on both the success and failure paths; any
   * exception raised by the linter is rethrown.
   *
   * @param ArcanistLinter Linter which ran.
   * @param list<string>   Paths the linter ran on.
   * @return void
   */
  private function executeDidLintOnPaths(ArcanistLinter $linter, array $paths) {
    $call_id = $this->beginLintServiceCall($linter, $paths);

    try {
      $linter->didLintPaths($paths);
    } catch (Exception $ex) {
      $this->endLintServiceCall($call_id);
      throw $ex;
    }

    $this->endLintServiceCall($call_id);
  }

  /**
   * Verify that a lint message raised by a linter is well-formed.
   *
   * @param ArcanistLinter      Linter which raised the message.
   * @param ArcanistLintMessage Message to validate.
   * @return void
   *
   * @throws Exception If the message has no name.
   */
  private function validateLintMessage(
    ArcanistLinter $linter,
    ArcanistLintMessage $message) {

    $name = $message->getName();

    // Check for null explicitly: passing null to strlen() is deprecated in
    // newer versions of PHP.
    if ($name === null || !strlen($name)) {
      throw new Exception(
        pht(
          'Linter "%s" generated a lint message that is invalid because it '.
          'does not have a name. Lint messages must have a name.',
          get_class($linter)));
    }
  }

}
619