<?php

/**
 * Manages lint execution. When you run 'arc lint' or 'arc diff', Arcanist
 * attempts to run lint rules using a lint engine.
 *
 * Lint engines are high-level strategic classes which do not contain any
 * actual linting rules. Linting rules live in `Linter` classes. The lint
 * engine builds and configures linters.
 *
 * Most modern linters can be configured with an `.arclint` file, which is
 * managed by the builtin @{class:ArcanistConfigurationDrivenLintEngine}.
 * Consult the documentation for more information on these files.
 *
 * In the majority of cases, you do not need to write a custom lint engine.
 * For example, to add new rules for a new language, write a linter instead.
 * However, if you have a very advanced or specialized use case, you can write
 * a custom lint engine by extending this class; custom lint engines are more
 * powerful but much more complex than the builtin engines.
 *
 * The lint engine is given a list of paths (generally, the paths that you
 * modified in your change) and determines which linters to run on them. The
 * linters themselves are responsible for actually analyzing file text and
 * finding warnings and errors. For example, if the modified paths include some
 * JS files and some Python files, you might want to run JSLint on the JS files
 * and PyLint on the Python files.
 *
 * You can also run multiple linters on a single file. For instance, you might
 * run one linter on all text files to make sure they don't have trailing
 * whitespace, or enforce tab vs space rules, or make sure there are enough
 * curse words in them.
 *
 * You can test an engine like this:
 *
 *   arc lint --engine YourLintEngineClassName --lintall some_file.py
 *
 * ...which will show you all the lint issues raised in the file.
 *
 * See @{article@phabricator:Arcanist User Guide: Customizing Lint, Unit Tests
 * and Workflows} for more information about configuring lint engines.
 */
abstract class ArcanistLintEngine extends Phobject {

  protected $workingCopy;
  protected $paths = array();
  protected $fileData = array();

  protected $charToLine = array();
  protected $lineToFirstChar = array();
  private $cachedResults;
  private $cacheVersion;
  private $repositoryVersion;
  private $results = array();
  private $stopped = array();
  private $minimumSeverity = ArcanistLintSeverity::SEVERITY_DISABLED;

  private $changedLines = array();

  private $configurationManager;

  private $linterResources = array();

  public function __construct() {}

  /**
   * Set the configuration manager made available to this engine.
   *
   * @param ArcanistConfigurationManager Configuration manager.
   * @return this
   */
  final public function setConfigurationManager(
    ArcanistConfigurationManager $configuration_manager) {
    $this->configurationManager = $configuration_manager;
    return $this;
  }

  /**
   * Get the configuration manager previously set with
   * @{method:setConfigurationManager}.
   *
   * @return ArcanistConfigurationManager|null
   */
  final public function getConfigurationManager() {
    return $this->configurationManager;
  }

  /**
   * Set the working copy this engine operates on. The working copy's project
   * root is used to resolve paths in @{method:getFilePathOnDisk}.
   *
   * @param ArcanistWorkingCopyIdentity Working copy.
   * @return this
   */
  final public function setWorkingCopy(
    ArcanistWorkingCopyIdentity $working_copy) {
    $this->workingCopy = $working_copy;
    return $this;
  }

  /**
   * Get the working copy previously set with @{method:setWorkingCopy}.
   *
   * @return ArcanistWorkingCopyIdentity|null
   */
  final public function getWorkingCopy() {
    return $this->workingCopy;
  }

  /**
   * Set the list of paths to lint.
   *
   * @param list<string> Paths to lint.
   * @return this
   */
  final public function setPaths($paths) {
    $this->paths = $paths;
    return $this;
  }

  /**
   * Get the list of paths to lint.
   *
   * @return list<string>
   */
  public function getPaths() {
    return $this->paths;
  }

  /**
   * Record which lines of a path were changed. This information is used to
   * filter out non-error messages on lines which were not changed.
   *
   * @param string Path.
   * @param list<int>|null List of changed line numbers, or `null` to
   *   disable changed-line filtering for this path.
   * @return this
   */
  final public function setPathChangedLines($path, $changed) {
    if ($changed === null) {
      $this->changedLines[$path] = null;
    } else {
      // Store as a map of line number to `true` for cheap membership tests.
      $this->changedLines[$path] = array_fill_keys($changed, true);
    }
    return $this;
  }

  /**
   * Get the changed lines recorded for a path.
   *
   * @param string Path.
   * @return map<int, bool>|null Map keyed by changed line number, or `null`
   *   if nothing (or an explicit `null`) was recorded for the path.
   */
  final public function getPathChangedLines($path) {
    return idx($this->changedLines, $path);
  }

  /**
   * Provide file contents for paths, so they do not need to be read from
   * disk. Entries in `$data` take precedence over data which is already
   * stored for the same path.
   *
   * @param map<string, string> Map of path to file content.
   * @return this
   */
  final public function setFileData($data) {
    $this->fileData = $data + $this->fileData;
    return $this;
  }

  /**
   * Get the content of a path, reading it from disk (and remembering it) if
   * it has not been provided or loaded yet.
   *
   * @param string Path.
   * @return string File content.
   */
  final public function loadData($path) {
    if (!isset($this->fileData[$path])) {
      $disk_path = $this->getFilePathOnDisk($path);
      $this->fileData[$path] = Filesystem::readFile($disk_path);
    }
    return $this->fileData[$path];
  }

  /**
   * Test whether a path exists on disk.
   *
   * @param string Path.
   * @return bool
   */
  public function pathExists($path) {
    $disk_path = $this->getFilePathOnDisk($path);
    return Filesystem::pathExists($disk_path);
  }

  /**
   * Test whether a path is a directory on disk.
   *
   * @param string Path.
   * @return bool
   */
  final public function isDirectory($path) {
    $disk_path = $this->getFilePathOnDisk($path);
    return is_dir($disk_path);
  }

  /**
   * Heuristically test whether a path is a binary file. Paths whose data
   * can not be loaded are reported as not binary.
   *
   * @param string Path.
   * @return bool
   */
  final public function isBinaryFile($path) {
    try {
      $data = $this->loadData($path);
    } catch (Exception $ex) {
      return false;
    }

    return ArcanistDiffUtils::isHeuristicBinaryFile($data);
  }

  /**
   * Test whether a path is a symbolic link on disk.
   *
   * @param string Path.
   * @return bool
   */
  final public function isSymbolicLink($path) {
    return is_link($this->getFilePathOnDisk($path));
  }

  /**
   * Resolve a path against the project root of the working copy.
   *
   * @param string Path.
   * @return string Resolved path on disk.
   */
  final public function getFilePathOnDisk($path) {
    return Filesystem::resolvePath(
      $path,
      $this->getWorkingCopy()->getProjectRoot());
  }

  /**
   * Set the minimum severity a message must have to be included in results.
   *
   * @param string One of the `ArcanistLintSeverity` severity constants.
   * @return this
   */
  final public function setMinimumSeverity($severity) {
    $this->minimumSeverity = $severity;
    return $this;
  }

  /**
   * Build the linters, execute them on the configured paths, and collect
   * the resulting messages (merged with any usable cached messages).
   *
   * @return map<string, ArcanistLintResult> Results, keyed by path.
   * @throws ArcanistNoEffectException If there are no linters, or no linter
   *   has any paths.
   * @throws PhutilAggregateException If one or more linters raised an
   *   exception while executing.
   */
  final public function run() {
    $linters = $this->buildLinters();
    if (!$linters) {
      throw new ArcanistNoEffectException(pht('No linters to run.'));
    }

    foreach ($linters as $key => $linter) {
      $linter->setLinterID($key);
    }

    $linters = msort($linters, 'getLinterPriority');
    foreach ($linters as $linter) {
      $linter->setEngine($this);
    }

    $have_paths = false;
    foreach ($linters as $linter) {
      if ($linter->getPaths()) {
        $have_paths = true;
        break;
      }
    }

    if (!$have_paths) {
      throw new ArcanistNoEffectException(pht('No paths are lintable.'));
    }

    // The cache version combines the engine version with the class name,
    // declared cache version and (when the class can be located) a hash of
    // the source file of every linter.
    $versions = array($this->getCacheVersion());

    foreach ($linters as $linter) {
      $version = get_class($linter).':'.$linter->getCacheVersion();

      $symbols = id(new PhutilSymbolLoader())
        ->setType('class')
        ->setName(get_class($linter))
        ->selectSymbolsWithoutLoading();
      $symbol = idx($symbols, 'class$'.get_class($linter));
      if ($symbol) {
        $version .= ':'.md5_file(
          phutil_get_library_root($symbol['library']).'/'.$symbol['where']);
      }

      $versions[] = $version;
    }

    $this->cacheVersion = crc32(implode("\n", $versions));

    $runnable = $this->getRunnableLinters($linters);

    $this->stopped = array();

    $exceptions = $this->executeLinters($runnable);

    foreach ($runnable as $linter) {
      foreach ($linter->getLintMessages() as $message) {
        $this->validateLintMessage($linter, $message);

        if (!$this->isSeverityEnabled($message->getSeverity())) {
          continue;
        }
        if (!$this->isRelevantMessage($message)) {
          continue;
        }
        $message->setGranularity($linter->getCacheGranularity());
        $result = $this->getResultForPath($message->getPath());
        $result->addMessage($message);
      }
    }

    if ($this->cachedResults) {
      foreach ($this->cachedResults as $path => $messages) {
        $messages = idx($messages, $this->cacheVersion, array());
        $repository_version = idx($messages, 'repository_version');
        // These two keys are metadata, not messages.
        unset($messages['stopped']);
        unset($messages['repository_version']);
        foreach ($messages as $message) {
          $use_cache = $this->shouldUseCache(
            idx($message, 'granularity'),
            $repository_version);
          if ($use_cache) {
            $this->getResultForPath($path)->addMessage(
              ArcanistLintMessage::newFromDictionary($message));
          }
        }
      }
    }

    foreach ($this->results as $path => $result) {
      $disk_path = $this->getFilePathOnDisk($path);
      $result->setFilePathOnDisk($disk_path);
      if (isset($this->fileData[$path])) {
        $result->setData($this->fileData[$path]);
      } else if ($disk_path && Filesystem::pathExists($disk_path)) {
        // TODO: this may cause us to, e.g., load a large binary when we only
        // raised an error about its filename. We could refine this by looking
        // through the lint messages and doing this load only if any of them
        // have original/replacement text or something like that.
        try {
          $this->fileData[$path] = Filesystem::readFile($disk_path);
          $result->setData($this->fileData[$path]);
        } catch (FilesystemException $ex) {
          // Ignore this, it's noncritical that we access this data and it
          // might be unreadable or a directory or whatever else for plenty
          // of legitimate reasons.
        }
      }
    }

    if ($exceptions) {
      throw new PhutilAggregateException(
        pht('Some linters failed:'),
        $exceptions);
    }

    return $this->results;
  }

  /**
   * Test whether a severity is at least as severe as the configured minimum
   * severity.
   *
   * @param string One of the `ArcanistLintSeverity` severity constants.
   * @return bool
   */
  final public function isSeverityEnabled($severity) {
    $minimum = $this->minimumSeverity;
    return ArcanistLintSeverity::isAtLeastAsSevere($severity, $minimum);
  }

  /**
   * Decide whether cached data with a given granularity may be reused.
   *
   * File granularity is always reusable. Directory and repository
   * granularity are reusable only if the cached repository version matches
   * the current one. Any other granularity is never reused.
   *
   * @param wild Cache granularity (an `ArcanistLinter::GRANULARITY_*` value).
   * @param wild Repository version stored alongside the cached data.
   * @return bool
   */
  private function shouldUseCache(
    $cache_granularity,
    $repository_version) {

    switch ($cache_granularity) {
      case ArcanistLinter::GRANULARITY_FILE:
        return true;
      case ArcanistLinter::GRANULARITY_DIRECTORY:
      case ArcanistLinter::GRANULARITY_REPOSITORY:
        return ($this->repositoryVersion == $repository_version);
      default:
        return false;
    }
  }

  /**
   * @param dict<string path, dict<string version, list<dict message>>>
   * @return this
   */
  final public function setCachedResults(array $results) {
    $this->cachedResults = $results;
    return $this;
  }

  /**
   * Get the results collected so far, keyed by path.
   *
   * @return map<string, ArcanistLintResult>
   */
  final public function getResults() {
    return $this->results;
  }

  /**
   * Get the paths on which a linter stopped all other linters.
   *
   * @return map<string, wild> Map of path to the ID of the stopping linter.
   */
  final public function getStoppedPaths() {
    return $this->stopped;
  }

  /**
   * Build the linters to execute. Array keys of the returned value are used
   * as linter IDs.
   *
   * @return map<wild, ArcanistLinter>
   */
  abstract public function buildLinters();

  /**
   * Set the current repository version, which is compared against the
   * version stored with cached results.
   *
   * @param wild Repository version.
   * @return this
   */
  final public function setRepositoryVersion($version) {
    $this->repositoryVersion = $version;
    return $this;
  }

  /**
   * Test whether a message should be reported given the changed lines which
   * were recorded with @{method:setPathChangedLines}.
   *
   * @param ArcanistLintMessage Message to test.
   * @return bool True if the message should be kept.
   */
  private function isRelevantMessage(ArcanistLintMessage $message) {
    // When a user runs "arc lint", we default to raising only warnings on
    // lines they have changed (errors are still raised anywhere in the
    // file). The list of $changed lines may be null, to indicate that the
    // path is a directory or a binary file so we should not exclude
    // warnings.

    if (!$this->changedLines ||
        $message->isError() ||
        $message->shouldBypassChangedLineFiltering()) {
      return true;
    }

    $locations = $message->getOtherLocations();
    $locations[] = $message->toDictionary();

    foreach ($locations as $location) {
      $path = idx($location, 'path', $message->getPath());

      if (!array_key_exists($path, $this->changedLines)) {
        if (phutil_is_windows()) {
          // We try checking the UNIX path form as well, on Windows. Linters
          // store normalized paths, which use the Windows-style "\" as a
          // delimiter; as such, they don't match the UNIX-style paths stored
          // in changedLines, which come from the VCS.
          $path = str_replace('\\', '/', $path);
          if (!array_key_exists($path, $this->changedLines)) {
            continue;
          }
        } else {
          continue;
        }
      }

      $changed = $this->getPathChangedLines($path);

      // Use idx() because a location is not guaranteed to carry a "line"
      // key; a location without a line is treated as relevant.
      $line = idx($location, 'line');

      if ($changed === null || !$line) {
        return true;
      }

      // A message with multi-line original text covers several lines; it is
      // relevant if any of them changed.
      $last_line = $line;
      if (isset($location['original'])) {
        $last_line += substr_count($location['original'], "\n");
      }

      for ($l = $line; $l <= $last_line; $l++) {
        if (!empty($changed[$l])) {
          return true;
        }
      }
    }

    return false;
  }

  /**
   * Get the result object for a path, creating it on first use.
   *
   * @param string Path.
   * @return ArcanistLintResult
   */
  final protected function getResultForPath($path) {
    if (empty($this->results[$path])) {
      $result = new ArcanistLintResult();
      $result->setPath($path);
      $result->setCacheVersion($this->cacheVersion);
      $this->results[$path] = $result;
    }
    return $this->results[$path];
  }

  /**
   * Convert a byte offset into the content of a path to a line and a
   * character position on that line. Both values are zero-based. The lookup
   * tables are built on first use and kept per path.
   *
   * @param string Path.
   * @param int Zero-based byte offset into the file content.
   * @return pair<int, int> Line and character on that line.
   */
  final public function getLineAndCharFromOffset($path, $offset) {
    if (!isset($this->charToLine[$path])) {
      $char_to_line = array();
      $line_to_first_char = array();

      $lines = explode("\n", $this->loadData($path));
      $line_number = 0;
      $line_start = 0;
      foreach ($lines as $line) {
        $len = strlen($line) + 1; // Account for "\n".
        $line_to_first_char[] = $line_start;
        $line_start += $len;
        for ($ii = 0; $ii < $len; $ii++) {
          $char_to_line[] = $line_number;
        }
        $line_number++;
      }
      $this->charToLine[$path] = $char_to_line;
      $this->lineToFirstChar[$path] = $line_to_first_char;
    }

    $line = $this->charToLine[$path][$offset];
    $char = $offset - $this->lineToFirstChar[$path][$line];

    return array($line, $char);
  }

  /**
   * Get the cache version of the engine itself. It is mixed into the
   * overall cache version computed by @{method:run}.
   *
   * @return int
   */
  protected function getCacheVersion() {
    return 1;
  }


  /**
   * Get a named linter resource shared by another linter.
   *
   * This mechanism allows linters to share arbitrary resources, like the
   * results of computation. If several linters need to perform the same
   * expensive computation step, they can use a named resource to synchronize
   * construction of the result so it doesn't need to be built multiple
   * times.
   *
   * @param string  Resource identifier.
   * @param wild    Optionally, default value to return if resource does not
   *                exist.
   * @return wild   Resource, or default value if not present.
   */
  public function getLinterResource($key, $default = null) {
    return idx($this->linterResources, $key, $default);
  }


  /**
   * Set a linter resource that other linters can access.
   *
   * See @{method:getLinterResource} for a description of this mechanism.
   *
   * @param string Resource identifier.
   * @param wild   Resource.
   * @return this
   */
  public function setLinterResource($key, $value) {
    $this->linterResources[$key] = $value;
    return $this;
  }


  /**
   * Select the linters which report that they can run.
   *
   * @param map<wild, ArcanistLinter> Linters.
   * @return map<wild, ArcanistLinter> Runnable linters, with keys preserved.
   */
  private function getRunnableLinters(array $linters) {
    assert_instances_of($linters, 'ArcanistLinter');

    // TODO: The canRun() mechanism is only used by one linter, and just
    // silently disables the linter. Almost every other linter handles this
    // by throwing `ArcanistMissingLinterException`. Both mechanisms are not
    // ideal; linters which can not run should emit a message, get marked as
    // "skipped", and allow execution to continue. See T7045.

    $runnable = array();
    foreach ($linters as $key => $linter) {
      if ($linter->canRun()) {
        $runnable[$key] = $linter;
      }
    }

    return $runnable;
  }

  /**
   * Execute linters on all engine paths, in chunks of 32 paths.
   *
   * @param map<wild, ArcanistLinter> Runnable linters.
   * @return list<Exception> Exceptions raised by linters.
   */
  private function executeLinters(array $runnable) {
    assert_instances_of($runnable, 'ArcanistLinter');

    $all_paths = $this->getPaths();
    $path_chunks = array_chunk($all_paths, 32, $preserve_keys = true);

    $exception_lists = array();
    foreach ($path_chunks as $chunk) {
      $exception_lists[] = $this->executeLintersOnChunk($runnable, $chunk);
    }

    return array_mergev($exception_lists);
  }


  /**
   * Execute linters on one chunk of paths. Paths which were stopped by an
   * earlier linter, or which have a usable cached result, are not linted.
   *
   * @param map<wild, ArcanistLinter> Runnable linters.
   * @param list<string> Paths in this chunk.
   * @return list<Exception> Exceptions raised by linters.
   */
  private function executeLintersOnChunk(array $runnable, array $path_list) {
    assert_instances_of($runnable, 'ArcanistLinter');

    $path_map = array_fuse($path_list);

    $exceptions = array();
    $did_lint = array();
    foreach ($runnable as $linter) {
      $linter_id = $linter->getLinterID();
      $paths = $linter->getPaths();

      foreach ($paths as $key => $path) {
        // If we aren't running this path in the current chunk of paths,
        // skip it completely.
        if (empty($path_map[$path])) {
          unset($paths[$key]);
          continue;
        }

        // Make sure each path has a result generated, even if it is empty
        // (i.e., the file has no lint messages).
        $this->getResultForPath($path);

        // If a linter has stopped all other linters for this path, don't
        // actually run the linter.
        if (isset($this->stopped[$path])) {
          unset($paths[$key]);
          continue;
        }

        // If we have a cached result for this path, don't actually run the
        // linter.
        if (isset($this->cachedResults[$path][$this->cacheVersion])) {
          $cached_result = $this->cachedResults[$path][$this->cacheVersion];

          $use_cache = $this->shouldUseCache(
            $linter->getCacheGranularity(),
            idx($cached_result, 'repository_version'));

          if ($use_cache) {
            unset($paths[$key]);

            // Only restore the "stopped" state if the cache actually
            // recorded a stopping linter. A loose comparison here would
            // treat a missing value (null) as equal to the linter ID 0.
            $stopped_by = idx($cached_result, 'stopped');
            if (is_scalar($stopped_by) &&
                (string)$stopped_by === (string)$linter_id) {
              $this->stopped[$path] = $linter_id;
            }
          }
        }
      }

      $paths = array_values($paths);

      if (!$paths) {
        continue;
      }

      try {
        $this->executeLinterOnPaths($linter, $paths);
        $did_lint[] = array($linter, $paths);
      } catch (Exception $ex) {
        $exceptions[] = $ex;
      }
    }

    // The didLintPaths() hooks run only after every linter has linted the
    // chunk, and only for linters which linted without raising.
    foreach ($did_lint as $info) {
      list($linter, $paths) = $info;
      try {
        $this->executeDidLintOnPaths($linter, $paths);
      } catch (Exception $ex) {
        $exceptions[] = $ex;
      }
    }

    return $exceptions;
  }

  /**
   * Begin a "lint" service call in the service profiler.
   *
   * @param ArcanistLinter Linter being executed.
   * @param list<string> Paths being linted.
   * @return wild Call ID to pass to @{method:endLintServiceCall}.
   */
  private function beginLintServiceCall(ArcanistLinter $linter, array $paths) {
    $profiler = PhutilServiceProfiler::getInstance();

    return $profiler->beginServiceCall(
      array(
        'type' => 'lint',
        'linter' => $linter->getInfoName(),
        'paths' => $paths,
      ));
  }

  /**
   * End a service call started with @{method:beginLintServiceCall}.
   *
   * @param wild Call ID.
   * @return void
   */
  private function endLintServiceCall($call_id) {
    $profiler = PhutilServiceProfiler::getInstance();
    $profiler->endServiceCall($call_id, array());
  }

  /**
   * Run a linter on a list of paths, recording paths on which the linter
   * stopped all other linters.
   *
   * @param ArcanistLinter Linter to run.
   * @param list<string> Paths to lint.
   * @return void
   */
  private function executeLinterOnPaths(ArcanistLinter $linter, array $paths) {
    $call_id = $this->beginLintServiceCall($linter, $paths);

    try {
      $linter->willLintPaths($paths);
      foreach ($paths as $path) {
        $linter->setActivePath($path);
        $linter->lintPath($path);
        if ($linter->didStopAllLinters()) {
          $this->stopped[$path] = $linter->getLinterID();
        }
      }
    } catch (Exception $ex) {
      // Close the profiler call before propagating the failure.
      $this->endLintServiceCall($call_id);
      throw $ex;
    }

    $this->endLintServiceCall($call_id);
  }

  /**
   * Invoke the didLintPaths() hook of a linter.
   *
   * @param ArcanistLinter Linter to notify.
   * @param list<string> Paths which were linted.
   * @return void
   */
  private function executeDidLintOnPaths(ArcanistLinter $linter, array $paths) {
    $call_id = $this->beginLintServiceCall($linter, $paths);

    try {
      $linter->didLintPaths($paths);
    } catch (Exception $ex) {
      // Close the profiler call before propagating the failure.
      $this->endLintServiceCall($call_id);
      throw $ex;
    }

    $this->endLintServiceCall($call_id);
  }

  /**
   * Verify that a lint message is well-formed.
   *
   * @param ArcanistLinter Linter which generated the message.
   * @param ArcanistLintMessage Message to validate.
   * @return void
   * @throws Exception If the message does not have a name.
   */
  private function validateLintMessage(
    ArcanistLinter $linter,
    ArcanistLintMessage $message) {

    $name = $message->getName();
    // Check for null explicitly: passing null to strlen() is deprecated as
    // of PHP 8.1.
    if ($name === null || !strlen($name)) {
      throw new Exception(
        pht(
          'Linter "%s" generated a lint message that is invalid because it '.
          'does not have a name. Lint messages must have a name.',
          get_class($linter)));
    }
  }

}