1<?php
2
3/**
4 * Enforces basic spelling. Spelling inside code is actually pretty hard to
5 * get right without false positives. I take a conservative approach and just
6 * use a blacklisted set of words that are commonly spelled incorrectly.
7 */
final class ArcanistSpellingLinter extends ArcanistLinter {

  // Lint codes. EXACT is raised for whole-word matches (see checkExactWord),
  // PARTIAL for matches anywhere inside the text (see checkPartialWord).
  const LINT_SPELLING_EXACT   = 1;
  const LINT_SPELLING_PARTIAL = 2;

  // Resolved paths of every dictionary loaded so far. Only used to decide
  // whether the default dictionary still needs to be loaded.
  private $dictionaries     = array();

  // Map of misspelling => correction, matched on word boundaries.
  private $exactWordRules   = array();

  // Map of misspelling => correction, matched as a plain substring.
  private $partialWordRules = array();

  /**
   * @return string Human-readable name of this linter.
   */
  public function getInfoName() {
    return pht('Spellchecker');
  }

  /**
   * @return string Human-readable summary of what this linter does.
   */
  public function getInfoDescription() {
    return pht('Detects common misspellings of English words.');
  }

  /**
   * @return string Short identifier for this linter.
   */
  public function getLinterName() {
    return 'SPELL';
  }

  /**
   * @return string Name used to select this linter in configuration.
   */
  public function getLinterConfigurationName() {
    return 'spelling';
  }

  /**
   * Describe the configuration options understood by this linter.
   *
   * The local options are combined with the parent's using array union, so
   * an option declared here takes precedence over a parent option with the
   * same key.
   *
   * @return array Map of option name to option specification.
   */
  public function getLinterConfigurationOptions() {
    $options = array(
      'spelling.dictionaries' => array(
        'type' => 'optional list<string>',
        'help' => pht('Pass in custom dictionaries.'),
      ),
    );

    return $options + parent::getLinterConfigurationOptions();
  }

  /**
   * Apply a single configuration value.
   *
   * `spelling.dictionaries` loads each listed dictionary; every other key is
   * delegated to the parent implementation.
   *
   * @param string $key   Configuration option name.
   * @param mixed  $value Configuration option value.
   * @return mixed Nothing for options handled here, otherwise whatever the
   *               parent implementation returns.
   */
  public function setLinterConfigurationValue($key, $value) {
    switch ($key) {
      case 'spelling.dictionaries':
        foreach ($value as $dictionary) {
          $this->loadDictionary($dictionary);
        }
        return;
    }

    return parent::setLinterConfigurationValue($key, $value);
  }

  /**
   * Load a JSON dictionary and merge its rules into the active rule sets.
   *
   * The decoded file must contain a `rules` key; its optional `exact` and
   * `partial` entries are maps of misspelling => correction. Rules loaded
   * later override earlier rules for the same misspelling.
   *
   * NOTE(review): the path appears to be resolved relative to the project
   * root, and a malformed dictionary presumably raises from
   * `phutil_json_decode()` / `PhutilTypeSpec::checkMap()` - confirm against
   * those helpers.
   *
   * @param string $path Path to the dictionary file.
   * @return void
   */
  public function loadDictionary($path) {
    $root = $this->getProjectRoot();
    $path = Filesystem::resolvePath($path, $root);

    $dict = phutil_json_decode(Filesystem::readFile($path));
    PhutilTypeSpec::checkMap(
      $dict,
      array(
        'rules' => 'map<string, map<string, string>>',
      ));
    $rules = $dict['rules'];

    $this->dictionaries[] = $path;
    $this->exactWordRules = array_merge(
      $this->exactWordRules,
      idx($rules, 'exact', array()));
    $this->partialWordRules = array_merge(
      $this->partialWordRules,
      idx($rules, 'partial', array()));
  }

  /**
   * Register (or override) a whole-word rule.
   *
   * @param string $misspelling Word to flag.
   * @param string $correction  Suggested replacement.
   * @return $this
   */
  public function addExactWordRule($misspelling, $correction) {
    $this->exactWordRules[$misspelling] = $correction;
    return $this;
  }

  /**
   * Register (or override) a substring rule.
   *
   * @param string $misspelling Text to flag wherever it occurs.
   * @param string $correction  Suggested replacement.
   * @return $this
   */
  public function addPartialWordRule($misspelling, $correction) {
    $this->partialWordRules[$misspelling] = $correction;
    return $this;
  }

  /**
   * @return array Map of lint code to default severity.
   */
  public function getLintSeverityMap() {
    return array(
      self::LINT_SPELLING_EXACT   => ArcanistLintSeverity::SEVERITY_WARNING,
      self::LINT_SPELLING_PARTIAL => ArcanistLintSeverity::SEVERITY_WARNING,
    );
  }

  /**
   * @return array Map of lint code to human-readable lint name.
   */
  public function getLintNameMap() {
    return array(
      self::LINT_SPELLING_EXACT   => pht('Possible Spelling Mistake'),
      self::LINT_SPELLING_PARTIAL => pht('Possible Spelling Mistake'),
    );
  }

  /**
   * Run every exact and partial rule against one path.
   *
   * @param string $path Path being linted.
   * @return void
   */
  public function lintPath($path) {
    // TODO: This is a bit hacky. If no dictionaries were specified, then add
    // the default dictionary.
    if (!$this->dictionaries) {
      $root = dirname(phutil_get_library_root('arcanist'));
      $this->loadDictionary($root.'/resources/spelling/english.json');
    }

    foreach ($this->exactWordRules as $misspelling => $correction) {
      $this->checkExactWord($path, $misspelling, $correction);
    }

    foreach ($this->partialWordRules as $misspelling => $correction) {
      $this->checkPartialWord($path, $misspelling, $correction);
    }
  }

  /**
   * Raise a lint message for every case-insensitive, whole-word occurrence
   * of a misspelling in the file.
   *
   * @param string $path       Path being linted.
   * @param string $word       Misspelling to search for.
   * @param string $correction Suggested replacement.
   * @return void
   */
  private function checkExactWord($path, $word, $correction) {
    // Array keys that look numeric arrive as integers; normalize them.
    $word = (string)$word;

    // An empty word would turn the pattern into `\b\b`, which matches at
    // every word boundary in the file.
    if ($word === '') {
      return;
    }

    $text = $this->getData($path);
    $matches = array();
    $num_matches = preg_match_all(
      '#\b'.preg_quote($word, '#').'\b#i',
      $text,
      $matches,
      PREG_OFFSET_CAPTURE);

    // Covers both "no matches" (0) and a PCRE failure (false).
    if (!$num_matches) {
      return;
    }

    // With PREG_OFFSET_CAPTURE each match is a (text, byte offset) pair.
    foreach ($matches[0] as $match) {
      $original = $match[0];
      $replacement = self::fixLetterCase($correction, $original);
      $this->raiseLintAtOffset(
        $match[1],
        self::LINT_SPELLING_EXACT,
        pht(
          "Possible spelling error. You wrote '%s', but did you mean '%s'?",
          $word,
          $correction),
        $original,
        $replacement);
    }
  }

  /**
   * Raise a lint message for every case-insensitive occurrence of a
   * misspelling anywhere in the file, including inside longer words.
   *
   * The search resumes one byte after each hit, so occurrences that overlap
   * one another are all reported.
   *
   * @param string $path       Path being linted.
   * @param string $word       Misspelling to search for.
   * @param string $correction Suggested replacement.
   * @return void
   */
  private function checkPartialWord($path, $word, $correction) {
    // Array keys that look numeric arrive as integers. Before PHP 8,
    // stripos() interprets a non-string needle as a character ordinal.
    $word = (string)$word;
    $word_length = strlen($word);

    // On PHP 8 an empty needle matches at every offset; on older versions
    // it emits a warning. Either way there is nothing meaningful to flag.
    if (!$word_length) {
      return;
    }

    $text = $this->getData($path);
    $text_length = strlen($text);

    $pos = 0;
    while ($pos < $text_length) {
      $next = stripos($text, $word, $pos);
      if ($next === false) {
        return;
      }

      // Take the text as it was actually written so that its letter case
      // can be carried over to the replacement.
      $original = substr($text, $next, $word_length);
      $replacement = self::fixLetterCase($correction, $original);
      $this->raiseLintAtOffset(
        $next,
        self::LINT_SPELLING_PARTIAL,
        pht(
          "Possible spelling error. You wrote '%s', but did you mean '%s'?",
          $word,
          $correction),
        $original,
        $replacement);

      $pos = $next + 1;
    }
  }

  /**
   * Re-case a string to follow the letter case of a sample.
   *
   * The sample is checked, in order, for being entirely lowercase, entirely
   * uppercase, or capitalized per word (as produced by `ucwords()`), and the
   * string is converted to the first style that fits.
   *
   * @param string $string Text to re-case.
   * @param string $case   Sample whose letter case should be imitated.
   * @return string|null Re-cased text, or null when the sample uses some
   *                     other mix of cases.
   */
  public static function fixLetterCase($string, $case) {
    // Compare strictly. A `switch` compares loosely, which treats
    // numeric-looking strings such as "1E3" and "1e3" as equal and would
    // pick the wrong branch. The cast keeps non-string samples working.
    $case = (string)$case;

    if ($case === strtolower($case)) {
      return strtolower($string);
    }

    if ($case === strtoupper($case)) {
      return strtoupper($string);
    }

    if ($case === ucwords(strtolower($case))) {
      return ucwords(strtolower($string));
    }

    return null;
  }

}
185