1 /* Find near-matches for strings and identifiers. 2 Copyright (C) 2015-2018 Free Software Foundation, Inc. 3 4 This file is part of GCC. 5 6 GCC is free software; you can redistribute it and/or modify it under 7 the terms of the GNU General Public License as published by the Free 8 Software Foundation; either version 3, or (at your option) any later 9 version. 10 11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY 12 WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 for more details. 15 16 You should have received a copy of the GNU General Public License 17 along with GCC; see the file COPYING3. If not see 18 <http://www.gnu.org/licenses/>. */ 19 20 #ifndef GCC_SPELLCHECK_H 21 #define GCC_SPELLCHECK_H 22 23 typedef unsigned int edit_distance_t; 24 const edit_distance_t MAX_EDIT_DISTANCE = UINT_MAX; 25 26 /* spellcheck.c */ 27 extern edit_distance_t 28 levenshtein_distance (const char *s, int len_s, 29 const char *t, int len_t); 30 31 extern edit_distance_t 32 levenshtein_distance (const char *s, const char *t); 33 34 extern const char * 35 find_closest_string (const char *target, 36 const auto_vec<const char *> *candidates); 37 38 /* A traits class for describing a string-like type usable by 39 class best_match. 40 Specializations should provide the implementations of the following: 41 42 static size_t get_length (TYPE); 43 static const char *get_string (TYPE); 44 45 get_string should return a non-NULL ptr, which does not need to be 46 0-terminated. */ 47 48 template <typename TYPE> 49 struct edit_distance_traits {}; 50 51 /* Specialization of edit_distance_traits for C-style strings. */ 52 53 template <> 54 struct edit_distance_traits<const char *> 55 { 56 static size_t get_length (const char *str) 57 { 58 gcc_assert (str); 59 return strlen (str); 60 } 61 62 static const char *get_string (const char *str) 63 { 64 gcc_assert (str); 65 return str; 66 } 67 }; 68 69 /* A type for use when determining the best match against a string, 70 expressed as a template so that we can match against various 71 string-like types (const char *, frontend identifiers, and preprocessor 72 macros). 73 74 This type accumulates the best possible match against GOAL_TYPE for 75 a sequence of elements of CANDIDATE_TYPE, whilst minimizing the 76 number of calls to levenshtein_distance and to 77 edit_distance_traits<T>::get_length. */ 78 79 template <typename GOAL_TYPE, typename CANDIDATE_TYPE> 80 class best_match 81 { 82 public: 83 typedef GOAL_TYPE goal_t; 84 typedef CANDIDATE_TYPE candidate_t; 85 typedef edit_distance_traits<goal_t> goal_traits; 86 typedef edit_distance_traits<candidate_t> candidate_traits; 87 88 /* Constructor. */ 89 90 best_match (GOAL_TYPE goal, 91 edit_distance_t best_distance_so_far = MAX_EDIT_DISTANCE) 92 : m_goal (goal_traits::get_string (goal)), 93 m_goal_len (goal_traits::get_length (goal)), 94 m_best_candidate (NULL), 95 m_best_distance (best_distance_so_far) 96 {} 97 98 /* Compare the edit distance between CANDIDATE and m_goal, 99 and if it's the best so far, record it. */ 100 101 void consider (candidate_t candidate) 102 { 103 size_t candidate_len = candidate_traits::get_length (candidate); 104 105 /* Calculate a lower bound on the candidate's distance to the goal, 106 based on the difference in lengths; it will require at least 107 this many insertions/deletions. */ 108 edit_distance_t min_candidate_distance 109 = abs ((ssize_t)candidate_len - (ssize_t)m_goal_len); 110 111 /* If the candidate's length is sufficiently different to that 112 of the goal string, then the number of insertions/deletions 113 may be >= the best distance so far. If so, we can reject 114 the candidate immediately without needing to compute 115 the exact distance, since it won't be an improvement. */ 116 if (min_candidate_distance >= m_best_distance) 117 return; 118 119 /* If the candidate will be unable to beat the criterion in 120 get_best_meaningful_candidate, reject it without computing 121 the exact distance. */ 122 unsigned int cutoff = MAX (m_goal_len, candidate_len) / 2; 123 if (min_candidate_distance > cutoff) 124 return; 125 126 /* Otherwise, compute the distance and see if the candidate 127 has beaten the previous best value. */ 128 edit_distance_t dist 129 = levenshtein_distance (m_goal, m_goal_len, 130 candidate_traits::get_string (candidate), 131 candidate_len); 132 if (dist < m_best_distance) 133 { 134 m_best_distance = dist; 135 m_best_candidate = candidate; 136 m_best_candidate_len = candidate_len; 137 } 138 } 139 140 /* Assuming that BEST_CANDIDATE is known to be better than 141 m_best_candidate, update (without recomputing the edit distance to 142 the goal). */ 143 144 void set_best_so_far (CANDIDATE_TYPE best_candidate, 145 edit_distance_t best_distance, 146 size_t best_candidate_len) 147 { 148 gcc_assert (best_distance < m_best_distance); 149 m_best_candidate = best_candidate; 150 m_best_distance = best_distance; 151 m_best_candidate_len = best_candidate_len; 152 } 153 154 /* Get the best candidate so far, but applying a filter to ensure 155 that we return NULL if none of the candidates are close to the goal, 156 to avoid offering nonsensical suggestions to the user. */ 157 158 candidate_t get_best_meaningful_candidate () const 159 { 160 /* If more than half of the letters were misspelled, the suggestion is 161 likely to be meaningless. */ 162 if (m_best_candidate) 163 { 164 unsigned int cutoff = MAX (m_goal_len, m_best_candidate_len) / 2; 165 if (m_best_distance > cutoff) 166 return NULL; 167 } 168 169 /* If the goal string somehow makes it into the candidate list, offering 170 it as a suggestion will be nonsensical e.g. 171 'constexpr' does not name a type; did you mean 'constexpr'? 172 Ultimately such suggestions are due to bugs in constructing the 173 candidate list, but as a band-aid, do not offer suggestions for 174 distance == 0 (where candidate == goal). */ 175 if (m_best_distance == 0) 176 return NULL; 177 178 return m_best_candidate; 179 } 180 181 /* Get the closest candidate so far, without applying any filtering. */ 182 183 candidate_t blithely_get_best_candidate () const 184 { 185 return m_best_candidate; 186 } 187 188 edit_distance_t get_best_distance () const { return m_best_distance; } 189 size_t get_best_candidate_length () const { return m_best_candidate_len; } 190 191 private: 192 const char *m_goal; 193 size_t m_goal_len; 194 candidate_t m_best_candidate; 195 edit_distance_t m_best_distance; 196 size_t m_best_candidate_len; 197 }; 198 199 #endif /* GCC_SPELLCHECK_H */ 200