1 /* Find near-matches for strings and identifiers. 2 Copyright (C) 2015-2019 Free Software Foundation, Inc. 3 4 This file is part of GCC. 5 6 GCC is free software; you can redistribute it and/or modify it under 7 the terms of the GNU General Public License as published by the Free 8 Software Foundation; either version 3, or (at your option) any later 9 version. 10 11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY 12 WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 for more details. 15 16 You should have received a copy of the GNU General Public License 17 along with GCC; see the file COPYING3. If not see 18 <http://www.gnu.org/licenses/>. */ 19 20 #ifndef GCC_SPELLCHECK_H 21 #define GCC_SPELLCHECK_H 22 23 typedef unsigned int edit_distance_t; 24 const edit_distance_t MAX_EDIT_DISTANCE = UINT_MAX; 25 26 /* spellcheck.c */ 27 extern edit_distance_t 28 get_edit_distance (const char *s, int len_s, 29 const char *t, int len_t); 30 31 extern edit_distance_t 32 get_edit_distance (const char *s, const char *t); 33 34 extern const char * 35 find_closest_string (const char *target, 36 const auto_vec<const char *> *candidates); 37 38 /* A traits class for describing a string-like type usable by 39 class best_match. 40 Specializations should provide the implementations of the following: 41 42 static size_t get_length (TYPE); 43 static const char *get_string (TYPE); 44 45 get_string should return a non-NULL ptr, which does not need to be 46 0-terminated. */ 47 48 template <typename TYPE> 49 struct edit_distance_traits {}; 50 51 /* Specialization of edit_distance_traits for C-style strings. */ 52 53 template <> 54 struct edit_distance_traits<const char *> 55 { 56 static size_t get_length (const char *str) 57 { 58 gcc_assert (str); 59 return strlen (str); 60 } 61 62 static const char *get_string (const char *str) 63 { 64 gcc_assert (str); 65 return str; 66 } 67 }; 68 69 extern edit_distance_t get_edit_distance_cutoff (size_t goal_len, 70 size_t candidate_len); 71 72 /* A type for use when determining the best match against a string, 73 expressed as a template so that we can match against various 74 string-like types (const char *, frontend identifiers, and preprocessor 75 macros). 76 77 This type accumulates the best possible match against GOAL_TYPE for 78 a sequence of elements of CANDIDATE_TYPE, whilst minimizing the 79 number of calls to get_edit_distance and to 80 edit_distance_traits<T>::get_length. */ 81 82 template <typename GOAL_TYPE, typename CANDIDATE_TYPE> 83 class best_match 84 { 85 public: 86 typedef GOAL_TYPE goal_t; 87 typedef CANDIDATE_TYPE candidate_t; 88 typedef edit_distance_traits<goal_t> goal_traits; 89 typedef edit_distance_traits<candidate_t> candidate_traits; 90 91 /* Constructor. */ 92 93 best_match (GOAL_TYPE goal, 94 edit_distance_t best_distance_so_far = MAX_EDIT_DISTANCE) 95 : m_goal (goal_traits::get_string (goal)), 96 m_goal_len (goal_traits::get_length (goal)), 97 m_best_candidate (NULL), 98 m_best_distance (best_distance_so_far) 99 {} 100 101 /* Compare the edit distance between CANDIDATE and m_goal, 102 and if it's the best so far, record it. */ 103 104 void consider (candidate_t candidate) 105 { 106 size_t candidate_len = candidate_traits::get_length (candidate); 107 108 /* Calculate a lower bound on the candidate's distance to the goal, 109 based on the difference in lengths; it will require at least 110 this many insertions/deletions. */ 111 edit_distance_t min_candidate_distance 112 = abs ((ssize_t)candidate_len - (ssize_t)m_goal_len); 113 114 /* If the candidate's length is sufficiently different to that 115 of the goal string, then the number of insertions/deletions 116 may be >= the best distance so far. If so, we can reject 117 the candidate immediately without needing to compute 118 the exact distance, since it won't be an improvement. */ 119 if (min_candidate_distance >= m_best_distance) 120 return; 121 122 /* If the candidate will be unable to beat the criterion in 123 get_best_meaningful_candidate, reject it without computing 124 the exact distance. */ 125 edit_distance_t cutoff = get_cutoff (candidate_len); 126 if (min_candidate_distance > cutoff) 127 return; 128 129 /* Otherwise, compute the distance and see if the candidate 130 has beaten the previous best value. */ 131 edit_distance_t dist 132 = get_edit_distance (m_goal, m_goal_len, 133 candidate_traits::get_string (candidate), 134 candidate_len); 135 if (dist < m_best_distance) 136 { 137 m_best_distance = dist; 138 m_best_candidate = candidate; 139 m_best_candidate_len = candidate_len; 140 } 141 } 142 143 /* Assuming that BEST_CANDIDATE is known to be better than 144 m_best_candidate, update (without recomputing the edit distance to 145 the goal). */ 146 147 void set_best_so_far (CANDIDATE_TYPE best_candidate, 148 edit_distance_t best_distance, 149 size_t best_candidate_len) 150 { 151 gcc_assert (best_distance < m_best_distance); 152 m_best_candidate = best_candidate; 153 m_best_distance = best_distance; 154 m_best_candidate_len = best_candidate_len; 155 } 156 157 /* Generate the maximum edit distance for which we consider a suggestion 158 to be meaningful, given a candidate of length CANDIDATE_LEN. */ 159 160 edit_distance_t get_cutoff (size_t candidate_len) const 161 { 162 return ::get_edit_distance_cutoff (m_goal_len, candidate_len); 163 } 164 165 /* Get the best candidate so far, but applying a filter to ensure 166 that we return NULL if none of the candidates are close to the goal, 167 to avoid offering nonsensical suggestions to the user. */ 168 169 candidate_t get_best_meaningful_candidate () const 170 { 171 /* If the edit distance is too high, the suggestion is likely to be 172 meaningless. */ 173 if (m_best_candidate) 174 { 175 edit_distance_t cutoff = get_cutoff (m_best_candidate_len); 176 if (m_best_distance > cutoff) 177 return NULL; 178 } 179 180 /* If the goal string somehow makes it into the candidate list, offering 181 it as a suggestion will be nonsensical e.g. 182 'constexpr' does not name a type; did you mean 'constexpr'? 183 Ultimately such suggestions are due to bugs in constructing the 184 candidate list, but as a band-aid, do not offer suggestions for 185 distance == 0 (where candidate == goal). */ 186 if (m_best_distance == 0) 187 return NULL; 188 189 return m_best_candidate; 190 } 191 192 /* Get the closest candidate so far, without applying any filtering. */ 193 194 candidate_t blithely_get_best_candidate () const 195 { 196 return m_best_candidate; 197 } 198 199 edit_distance_t get_best_distance () const { return m_best_distance; } 200 size_t get_best_candidate_length () const { return m_best_candidate_len; } 201 202 private: 203 const char *m_goal; 204 size_t m_goal_len; 205 candidate_t m_best_candidate; 206 edit_distance_t m_best_distance; 207 size_t m_best_candidate_len; 208 }; 209 210 #endif /* GCC_SPELLCHECK_H */ 211