1 /* Find near-matches for strings and identifiers.
2    Copyright (C) 2015-2019 Free Software Foundation, Inc.
3 
4 This file is part of GCC.
5 
6 GCC is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 3, or (at your option) any later
9 version.
10 
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14 for more details.
15 
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3.  If not see
18 <http://www.gnu.org/licenses/>.  */
19 
20 #ifndef GCC_SPELLCHECK_H
21 #define GCC_SPELLCHECK_H
22 
23 typedef unsigned int edit_distance_t;
24 const edit_distance_t MAX_EDIT_DISTANCE = UINT_MAX;
25 
26 /* spellcheck.c  */
27 extern edit_distance_t
28 get_edit_distance (const char *s, int len_s,
29 		   const char *t, int len_t);
30 
31 extern edit_distance_t
32 get_edit_distance (const char *s, const char *t);
33 
34 extern const char *
35 find_closest_string (const char *target,
36 		     const auto_vec<const char *> *candidates);
37 
38 /* A traits class for describing a string-like type usable by
39    class best_match.
40    Specializations should provide the implementations of the following:
41 
42      static size_t get_length (TYPE);
43      static const char *get_string (TYPE);
44 
45    get_string should return a non-NULL ptr, which does not need to be
46    0-terminated.  */
47 
48 template <typename TYPE>
49 struct edit_distance_traits {};
50 
51 /* Specialization of edit_distance_traits for C-style strings.  */
52 
53 template <>
54 struct edit_distance_traits<const char *>
55 {
56   static size_t get_length (const char *str)
57   {
58     gcc_assert (str);
59     return strlen (str);
60   }
61 
62   static const char *get_string (const char *str)
63   {
64     gcc_assert (str);
65     return str;
66   }
67 };
68 
69 extern edit_distance_t get_edit_distance_cutoff (size_t goal_len,
70 						 size_t candidate_len);
71 
72 /* A type for use when determining the best match against a string,
73    expressed as a template so that we can match against various
74    string-like types (const char *, frontend identifiers, and preprocessor
75    macros).
76 
77    This type accumulates the best possible match against GOAL_TYPE for
78    a sequence of elements of CANDIDATE_TYPE, whilst minimizing the
79    number of calls to get_edit_distance and to
80    edit_distance_traits<T>::get_length.  */
81 
82 template <typename GOAL_TYPE, typename CANDIDATE_TYPE>
83 class best_match
84 {
85  public:
86   typedef GOAL_TYPE goal_t;
87   typedef CANDIDATE_TYPE candidate_t;
88   typedef edit_distance_traits<goal_t> goal_traits;
89   typedef edit_distance_traits<candidate_t> candidate_traits;
90 
91   /* Constructor.  */
92 
93   best_match (GOAL_TYPE goal,
94 	      edit_distance_t best_distance_so_far = MAX_EDIT_DISTANCE)
95   : m_goal (goal_traits::get_string (goal)),
96     m_goal_len (goal_traits::get_length (goal)),
97     m_best_candidate (NULL),
98     m_best_distance (best_distance_so_far)
99   {}
100 
101   /* Compare the edit distance between CANDIDATE and m_goal,
102      and if it's the best so far, record it.  */
103 
104   void consider (candidate_t candidate)
105   {
106     size_t candidate_len = candidate_traits::get_length (candidate);
107 
108     /* Calculate a lower bound on the candidate's distance to the goal,
109        based on the difference in lengths; it will require at least
110        this many insertions/deletions.  */
111     edit_distance_t min_candidate_distance
112       = abs ((ssize_t)candidate_len - (ssize_t)m_goal_len);
113 
114     /* If the candidate's length is sufficiently different to that
115        of the goal string, then the number of insertions/deletions
116        may be >= the best distance so far.  If so, we can reject
117        the candidate immediately without needing to compute
118        the exact distance, since it won't be an improvement.  */
119     if (min_candidate_distance >= m_best_distance)
120       return;
121 
122     /* If the candidate will be unable to beat the criterion in
123        get_best_meaningful_candidate, reject it without computing
124        the exact distance.  */
125     edit_distance_t cutoff = get_cutoff (candidate_len);
126     if (min_candidate_distance > cutoff)
127       return;
128 
129     /* Otherwise, compute the distance and see if the candidate
130        has beaten the previous best value.  */
131     edit_distance_t dist
132       = get_edit_distance (m_goal, m_goal_len,
133 			   candidate_traits::get_string (candidate),
134 			   candidate_len);
135     if (dist < m_best_distance)
136       {
137 	m_best_distance = dist;
138 	m_best_candidate = candidate;
139 	m_best_candidate_len = candidate_len;
140       }
141   }
142 
143   /* Assuming that BEST_CANDIDATE is known to be better than
144      m_best_candidate, update (without recomputing the edit distance to
145      the goal).  */
146 
147   void set_best_so_far (CANDIDATE_TYPE best_candidate,
148 			edit_distance_t best_distance,
149 			size_t best_candidate_len)
150   {
151     gcc_assert (best_distance < m_best_distance);
152     m_best_candidate = best_candidate;
153     m_best_distance = best_distance;
154     m_best_candidate_len = best_candidate_len;
155   }
156 
157   /* Generate the maximum edit distance for which we consider a suggestion
158      to be meaningful, given a candidate of length CANDIDATE_LEN.  */
159 
160   edit_distance_t get_cutoff (size_t candidate_len) const
161   {
162     return ::get_edit_distance_cutoff (m_goal_len, candidate_len);
163   }
164 
165   /* Get the best candidate so far, but applying a filter to ensure
166      that we return NULL if none of the candidates are close to the goal,
167      to avoid offering nonsensical suggestions to the user.  */
168 
169   candidate_t get_best_meaningful_candidate () const
170   {
171     /* If the edit distance is too high, the suggestion is likely to be
172        meaningless.  */
173     if (m_best_candidate)
174       {
175 	edit_distance_t cutoff = get_cutoff (m_best_candidate_len);
176 	if (m_best_distance > cutoff)
177 	  return NULL;
178     }
179 
180     /* If the goal string somehow makes it into the candidate list, offering
181        it as a suggestion will be nonsensical e.g.
182          'constexpr' does not name a type; did you mean 'constexpr'?
183        Ultimately such suggestions are due to bugs in constructing the
184        candidate list, but as a band-aid, do not offer suggestions for
185        distance == 0 (where candidate == goal).  */
186     if (m_best_distance == 0)
187       return NULL;
188 
189     return m_best_candidate;
190   }
191 
192   /* Get the closest candidate so far, without applying any filtering.  */
193 
194   candidate_t blithely_get_best_candidate () const
195   {
196     return m_best_candidate;
197   }
198 
199   edit_distance_t get_best_distance () const { return m_best_distance; }
200   size_t get_best_candidate_length () const { return m_best_candidate_len; }
201 
202  private:
203   const char *m_goal;
204   size_t m_goal_len;
205   candidate_t m_best_candidate;
206   edit_distance_t m_best_distance;
207   size_t m_best_candidate_len;
208 };
209 
210 #endif  /* GCC_SPELLCHECK_H  */
211