/* Find near-matches for strings and identifiers.
Copyright (C) 2015-2018 Free Software Foundation, Inc.
This file is part of GCC.
GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3. If not see
. */
#ifndef GCC_SPELLCHECK_H
#define GCC_SPELLCHECK_H
typedef unsigned int edit_distance_t;
const edit_distance_t MAX_EDIT_DISTANCE = UINT_MAX;
/* spellcheck.c */
extern edit_distance_t
levenshtein_distance (const char *s, int len_s,
const char *t, int len_t);
extern edit_distance_t
levenshtein_distance (const char *s, const char *t);
extern const char *
find_closest_string (const char *target,
const auto_vec *candidates);
/* A traits class for describing a string-like type usable by
class best_match.
Specializations should provide the implementations of the following:
static size_t get_length (TYPE);
static const char *get_string (TYPE);
get_string should return a non-NULL ptr, which does not need to be
0-terminated. */
template
struct edit_distance_traits {};
/* Specialization of edit_distance_traits for C-style strings. */
template <>
struct edit_distance_traits
{
static size_t get_length (const char *str)
{
gcc_assert (str);
return strlen (str);
}
static const char *get_string (const char *str)
{
gcc_assert (str);
return str;
}
};
/* A type for use when determining the best match against a string,
expressed as a template so that we can match against various
string-like types (const char *, frontend identifiers, and preprocessor
macros).
This type accumulates the best possible match against GOAL_TYPE for
a sequence of elements of CANDIDATE_TYPE, whilst minimizing the
number of calls to levenshtein_distance and to
edit_distance_traits::get_length. */
template
class best_match
{
public:
typedef GOAL_TYPE goal_t;
typedef CANDIDATE_TYPE candidate_t;
typedef edit_distance_traits goal_traits;
typedef edit_distance_traits candidate_traits;
/* Constructor. */
best_match (GOAL_TYPE goal,
edit_distance_t best_distance_so_far = MAX_EDIT_DISTANCE)
: m_goal (goal_traits::get_string (goal)),
m_goal_len (goal_traits::get_length (goal)),
m_best_candidate (NULL),
m_best_distance (best_distance_so_far)
{}
/* Compare the edit distance between CANDIDATE and m_goal,
and if it's the best so far, record it. */
void consider (candidate_t candidate)
{
size_t candidate_len = candidate_traits::get_length (candidate);
/* Calculate a lower bound on the candidate's distance to the goal,
based on the difference in lengths; it will require at least
this many insertions/deletions. */
edit_distance_t min_candidate_distance
= abs ((ssize_t)candidate_len - (ssize_t)m_goal_len);
/* If the candidate's length is sufficiently different to that
of the goal string, then the number of insertions/deletions
may be >= the best distance so far. If so, we can reject
the candidate immediately without needing to compute
the exact distance, since it won't be an improvement. */
if (min_candidate_distance >= m_best_distance)
return;
/* If the candidate will be unable to beat the criterion in
get_best_meaningful_candidate, reject it without computing
the exact distance. */
unsigned int cutoff = MAX (m_goal_len, candidate_len) / 2;
if (min_candidate_distance > cutoff)
return;
/* Otherwise, compute the distance and see if the candidate
has beaten the previous best value. */
edit_distance_t dist
= levenshtein_distance (m_goal, m_goal_len,
candidate_traits::get_string (candidate),
candidate_len);
if (dist < m_best_distance)
{
m_best_distance = dist;
m_best_candidate = candidate;
m_best_candidate_len = candidate_len;
}
}
/* Assuming that BEST_CANDIDATE is known to be better than
m_best_candidate, update (without recomputing the edit distance to
the goal). */
void set_best_so_far (CANDIDATE_TYPE best_candidate,
edit_distance_t best_distance,
size_t best_candidate_len)
{
gcc_assert (best_distance < m_best_distance);
m_best_candidate = best_candidate;
m_best_distance = best_distance;
m_best_candidate_len = best_candidate_len;
}
/* Get the best candidate so far, but applying a filter to ensure
that we return NULL if none of the candidates are close to the goal,
to avoid offering nonsensical suggestions to the user. */
candidate_t get_best_meaningful_candidate () const
{
/* If more than half of the letters were misspelled, the suggestion is
likely to be meaningless. */
if (m_best_candidate)
{
unsigned int cutoff = MAX (m_goal_len, m_best_candidate_len) / 2;
if (m_best_distance > cutoff)
return NULL;
}
/* If the goal string somehow makes it into the candidate list, offering
it as a suggestion will be nonsensical e.g.
'constexpr' does not name a type; did you mean 'constexpr'?
Ultimately such suggestions are due to bugs in constructing the
candidate list, but as a band-aid, do not offer suggestions for
distance == 0 (where candidate == goal). */
if (m_best_distance == 0)
return NULL;
return m_best_candidate;
}
/* Get the closest candidate so far, without applying any filtering. */
candidate_t blithely_get_best_candidate () const
{
return m_best_candidate;
}
edit_distance_t get_best_distance () const { return m_best_distance; }
size_t get_best_candidate_length () const { return m_best_candidate_len; }
private:
const char *m_goal;
size_t m_goal_len;
candidate_t m_best_candidate;
edit_distance_t m_best_distance;
size_t m_best_candidate_len;
};
#endif /* GCC_SPELLCHECK_H */