1# -*- coding: utf-8 -*- #
2# Copyright 2016 Google LLC. All Rights Reserved.
3#
4# Licensed under the Apache License, Version 2.0 (the "License");
5# you may not use this file except in compliance with the License.
6# You may obtain a copy of the License at
7#
8#    http://www.apache.org/licenses/LICENSE-2.0
9#
10# Unless required by applicable law or agreed to in writing, software
11# distributed under the License is distributed on an "AS IS" BASIS,
12# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13# See the License for the specific language governing permissions and
14# limitations under the License.
15
16"""Methods for suggesting corrections to command typos."""
17
18from __future__ import absolute_import
19from __future__ import division
20from __future__ import unicode_literals
21
22import collections
23import os
24import re
25
26from googlecloudsdk.command_lib.static_completion import lookup
27from googlecloudsdk.core import log
28from googlecloudsdk.core.util import files
29import six
30
31
32# Command noun and verb variants mapped to most likely gcloud counterpart.
33SYNONYMS = {
34    'change': 'update',
35    # TODO(b/119555246): Delete 'copy-files' entry once
36    # 'gcloud compute copy-files' is removed.
37    'copy-files': 'scp',
38    'create': 'add',
39    'delete': 'remove',
40    'describe': 'get',
41    # TODO(b/119550681): Delete 'docker' entry once 'gcloud docker' is removed.
42    'docker': 'auth-configure-docker',
43    'get': 'describe',
44    'image': 'images',
45    'instance': 'instances',
46    'instances': 'instance',
47    'make': 'create',
48    'modify': 'update',
49    'patch': 'update',
50    'remove': 'delete',
51    'show': 'describe',
52}
53
54MIN_RATIO = 0.7  # Minimum score/top_score ratio of accepted suggestions.
55MIN_SUGGESTED_GROUPS = 4  # Check for group prefix if less groups than this.
56MAX_SUGGESTIONS = 10  # Maximum number of suggestions.
57# Factor to multiply logged command frequencies by before incrementing
58# canonical command scores.
59FREQUENCY_FACTOR = 100
60
61
62def _GetSurfaceHistoryFrequencies(logs_dir):
63  """Load the last 100 surfaces user used today from local command history.
64
65  Args:
66    logs_dir: str, the path to today's logs directory
67
68  Returns:
69    dict mapping surfaces to normalized frequencies.
70  """
71  surfaces_count = collections.defaultdict(int)
72  if not logs_dir:
73    return surfaces_count
74  total = 0
75  last_100_invocations = sorted(os.listdir(logs_dir), reverse=True)[:100]
76  for filename in last_100_invocations:
77    file_path = os.path.join(logs_dir, filename)
78    with files.FileReader(file_path) as log_file:
79      for line in log_file:
80        match = re.search(log.USED_SURFACE_PATTERN, line)
81        if match:
82          surface = match.group(1)
83          total += 1
84          surfaces_count[surface] += 1
85  # normalize surface frequencies
86  return {surface: count / total
87          for surface, count in six.iteritems(surfaces_count)}
88
89
90def _GetCanonicalCommandsHelper(tree, results, prefix):
91  """Helper method to _GetCanonicalCommands.
92
93  Args:
94    tree: The root of the tree that will be traversed to find commands.
95    results: The results list to append to.
96    prefix: [str], the canonical command line words so far. Once we reach
97      a leaf node, prefix contains a canonical command and a copy is
98      appended to results.
99
100  Returns:
101    None
102  """
103  if not tree.get(lookup.LOOKUP_COMMANDS):
104    results.append(prefix[:])
105    return
106  for command, command_tree in six.iteritems(tree[lookup.LOOKUP_COMMANDS]):
107    prefix.append(command)
108    _GetCanonicalCommandsHelper(command_tree, results, prefix)
109    prefix.pop()
110
111
112def _GetCanonicalCommands(tree):
113  """Return list of all canonical commands in CLI tree in arbitrary order.
114
115  Args:
116    tree: The root of the tree that will be traversed to find commands.
117
118  Returns:
119    [[canonical_command_words]]: List of lists, all possible sequences of
120      canonical command words in the tree.
121  """
122  results = []
123  _GetCanonicalCommandsHelper(tree, results, prefix=[])
124  return results
125
126
127def _WordScore(index, normalized_command_word,
128               canonical_command_word, canonical_command_length):
129  """Returns the integer word match score for a command word.
130
131  Args:
132    index: The position of the word in the command.
133    normalized_command_word: The normalized command word.
134    canonical_command_word: The actual command word to compare with.
135    canonical_command_length: The length of the actual command.
136
137  Returns:
138    The integer word match score, always >= 0.
139  """
140  score = 0
141
142  # The match can go either way.
143  if normalized_command_word in canonical_command_word:
144    shorter_word = normalized_command_word
145    longer_word = canonical_command_word
146  elif canonical_command_word in normalized_command_word:
147    shorter_word = canonical_command_word
148    longer_word = normalized_command_word
149  else:
150    return score
151
152  # Inner match must be a word boundary.
153  hit = longer_word.find(shorter_word)
154  if hit > 0 and longer_word[hit-1] != '-':
155    return score
156
157  # Partial hit.
158  score += 10
159
160  # Prefer a match in less words.
161  if canonical_command_length == 1:
162    score += 30
163  elif canonical_command_length == 2:
164    score += 20
165  elif canonical_command_length == 3:
166    score += 10
167
168  # Prefer a match in order.
169  if index == 0:
170    score += 25
171  elif index == 1:
172    score += 15
173  else:
174    score += 5
175
176  # Prefer matching more chars and beginning of word.
177  # This also handles minor suffix diffs, like singular vs. plural.
178  extra = len(longer_word) - len(shorter_word)
179  if extra <= 2:
180    extra = 3 - extra
181    if longer_word.startswith(shorter_word):
182      extra *= 2
183    score += extra
184
185  # Prefer matching on surface words.
186  if index == 0 and canonical_command_length > 1:
187    score += 30
188  # Also prefer matching on group words.
189  elif index > 0 and canonical_command_length > index + 1:
190    score += 15
191
192  return score
193
194
195def _GetScoredCommandsContaining(command_words):
196  """Return scored canonical commands containing input command words.
197
198  Args:
199    command_words: List of input command words.
200
201  Returns:
202    [(canonical_command_words, score)]: List of tuples, where
203      canonical_command_words is a list of strings and score is an integer > 0.
204      The tuples are sorted from highest score to lowest, and commands with
205      the same score appear in lexicographic order.
206  """
207  root = lookup.LoadCompletionCliTree()
208  surface_history = _GetSurfaceHistoryFrequencies(log.GetLogDir())
209  normalized_command_words = [command_word.lower().replace('_', '-')
210                              for command_word in command_words]
211  scored_commands = []
212  all_canonical_commands = _GetCanonicalCommands(root)
213  canonical_command_set = set(map(tuple, all_canonical_commands))
214  for canonical_command_words in all_canonical_commands:
215    canonical_command_length = len(canonical_command_words)
216    matched = set()
217    score = 0
218    for index, canonical_command_word in enumerate(canonical_command_words):
219      for normalized_command_word in normalized_command_words:
220        # Prefer the higher score of the normalized word or its synonym if any.
221        increment = _WordScore(index,
222                               normalized_command_word,
223                               canonical_command_word,
224                               canonical_command_length)
225        alternate_command_word = SYNONYMS.get(normalized_command_word)
226        if alternate_command_word:
227          alternate_increment = _WordScore(index,
228                                           alternate_command_word,
229                                           canonical_command_word,
230                                           canonical_command_length)
231          if increment < alternate_increment:
232            increment = alternate_increment
233        if increment:
234          matched.add(normalized_command_word)
235          score += increment
236
237    # Prefer all command words to match.
238    if len(matched) == len(normalized_command_words):
239      score += 10
240    # 0 score is always ignored, no need to save.
241    if score > 0:
242      surface = '.'.join(canonical_command_words[:-1])
243      if surface in surface_history:
244        score += int(surface_history[surface] * FREQUENCY_FACTOR)
245      # We want to display `alpha` and `beta` commands in the Maybe You Mean
246      # list as well, however we should display them with a lower confidence
247      # score, and not display them if their higher track counterpart exists.
248      better_track_exists = False
249      if 'alpha' == canonical_command_words[0]:
250        score -= 5
251        if tuple(canonical_command_words[1:]) in canonical_command_set:
252          better_track_exists = True
253        if tuple(['beta'] +
254                 canonical_command_words[1:]) in canonical_command_set:
255          better_track_exists = True
256      if 'beta' == canonical_command_words[0]:
257        score -= 5
258        if tuple(canonical_command_words[1:]) in canonical_command_set:
259          better_track_exists = True
260      if not better_track_exists:
261        scored_commands.append((canonical_command_words, score))
262
263  # Sort scores descending, commands ascending.
264  scored_commands.sort(key=lambda tuple: (-tuple[1], tuple[0]))
265  return scored_commands
266
267
268def GetCommandSuggestions(command_words):
269  """Return suggested commands containing input command words.
270
271  Args:
272    command_words: List of input command words.
273
274  Returns:
275    [command]: A list of canonical command strings with 'gcloud' prepended. Only
276      commands whose scores have a ratio of at least MIN_RATIO against the top
277      score are returned. At most MAX_SUGGESTIONS command strings are returned.
278      If many commands from the same group are being suggested, then the common
279      groups are suggested instead.
280  """
281  suggested_commands = []
282  try:
283    scored_commands = _GetScoredCommandsContaining(command_words)
284  except lookup.CannotHandleCompletionError:
285    # Don't crash error reports on static completion misconfiguration.
286    scored_commands = None
287  if not scored_commands:
288    return suggested_commands
289
290  # Scores are greater than zero and sorted highest to lowest.
291  top_score = float(scored_commands[0][1])
292  too_many = False
293  suggested_groups = set()
294  for command, score in scored_commands:
295    if score / top_score >= MIN_RATIO:
296      suggested_commands.append(' '.join(['gcloud'] + command))
297      suggested_groups.add(' '.join(command[:-1]))
298      if len(suggested_commands) >= MAX_SUGGESTIONS:
299        too_many = True
300        break
301
302  # Too many most likely indicates the suggested commands have common groups.
303  if too_many and len(suggested_groups) < MIN_SUGGESTED_GROUPS:
304    min_length = len(scored_commands[0][0])
305    for command, score in scored_commands:
306      if score / top_score < MIN_RATIO:
307        break
308      if min_length > len(command):
309        min_length = len(command)
310    common_length = min_length - 1
311    if common_length:
312      suggested_groups = set()
313      for command, score in scored_commands:
314        if score / top_score < MIN_RATIO:
315          break
316        suggested_groups.add(' '.join(['gcloud'] + command[:common_length]))
317        if len(suggested_groups) >= MAX_SUGGESTIONS:
318          break
319      suggested_commands = sorted(suggested_groups)
320
321  return suggested_commands
322