1# -*- coding: utf-8 -*- # 2# Copyright 2016 Google LLC. All Rights Reserved. 3# 4# Licensed under the Apache License, Version 2.0 (the "License"); 5# you may not use this file except in compliance with the License. 6# You may obtain a copy of the License at 7# 8# http://www.apache.org/licenses/LICENSE-2.0 9# 10# Unless required by applicable law or agreed to in writing, software 11# distributed under the License is distributed on an "AS IS" BASIS, 12# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13# See the License for the specific language governing permissions and 14# limitations under the License. 15 16"""Methods for suggesting corrections to command typos.""" 17 18from __future__ import absolute_import 19from __future__ import division 20from __future__ import unicode_literals 21 22import collections 23import os 24import re 25 26from googlecloudsdk.command_lib.static_completion import lookup 27from googlecloudsdk.core import log 28from googlecloudsdk.core.util import files 29import six 30 31 32# Command noun and verb variants mapped to most likely gcloud counterpart. 33SYNONYMS = { 34 'change': 'update', 35 # TODO(b/119555246): Delete 'copy-files' entry once 36 # 'gcloud compute copy-files' is removed. 37 'copy-files': 'scp', 38 'create': 'add', 39 'delete': 'remove', 40 'describe': 'get', 41 # TODO(b/119550681): Delete 'docker' entry once 'gcloud docker' is removed. 42 'docker': 'auth-configure-docker', 43 'get': 'describe', 44 'image': 'images', 45 'instance': 'instances', 46 'instances': 'instance', 47 'make': 'create', 48 'modify': 'update', 49 'patch': 'update', 50 'remove': 'delete', 51 'show': 'describe', 52} 53 54MIN_RATIO = 0.7 # Minimum score/top_score ratio of accepted suggestions. 55MIN_SUGGESTED_GROUPS = 4 # Check for group prefix if less groups than this. 56MAX_SUGGESTIONS = 10 # Maximum number of suggestions. 57# Factor to multiply logged command frequencies by before incrementing 58# canonical command scores. 59FREQUENCY_FACTOR = 100 60 61 62def _GetSurfaceHistoryFrequencies(logs_dir): 63 """Load the last 100 surfaces user used today from local command history. 64 65 Args: 66 logs_dir: str, the path to today's logs directory 67 68 Returns: 69 dict mapping surfaces to normalized frequencies. 70 """ 71 surfaces_count = collections.defaultdict(int) 72 if not logs_dir: 73 return surfaces_count 74 total = 0 75 last_100_invocations = sorted(os.listdir(logs_dir), reverse=True)[:100] 76 for filename in last_100_invocations: 77 file_path = os.path.join(logs_dir, filename) 78 with files.FileReader(file_path) as log_file: 79 for line in log_file: 80 match = re.search(log.USED_SURFACE_PATTERN, line) 81 if match: 82 surface = match.group(1) 83 total += 1 84 surfaces_count[surface] += 1 85 # normalize surface frequencies 86 return {surface: count / total 87 for surface, count in six.iteritems(surfaces_count)} 88 89 90def _GetCanonicalCommandsHelper(tree, results, prefix): 91 """Helper method to _GetCanonicalCommands. 92 93 Args: 94 tree: The root of the tree that will be traversed to find commands. 95 results: The results list to append to. 96 prefix: [str], the canonical command line words so far. Once we reach 97 a leaf node, prefix contains a canonical command and a copy is 98 appended to results. 99 100 Returns: 101 None 102 """ 103 if not tree.get(lookup.LOOKUP_COMMANDS): 104 results.append(prefix[:]) 105 return 106 for command, command_tree in six.iteritems(tree[lookup.LOOKUP_COMMANDS]): 107 prefix.append(command) 108 _GetCanonicalCommandsHelper(command_tree, results, prefix) 109 prefix.pop() 110 111 112def _GetCanonicalCommands(tree): 113 """Return list of all canonical commands in CLI tree in arbitrary order. 114 115 Args: 116 tree: The root of the tree that will be traversed to find commands. 117 118 Returns: 119 [[canonical_command_words]]: List of lists, all possible sequences of 120 canonical command words in the tree. 121 """ 122 results = [] 123 _GetCanonicalCommandsHelper(tree, results, prefix=[]) 124 return results 125 126 127def _WordScore(index, normalized_command_word, 128 canonical_command_word, canonical_command_length): 129 """Returns the integer word match score for a command word. 130 131 Args: 132 index: The position of the word in the command. 133 normalized_command_word: The normalized command word. 134 canonical_command_word: The actual command word to compare with. 135 canonical_command_length: The length of the actual command. 136 137 Returns: 138 The integer word match score, always >= 0. 139 """ 140 score = 0 141 142 # The match can go either way. 143 if normalized_command_word in canonical_command_word: 144 shorter_word = normalized_command_word 145 longer_word = canonical_command_word 146 elif canonical_command_word in normalized_command_word: 147 shorter_word = canonical_command_word 148 longer_word = normalized_command_word 149 else: 150 return score 151 152 # Inner match must be a word boundary. 153 hit = longer_word.find(shorter_word) 154 if hit > 0 and longer_word[hit-1] != '-': 155 return score 156 157 # Partial hit. 158 score += 10 159 160 # Prefer a match in less words. 161 if canonical_command_length == 1: 162 score += 30 163 elif canonical_command_length == 2: 164 score += 20 165 elif canonical_command_length == 3: 166 score += 10 167 168 # Prefer a match in order. 169 if index == 0: 170 score += 25 171 elif index == 1: 172 score += 15 173 else: 174 score += 5 175 176 # Prefer matching more chars and beginning of word. 177 # This also handles minor suffix diffs, like singular vs. plural. 178 extra = len(longer_word) - len(shorter_word) 179 if extra <= 2: 180 extra = 3 - extra 181 if longer_word.startswith(shorter_word): 182 extra *= 2 183 score += extra 184 185 # Prefer matching on surface words. 186 if index == 0 and canonical_command_length > 1: 187 score += 30 188 # Also prefer matching on group words. 189 elif index > 0 and canonical_command_length > index + 1: 190 score += 15 191 192 return score 193 194 195def _GetScoredCommandsContaining(command_words): 196 """Return scored canonical commands containing input command words. 197 198 Args: 199 command_words: List of input command words. 200 201 Returns: 202 [(canonical_command_words, score)]: List of tuples, where 203 canonical_command_words is a list of strings and score is an integer > 0. 204 The tuples are sorted from highest score to lowest, and commands with 205 the same score appear in lexicographic order. 206 """ 207 root = lookup.LoadCompletionCliTree() 208 surface_history = _GetSurfaceHistoryFrequencies(log.GetLogDir()) 209 normalized_command_words = [command_word.lower().replace('_', '-') 210 for command_word in command_words] 211 scored_commands = [] 212 all_canonical_commands = _GetCanonicalCommands(root) 213 canonical_command_set = set(map(tuple, all_canonical_commands)) 214 for canonical_command_words in all_canonical_commands: 215 canonical_command_length = len(canonical_command_words) 216 matched = set() 217 score = 0 218 for index, canonical_command_word in enumerate(canonical_command_words): 219 for normalized_command_word in normalized_command_words: 220 # Prefer the higher score of the normalized word or its synonym if any. 221 increment = _WordScore(index, 222 normalized_command_word, 223 canonical_command_word, 224 canonical_command_length) 225 alternate_command_word = SYNONYMS.get(normalized_command_word) 226 if alternate_command_word: 227 alternate_increment = _WordScore(index, 228 alternate_command_word, 229 canonical_command_word, 230 canonical_command_length) 231 if increment < alternate_increment: 232 increment = alternate_increment 233 if increment: 234 matched.add(normalized_command_word) 235 score += increment 236 237 # Prefer all command words to match. 238 if len(matched) == len(normalized_command_words): 239 score += 10 240 # 0 score is always ignored, no need to save. 241 if score > 0: 242 surface = '.'.join(canonical_command_words[:-1]) 243 if surface in surface_history: 244 score += int(surface_history[surface] * FREQUENCY_FACTOR) 245 # We want to display `alpha` and `beta` commands in the Maybe You Mean 246 # list as well, however we should display them with a lower confidence 247 # score, and not display them if their higher track counterpart exists. 248 better_track_exists = False 249 if 'alpha' == canonical_command_words[0]: 250 score -= 5 251 if tuple(canonical_command_words[1:]) in canonical_command_set: 252 better_track_exists = True 253 if tuple(['beta'] + 254 canonical_command_words[1:]) in canonical_command_set: 255 better_track_exists = True 256 if 'beta' == canonical_command_words[0]: 257 score -= 5 258 if tuple(canonical_command_words[1:]) in canonical_command_set: 259 better_track_exists = True 260 if not better_track_exists: 261 scored_commands.append((canonical_command_words, score)) 262 263 # Sort scores descending, commands ascending. 264 scored_commands.sort(key=lambda tuple: (-tuple[1], tuple[0])) 265 return scored_commands 266 267 268def GetCommandSuggestions(command_words): 269 """Return suggested commands containing input command words. 270 271 Args: 272 command_words: List of input command words. 273 274 Returns: 275 [command]: A list of canonical command strings with 'gcloud' prepended. Only 276 commands whose scores have a ratio of at least MIN_RATIO against the top 277 score are returned. At most MAX_SUGGESTIONS command strings are returned. 278 If many commands from the same group are being suggested, then the common 279 groups are suggested instead. 280 """ 281 suggested_commands = [] 282 try: 283 scored_commands = _GetScoredCommandsContaining(command_words) 284 except lookup.CannotHandleCompletionError: 285 # Don't crash error reports on static completion misconfiguration. 286 scored_commands = None 287 if not scored_commands: 288 return suggested_commands 289 290 # Scores are greater than zero and sorted highest to lowest. 291 top_score = float(scored_commands[0][1]) 292 too_many = False 293 suggested_groups = set() 294 for command, score in scored_commands: 295 if score / top_score >= MIN_RATIO: 296 suggested_commands.append(' '.join(['gcloud'] + command)) 297 suggested_groups.add(' '.join(command[:-1])) 298 if len(suggested_commands) >= MAX_SUGGESTIONS: 299 too_many = True 300 break 301 302 # Too many most likely indicates the suggested commands have common groups. 303 if too_many and len(suggested_groups) < MIN_SUGGESTED_GROUPS: 304 min_length = len(scored_commands[0][0]) 305 for command, score in scored_commands: 306 if score / top_score < MIN_RATIO: 307 break 308 if min_length > len(command): 309 min_length = len(command) 310 common_length = min_length - 1 311 if common_length: 312 suggested_groups = set() 313 for command, score in scored_commands: 314 if score / top_score < MIN_RATIO: 315 break 316 suggested_groups.add(' '.join(['gcloud'] + command[:common_length])) 317 if len(suggested_groups) >= MAX_SUGGESTIONS: 318 break 319 suggested_commands = sorted(suggested_groups) 320 321 return suggested_commands 322