# Copyright 2010 United States Government as represented by the
# Administrator of the National Aeronautics and Space Administration.
# All Rights Reserved.
# Copyright (c) 2010 Citrix Systems, Inc.
#
#    Licensed under the Apache License, Version 2.0 (the "License"); you may
#    not use this file except in compliance with the License. You may obtain
#    a copy of the License at
#
#         http://www.apache.org/licenses/LICENSE-2.0
#
#    Unless required by applicable law or agreed to in writing, software
#    distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
#    WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
#    License for the specific language governing permissions and limitations
#    under the License.

"""
Helper methods to deal with images.

.. versionadded:: 3.1

.. versionchanged:: 3.14.0
   add parameter format.

"""

import json
import re

import debtcollector

from oslo_utils._i18n import _
from oslo_utils import strutils


class QemuImgInfo(object):
    """Parse Qemu image information from command `qemu-img info`'s output.

    The instance of :class:`QemuImgInfo` has properties: `image`,
    `backing_file`, `backing_file_format`, `file_format`, `virtual_size`,
    `cluster_size`, `disk_size`, `snapshots`, `encrypted` and
    `format_specific`.

    The parameter format can be set to 'json' or 'human'. With 'json' format
    output, qemu image information will be parsed more easily and readable.
    However 'human' format support will be dropped in next cycle and only
    'json' format will be supported. Prefer to use 'json' instead of 'human'.
    """
    # "<name> (actual path: <real path>)" as printed for backing files.
    BACKING_FILE_RE = re.compile((r"^(.*?)\s*\(actual\s+path\s*:"
                                  r"\s+(.*?)\)\s*$"), re.I)
    # "<field name>:<details>" top level entries of the human output.
    TOP_LEVEL_RE = re.compile(r"^([\w\d\s\_\-]+):(.*)$")
    # "<magnitude> [<unit>] [(<n> bytes)]"; group 4 is the exact byte count.
    SIZE_RE = re.compile(r"([0-9]+[eE][-+][0-9]+|\d*\.?\d+)"
                         r"\s*(\w+)?(\s*\(\s*(\d+)\s+bytes\s*\))?",
                         re.I)

    def __init__(self, cmd_output=None, format='human'):
        """Build the image description from ``qemu-img info`` output.

        :param cmd_output: text produced by ``qemu-img info``. ``None`` or an
                           empty string yields an instance whose attributes
                           are unset (``None``, or ``[]`` for `snapshots`).
        :param format: ``'json'`` to decode `cmd_output` as JSON; any other
                       value selects the deprecated human readable parser,
                       which emits a ``FutureWarning`` whenever `cmd_output`
                       is not ``None``.
        """
        if format == 'json':
            details = json.loads(cmd_output or '{}')
            self.image = details.get('filename')
            self.backing_file = details.get('backing-filename')
            self.backing_file_format = details.get('backing-filename-format')
            self.file_format = details.get('format')
            self.virtual_size = details.get('virtual-size')
            self.cluster_size = details.get('cluster-size')
            self.disk_size = details.get('actual-size')
            self.snapshots = details.get('snapshots', [])
            # Mirror the human parser, which stores the literal text ('yes')
            # rather than a boolean.
            self.encrypted = 'yes' if details.get('encrypted') else None
            self.format_specific = details.get('format-specific')
        else:
            if cmd_output is not None:
                debtcollector.deprecate(
                    'The human format is deprecated and the format parameter '
                    'will be removed. Use explicitly json instead',
                    version="xena",
                    category=FutureWarning)
            details = self._parse(cmd_output or '')
            self.image = details.get('image')
            self.backing_file = details.get('backing_file')
            self.backing_file_format = details.get('backing_file_format')
            self.file_format = details.get('file_format')
            self.virtual_size = details.get('virtual_size')
            self.cluster_size = details.get('cluster_size')
            self.disk_size = details.get('disk_size')
            self.snapshots = details.get('snapshot_list', [])
            self.encrypted = details.get('encrypted')
            # The human parser never fills in format specific data.
            self.format_specific = None

    def __str__(self):
        """Return a multi-line ``name: value`` summary of the image.

        The `snapshots`, `encrypted` and `format_specific` lines are only
        included when the corresponding attribute is truthy.
        """
        lines = [
            'image: %s' % self.image,
            'file_format: %s' % self.file_format,
            'virtual_size: %s' % self.virtual_size,
            'disk_size: %s' % self.disk_size,
            'cluster_size: %s' % self.cluster_size,
            'backing_file: %s' % self.backing_file,
            'backing_file_format: %s' % self.backing_file_format,
        ]
        if self.snapshots:
            lines.append("snapshots: %s" % self.snapshots)
        if self.encrypted:
            lines.append("encrypted: %s" % self.encrypted)
        if self.format_specific:
            lines.append("format_specific: %s" % self.format_specific)
        return "\n".join(lines)

    def _canonicalize(self, field):
        """Return `field` lower-cased, stripped, with ' ' and '-' as '_'."""
        # Standardize on underscores/lc/no dash and no spaces
        # since qemu seems to have mixed outputs here... and
        # this format allows for better integration with python
        # - i.e. for usage in kwargs and such...
        field = field.lower().strip()
        for c in (" ", "-"):
            field = field.replace(c, '_')
        return field

    def _extract_bytes(self, details):
        """Convert a human readable size description into a byte count.

        :param details: size text, e.g. ``'1.0G (1073741824 bytes)'``,
                        ``'136K'`` or ``'65536'``.
        :returns: the explicit ``(N bytes)`` value when present, the bare
                  magnitude when no unit is given, otherwise the value
                  computed by :func:`strutils.string_to_bytes`.
        :raises ValueError: if no size can be found in `details`.
        """
        # Replace it with the byte amount
        real_size = self.SIZE_RE.search(details)
        if not real_size:
            raise ValueError(_('Invalid input value "%s".') % details)
        magnitude = real_size.group(1)
        if "e" in magnitude.lower():
            # Expand scientific notation (e.g. '1e+03') to plain digits so
            # the int()/string_to_bytes() conversions below can consume it.
            magnitude = format(float(real_size.group(1)), '.0f')
        unit_of_measure = real_size.group(2)
        bytes_info = real_size.group(3)
        if bytes_info:
            # An exact "(N bytes)" figure beats the rounded magnitude.
            return int(real_size.group(4))
        elif not unit_of_measure:
            return int(magnitude)
        # Allow abbreviated unit such as K to mean KB for compatibility.
        if len(unit_of_measure) == 1 and unit_of_measure != 'B':
            unit_of_measure += 'B'
        return strutils.string_to_bytes('%s%s' % (magnitude, unit_of_measure),
                                        return_int=True)

    def _extract_details(self, root_cmd, root_details, lines_after):
        """Post-process the value of one top level field of human output.

        :param root_cmd: canonicalized field name (see `_canonicalize`).
        :param root_details: stripped text found after the field's colon.
        :param lines_after: list of the not yet consumed output lines; for
                            ``snapshot_list`` the header and every snapshot
                            row are popped from the front of this list.
        :returns: the real backing file path, a byte count, a lower-cased
                  file format, a list of snapshot dicts, or `root_details`
                  unchanged for any other field.
        :raises ValueError: if a snapshot list has no ``ID`` header line, or
                            if a size value cannot be parsed.
        """
        real_details = root_details
        if root_cmd == 'backing_file':
            # Replace it with the real backing file
            backing_match = self.BACKING_FILE_RE.match(root_details)
            if backing_match:
                real_details = backing_match.group(2).strip()
        elif root_cmd in ['virtual_size', 'cluster_size', 'disk_size']:
            # Replace it with the byte amount (if we can convert it)
            if root_details in ('None', 'unavailable'):
                real_details = 0
            else:
                real_details = self._extract_bytes(root_details)
        elif root_cmd == 'file_format':
            real_details = real_details.strip().lower()
        elif root_cmd == 'snapshot_list':
            # Next line should be a header, starting with 'ID'
            if not lines_after or not lines_after.pop(0).startswith("ID"):
                msg = _("Snapshot list encountered but no header found!")
                raise ValueError(msg)
            real_details = []
            # This is the sprintf pattern we will try to match
            # "%-10s%-20s%7s%20s%15s"
            # ID TAG VM SIZE DATE VM CLOCK (current header)
            while lines_after:
                # Peek only: a non matching line belongs to the next top
                # level field and must stay in the list for the caller.
                line = lines_after[0]
                line_pieces = line.split()
                if len(line_pieces) != 6:
                    break
                # Check against this pattern in the final position
                # "%02d:%02d:%02d.%03d"
                date_pieces = line_pieces[5].split(":")
                if len(date_pieces) != 3:
                    break
                lines_after.pop(0)
                real_details.append({
                    'id': line_pieces[0],
                    'tag': line_pieces[1],
                    'vm_size': line_pieces[2],
                    'date': line_pieces[3],
                    'vm_clock': line_pieces[4] + " " + line_pieces[5],
                })
        return real_details

    def _parse(self, cmd_output):
        """Parse human readable ``qemu-img info`` output into a dict.

        :param cmd_output: the human readable output text.
        :returns: dict mapping canonicalized field names to the values
                  produced by `_extract_details`. Lines that do not look
                  like ``name: value`` are ignored.
        """
        # Analysis done of qemu-img.c to figure out what is going on here
        # Find all points start with some chars and then a ':' then a newline
        # and then handle the results of those 'top level' items in a separate
        # function.
        #
        # TODO(harlowja): newer versions might have a json output format
        #                 we should switch to that whenever possible.
        #                 see: http://bit.ly/XLJXDX
        contents = {}
        lines = [x for x in cmd_output.splitlines() if x.strip()]
        while lines:
            line = lines.pop(0)
            top_level = self.TOP_LEVEL_RE.match(line)
            if top_level:
                root = self._canonicalize(top_level.group(1))
                if not root:
                    continue
                root_details = top_level.group(2).strip()
                # May consume further entries from `lines` (snapshot rows).
                details = self._extract_details(root, root_details, lines)
                contents[root] = details
        return contents