1# Copyright 2010 United States Government as represented by the
2# Administrator of the National Aeronautics and Space Administration.
3# All Rights Reserved.
4# Copyright (c) 2010 Citrix Systems, Inc.
5#
6#    Licensed under the Apache License, Version 2.0 (the "License"); you may
7#    not use this file except in compliance with the License. You may obtain
8#    a copy of the License at
9#
10#         http://www.apache.org/licenses/LICENSE-2.0
11#
12#    Unless required by applicable law or agreed to in writing, software
13#    distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
14#    WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
15#    License for the specific language governing permissions and limitations
16#    under the License.
17
18"""
19Helper methods to deal with images.
20
21.. versionadded:: 3.1
22
23.. versionchanged:: 3.14.0
   add parameter format.
25
26"""
27
28import json
29import re
30
31import debtcollector
32
33from oslo_utils._i18n import _
34from oslo_utils import strutils
35
36
class QemuImgInfo(object):
    """Parse Qemu image information from command `qemu-img info`'s output.

    The instance of :class:`QemuImgInfo` has properties: `image`,
    `backing_file`, `backing_file_format`, `file_format`, `virtual_size`,
    `cluster_size`, `disk_size`, `snapshots`, `encrypted` and
    `format_specific`.

    The parameter format can be set to 'json' or 'human'. With 'json' format
    output, qemu image information is parsed more easily and reliably.
    However 'human' format support will be dropped in next cycle and only
    'json' format will be supported. Prefer to use 'json' instead of 'human'.
    """
    # Splits "<name> (actual path: <real path>)" so the real path can be kept.
    BACKING_FILE_RE = re.compile((r"^(.*?)\s*\(actual\s+path\s*:"
                                  r"\s+(.*?)\)\s*$"), re.I)
    # Matches a "<key>: <value>" line of the human readable output.
    TOP_LEVEL_RE = re.compile(r"^([\w\d\s\_\-]+):(.*)$")
    # group(1): magnitude, group(2): optional unit,
    # group(3)/group(4): optional "(<n> bytes)" suffix and its number.
    SIZE_RE = re.compile(r"([0-9]+[eE][-+][0-9]+|\d*\.?\d+)"
                         r"\s*(\w+)?(\s*\(\s*(\d+)\s+bytes\s*\))?",
                         re.I)

    def __init__(self, cmd_output=None, format='human'):
        """Populate the image properties from `qemu-img info` output.

        :param cmd_output: text produced by `qemu-img info`; ``None`` or an
                           empty string yields an instance whose properties
                           are unset (``None``, or ``[]`` for `snapshots`).
        :param format: 'json' to decode `cmd_output` as a JSON document;
                       any other value selects the deprecated human
                       readable parser.
        """
        if format == 'json':
            details = json.loads(cmd_output or '{}')
            self.image = details.get('filename')
            self.backing_file = details.get('backing-filename')
            self.backing_file_format = details.get('backing-filename-format')
            self.file_format = details.get('format')
            self.virtual_size = details.get('virtual-size')
            self.cluster_size = details.get('cluster-size')
            self.disk_size = details.get('actual-size')
            self.snapshots = details.get('snapshots', [])
            # Expose the flag as 'yes' or None rather than as a boolean.
            self.encrypted = 'yes' if details.get('encrypted') else None
            self.format_specific = details.get('format-specific')
        else:
            # Only warn when there is real output to parse, so that building
            # an empty instance with the default arguments stays silent.
            if cmd_output is not None:
                debtcollector.deprecate(
                    'The human format is deprecated and the format parameter '
                    'will be removed. Use explicitly json instead',
                    version="xena",
                    category=FutureWarning)
            details = self._parse(cmd_output or '')
            self.image = details.get('image')
            self.backing_file = details.get('backing_file')
            self.backing_file_format = details.get('backing_file_format')
            self.file_format = details.get('file_format')
            self.virtual_size = details.get('virtual_size')
            self.cluster_size = details.get('cluster_size')
            self.disk_size = details.get('disk_size')
            self.snapshots = details.get('snapshot_list', [])
            self.encrypted = details.get('encrypted')
            # The human readable parser never fills this property in.
            self.format_specific = None

    def __str__(self):
        """Return the properties as newline separated ``name: value`` lines.

        `snapshots`, `encrypted` and `format_specific` are only included
        when they hold a truthy value.
        """
        lines = [
            'image: %s' % self.image,
            'file_format: %s' % self.file_format,
            'virtual_size: %s' % self.virtual_size,
            'disk_size: %s' % self.disk_size,
            'cluster_size: %s' % self.cluster_size,
            'backing_file: %s' % self.backing_file,
            'backing_file_format: %s' % self.backing_file_format,
        ]
        if self.snapshots:
            lines.append("snapshots: %s" % self.snapshots)
        if self.encrypted:
            lines.append("encrypted: %s" % self.encrypted)
        if self.format_specific:
            lines.append("format_specific: %s" % self.format_specific)
        return "\n".join(lines)

    def _canonicalize(self, field):
        """Return `field` lower-cased, stripped, with ' ' and '-' as '_'."""
        # Standardize on underscores/lc/no dash and no spaces
        # since qemu seems to have mixed outputs here... and
        # this format allows for better integration with python
        # - i.e. for usage in kwargs and such...
        field = field.lower().strip()
        for c in (" ", "-"):
            field = field.replace(c, '_')
        return field

    def _extract_bytes(self, details):
        """Convert a size description into an integer number of bytes.

        :param details: size text such as ``1.0G (1073741824 bytes)``,
                        ``512K`` or ``4096``.
        :returns: the size in bytes as an int.
        :raises ValueError: if no size can be found in `details`.
        """
        # Replace it with the byte amount
        real_size = self.SIZE_RE.search(details)
        if not real_size:
            raise ValueError(_('Invalid input value "%s".') % details)
        magnitude = real_size.group(1)
        if "e" in magnitude.lower():
            # Expand scientific notation into plain digits.
            magnitude = format(float(real_size.group(1)), '.0f')
        unit_of_measure = real_size.group(2)
        bytes_info = real_size.group(3)
        if bytes_info:
            # An explicit "(<n> bytes)" suffix is exact, so prefer it over
            # the rounded magnitude/unit pair.
            return int(real_size.group(4))
        elif not unit_of_measure:
            return int(magnitude)
        # Allow abbreviated unit such as K to mean KB for compatibility.
        if len(unit_of_measure) == 1 and unit_of_measure != 'B':
            unit_of_measure += 'B'
        return strutils.string_to_bytes('%s%s' % (magnitude, unit_of_measure),
                                        return_int=True)

    def _extract_details(self, root_cmd, root_details, lines_after):
        """Convert the raw value of one top level field.

        :param root_cmd: canonicalized field name (see `_canonicalize`).
        :param root_details: stripped text found after the field's colon.
        :param lines_after: list of the output lines that follow the field.
                            For ``snapshot_list`` the header line and every
                            snapshot row are removed from this list in place.
        :returns: the real backing file path for ``backing_file``, a byte
                  count for the size fields, the lower-cased value for
                  ``file_format``, a list of dicts for ``snapshot_list`` and
                  `root_details` unchanged for any other field.
        :raises ValueError: if a snapshot list has no header line or a size
                            cannot be parsed.
        """
        real_details = root_details
        if root_cmd == 'backing_file':
            # Replace it with the real backing file
            backing_match = self.BACKING_FILE_RE.match(root_details)
            if backing_match:
                real_details = backing_match.group(2).strip()
        elif root_cmd in ('virtual_size', 'cluster_size', 'disk_size'):
            # Replace it with the byte amount (if we can convert it)
            if root_details in ('None', 'unavailable'):
                real_details = 0
            else:
                real_details = self._extract_bytes(root_details)
        elif root_cmd == 'file_format':
            real_details = real_details.strip().lower()
        elif root_cmd == 'snapshot_list':
            # Next line should be a header, starting with 'ID'
            if not lines_after or not lines_after.pop(0).startswith("ID"):
                msg = _("Snapshot list encountered but no header found!")
                raise ValueError(msg)
            real_details = []
            # This is the sprintf pattern we will try to match
            # "%-10s%-20s%7s%20s%15s"
            # ID TAG VM SIZE DATE VM CLOCK (current header)
            while lines_after:
                # Peek first: a line that is not a snapshot row must stay in
                # the list so the caller can parse it as a top level field.
                line = lines_after[0]
                line_pieces = line.split()
                if len(line_pieces) != 6:
                    break
                # Check against this pattern in the final position
                # "%02d:%02d:%02d.%03d"
                date_pieces = line_pieces[5].split(":")
                if len(date_pieces) != 3:
                    break
                lines_after.pop(0)
                real_details.append({
                    'id': line_pieces[0],
                    'tag': line_pieces[1],
                    'vm_size': line_pieces[2],
                    'date': line_pieces[3],
                    'vm_clock': line_pieces[4] + " " + line_pieces[5],
                })
        return real_details

    def _parse(self, cmd_output):
        """Parse human readable `qemu-img info` output into a dict.

        :param cmd_output: the text to parse.
        :returns: dict mapping each canonicalized field name to the value
                  produced by `_extract_details`. Lines that do not look
                  like ``key: value`` are skipped.
        """
        # Analysis done of qemu-img.c to figure out what is going on here
        # Find all points start with some chars and then a ':' then a newline
        # and then handle the results of those 'top level' items in a separate
        # function.
        #
        # TODO(harlowja): newer versions might have a json output format
        #                 we should switch to that whenever possible.
        #                 see: http://bit.ly/XLJXDX
        contents = {}
        lines = [x for x in cmd_output.splitlines() if x.strip()]
        while lines:
            line = lines.pop(0)
            top_level = self.TOP_LEVEL_RE.match(line)
            if top_level:
                root = self._canonicalize(top_level.group(1))
                if not root:
                    continue
                root_details = top_level.group(2).strip()
                # May consume further entries of `lines` (snapshot rows).
                details = self._extract_details(root, root_details, lines)
                contents[root] = details
        return contents
202