1# Generator of fuzzed qcow2 images
2#
3# Copyright (C) 2014 Maria Kustova <maria.k@catit.be>
4#
5# This program is free software: you can redistribute it and/or modify
6# it under the terms of the GNU General Public License as published by
7# the Free Software Foundation, either version 2 of the License, or
8# (at your option) any later version.
9#
10# This program is distributed in the hope that it will be useful,
11# but WITHOUT ANY WARRANTY; without even the implied warranty of
12# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13# GNU General Public License for more details.
14#
15# You should have received a copy of the GNU General Public License
16# along with this program.  If not, see <http://www.gnu.org/licenses/>.
17#
18
19from __future__ import absolute_import
20import random
21import struct
22from . import fuzz
23from math import ceil
24from os import urandom
25from itertools import chain
26
27MAX_IMAGE_SIZE = 10 * (1 << 20)
28# Standard sizes
29UINT32_S = 4
30UINT64_S = 8
31
32
33class Field(object):
34
35    """Atomic image element (field).
36
37    The class represents an image field as quadruple of a data format
38    of value necessary for its packing to binary form, an offset from
39    the beginning of the image, a value and a name.
40
41    The field can be iterated as a list [format, offset, value, name].
42    """
43
44    __slots__ = ('fmt', 'offset', 'value', 'name')
45
46    def __init__(self, fmt, offset, val, name):
47        self.fmt = fmt
48        self.offset = offset
49        self.value = val
50        self.name = name
51
52    def __iter__(self):
53        return iter([self.fmt, self.offset, self.value, self.name])
54
55    def __repr__(self):
56        return "Field(fmt='%s', offset=%d, value=%s, name=%s)" % \
57            (self.fmt, self.offset, str(self.value), self.name)
58
59
60class FieldsList(object):
61
62    """List of fields.
63
64    The class allows access to a field in the list by its name.
65    """
66
67    def __init__(self, meta_data=None):
68        if meta_data is None:
69            self.data = []
70        else:
71            self.data = [Field(*f)
72                         for f in meta_data]
73
74    def __getitem__(self, name):
75        return [x for x in self.data if x.name == name]
76
77    def __iter__(self):
78        return iter(self.data)
79
80    def __len__(self):
81        return len(self.data)
82
83
84class Image(object):
85
86    """ Qcow2 image object.
87
88    This class allows to create qcow2 images with random valid structures and
89    values, fuzz them via external qcow2.fuzz module and write the result to
90    a file.
91    """
92
93    def __init__(self, backing_file_name=None):
94        """Create a random valid qcow2 image with the correct header and stored
95        backing file name.
96        """
97        cluster_bits, self.image_size = self._size_params()
98        self.cluster_size = 1 << cluster_bits
99        self.header = FieldsList()
100        self.backing_file_name = FieldsList()
101        self.backing_file_format = FieldsList()
102        self.feature_name_table = FieldsList()
103        self.end_of_extension_area = FieldsList()
104        self.l2_tables = FieldsList()
105        self.l1_table = FieldsList()
106        self.refcount_table = FieldsList()
107        self.refcount_blocks = FieldsList()
108        self.ext_offset = 0
109        self.create_header(cluster_bits, backing_file_name)
110        self.set_backing_file_name(backing_file_name)
111        self.data_clusters = self._alloc_data(self.image_size,
112                                              self.cluster_size)
113        # Percentage of fields will be fuzzed
114        self.bias = random.uniform(0.2, 0.5)
115
116    def __iter__(self):
117        return chain(self.header, self.backing_file_format,
118                     self.feature_name_table, self.end_of_extension_area,
119                     self.backing_file_name, self.l1_table, self.l2_tables,
120                     self.refcount_table, self.refcount_blocks)
121
122    def create_header(self, cluster_bits, backing_file_name=None):
123        """Generate a random valid header."""
124        meta_header = [
125            ['>4s', 0, "QFI\xfb", 'magic'],
126            ['>I', 4, random.randint(2, 3), 'version'],
127            ['>Q', 8, 0, 'backing_file_offset'],
128            ['>I', 16, 0, 'backing_file_size'],
129            ['>I', 20, cluster_bits, 'cluster_bits'],
130            ['>Q', 24, self.image_size, 'size'],
131            ['>I', 32, 0, 'crypt_method'],
132            ['>I', 36, 0, 'l1_size'],
133            ['>Q', 40, 0, 'l1_table_offset'],
134            ['>Q', 48, 0, 'refcount_table_offset'],
135            ['>I', 56, 0, 'refcount_table_clusters'],
136            ['>I', 60, 0, 'nb_snapshots'],
137            ['>Q', 64, 0, 'snapshots_offset'],
138            ['>Q', 72, 0, 'incompatible_features'],
139            ['>Q', 80, 0, 'compatible_features'],
140            ['>Q', 88, 0, 'autoclear_features'],
141            # Only refcount_order = 4 is supported by current (07.2014)
142            # implementation of QEMU
143            ['>I', 96, 4, 'refcount_order'],
144            ['>I', 100, 0, 'header_length']
145        ]
146        self.header = FieldsList(meta_header)
147
148        if self.header['version'][0].value == 2:
149            self.header['header_length'][0].value = 72
150        else:
151            self.header['incompatible_features'][0].value = \
152                                                        random.getrandbits(2)
153            self.header['compatible_features'][0].value = random.getrandbits(1)
154            self.header['header_length'][0].value = 104
155        # Extensions start at the header last field offset and the field size
156        self.ext_offset = struct.calcsize(
157            self.header['header_length'][0].fmt) + \
158            self.header['header_length'][0].offset
159        end_of_extension_area_len = 2 * UINT32_S
160        free_space = self.cluster_size - self.ext_offset - \
161                     end_of_extension_area_len
162        # If the backing file name specified and there is enough space for it
163        # in the first cluster, then it's placed in the very end of the first
164        # cluster.
165        if (backing_file_name is not None) and \
166           (free_space >= len(backing_file_name)):
167            self.header['backing_file_size'][0].value = len(backing_file_name)
168            self.header['backing_file_offset'][0].value = \
169                                    self.cluster_size - len(backing_file_name)
170
171    def set_backing_file_name(self, backing_file_name=None):
172        """Add the name of the backing file at the offset specified
173        in the header.
174        """
175        if (backing_file_name is not None) and \
176           (not self.header['backing_file_offset'][0].value == 0):
177            data_len = len(backing_file_name)
178            data_fmt = '>' + str(data_len) + 's'
179            self.backing_file_name = FieldsList([
180                [data_fmt, self.header['backing_file_offset'][0].value,
181                 backing_file_name, 'bf_name']
182            ])
183
184    def set_backing_file_format(self, backing_file_fmt=None):
185        """Generate the header extension for the backing file format."""
186        if backing_file_fmt is not None:
187            # Calculation of the free space available in the first cluster
188            end_of_extension_area_len = 2 * UINT32_S
189            high_border = (self.header['backing_file_offset'][0].value or
190                           (self.cluster_size - 1)) - \
191                end_of_extension_area_len
192            free_space = high_border - self.ext_offset
193            ext_size = 2 * UINT32_S + ((len(backing_file_fmt) + 7) & ~7)
194
195            if free_space >= ext_size:
196                ext_data_len = len(backing_file_fmt)
197                ext_data_fmt = '>' + str(ext_data_len) + 's'
198                ext_padding_len = 7 - (ext_data_len - 1) % 8
199                self.backing_file_format = FieldsList([
200                    ['>I', self.ext_offset, 0xE2792ACA, 'ext_magic'],
201                    ['>I', self.ext_offset + UINT32_S, ext_data_len,
202                     'ext_length'],
203                    [ext_data_fmt, self.ext_offset + UINT32_S * 2,
204                     backing_file_fmt, 'bf_format']
205                ])
206                self.ext_offset = \
207                        struct.calcsize(
208                            self.backing_file_format['bf_format'][0].fmt) + \
209                        ext_padding_len + \
210                        self.backing_file_format['bf_format'][0].offset
211
212    def create_feature_name_table(self):
213        """Generate a random header extension for names of features used in
214        the image.
215        """
216        def gen_feat_ids():
217            """Return random feature type and feature bit."""
218            return (random.randint(0, 2), random.randint(0, 63))
219
220        end_of_extension_area_len = 2 * UINT32_S
221        high_border = (self.header['backing_file_offset'][0].value or
222                       (self.cluster_size - 1)) - \
223            end_of_extension_area_len
224        free_space = high_border - self.ext_offset
225        # Sum of sizes of 'magic' and 'length' header extension fields
226        ext_header_len = 2 * UINT32_S
227        fnt_entry_size = 6 * UINT64_S
228        num_fnt_entries = min(10, (free_space - ext_header_len) /
229                              fnt_entry_size)
230        if not num_fnt_entries == 0:
231            feature_tables = []
232            feature_ids = []
233            inner_offset = self.ext_offset + ext_header_len
234            feat_name = 'some cool feature'
235            while len(feature_tables) < num_fnt_entries * 3:
236                feat_type, feat_bit = gen_feat_ids()
237                # Remove duplicates
238                while (feat_type, feat_bit) in feature_ids:
239                    feat_type, feat_bit = gen_feat_ids()
240                feature_ids.append((feat_type, feat_bit))
241                feat_fmt = '>' + str(len(feat_name)) + 's'
242                feature_tables += [['B', inner_offset,
243                                    feat_type, 'feature_type'],
244                                   ['B', inner_offset + 1, feat_bit,
245                                    'feature_bit_number'],
246                                   [feat_fmt, inner_offset + 2,
247                                    feat_name, 'feature_name']
248                ]
249                inner_offset += fnt_entry_size
250            # No padding for the extension is necessary, because
251            # the extension length is multiple of 8
252            self.feature_name_table = FieldsList([
253                ['>I', self.ext_offset, 0x6803f857, 'ext_magic'],
254                # One feature table contains 3 fields and takes 48 bytes
255                ['>I', self.ext_offset + UINT32_S,
256                 len(feature_tables) / 3 * 48, 'ext_length']
257            ] + feature_tables)
258            self.ext_offset = inner_offset
259
260    def set_end_of_extension_area(self):
261        """Generate a mandatory header extension marking end of header
262        extensions.
263        """
264        self.end_of_extension_area = FieldsList([
265            ['>I', self.ext_offset, 0, 'ext_magic'],
266            ['>I', self.ext_offset + UINT32_S, 0, 'ext_length']
267        ])
268
269    def create_l_structures(self):
270        """Generate random valid L1 and L2 tables."""
271        def create_l2_entry(host, guest, l2_cluster):
272            """Generate one L2 entry."""
273            offset = l2_cluster * self.cluster_size
274            l2_size = self.cluster_size / UINT64_S
275            entry_offset = offset + UINT64_S * (guest % l2_size)
276            cluster_descriptor = host * self.cluster_size
277            if not self.header['version'][0].value == 2:
278                cluster_descriptor += random.randint(0, 1)
279            # While snapshots are not supported, bit #63 = 1
280            # Compressed clusters are not supported => bit #62 = 0
281            entry_val = (1 << 63) + cluster_descriptor
282            return ['>Q', entry_offset, entry_val, 'l2_entry']
283
284        def create_l1_entry(l2_cluster, l1_offset, guest):
285            """Generate one L1 entry."""
286            l2_size = self.cluster_size / UINT64_S
287            entry_offset = l1_offset + UINT64_S * (guest / l2_size)
288            # While snapshots are not supported bit #63 = 1
289            entry_val = (1 << 63) + l2_cluster * self.cluster_size
290            return ['>Q', entry_offset, entry_val, 'l1_entry']
291
292        if len(self.data_clusters) == 0:
293            # All metadata for an empty guest image needs 4 clusters:
294            # header, rfc table, rfc block, L1 table.
295            # Header takes cluster #0, other clusters ##1-3 can be used
296            l1_offset = random.randint(1, 3) * self.cluster_size
297            l1 = [['>Q', l1_offset, 0, 'l1_entry']]
298            l2 = []
299        else:
300            meta_data = self._get_metadata()
301            guest_clusters = random.sample(range(self.image_size /
302                                                 self.cluster_size),
303                                           len(self.data_clusters))
304            # Number of entries in a L1/L2 table
305            l_size = self.cluster_size / UINT64_S
306            # Number of clusters necessary for L1 table
307            l1_size = int(ceil((max(guest_clusters) + 1) / float(l_size**2)))
308            l1_start = self._get_adjacent_clusters(self.data_clusters |
309                                                   meta_data, l1_size)
310            meta_data |= set(range(l1_start, l1_start + l1_size))
311            l1_offset = l1_start * self.cluster_size
312            # Indices of L2 tables
313            l2_ids = []
314            # Host clusters allocated for L2 tables
315            l2_clusters = []
316            # L1 entries
317            l1 = []
318            # L2 entries
319            l2 = []
320            for host, guest in zip(self.data_clusters, guest_clusters):
321                l2_id = guest / l_size
322                if l2_id not in l2_ids:
323                    l2_ids.append(l2_id)
324                    l2_clusters.append(self._get_adjacent_clusters(
325                        self.data_clusters | meta_data | set(l2_clusters),
326                        1))
327                    l1.append(create_l1_entry(l2_clusters[-1], l1_offset,
328                                              guest))
329                l2.append(create_l2_entry(host, guest,
330                                          l2_clusters[l2_ids.index(l2_id)]))
331        self.l2_tables = FieldsList(l2)
332        self.l1_table = FieldsList(l1)
333        self.header['l1_size'][0].value = int(ceil(UINT64_S * self.image_size /
334                                                float(self.cluster_size**2)))
335        self.header['l1_table_offset'][0].value = l1_offset
336
337    def create_refcount_structures(self):
338        """Generate random refcount blocks and refcount table."""
339        def allocate_rfc_blocks(data, size):
340            """Return indices of clusters allocated for refcount blocks."""
341            cluster_ids = set()
342            diff = block_ids = set([x / size for x in data])
343            while len(diff) != 0:
344                # Allocate all yet not allocated clusters
345                new = self._get_available_clusters(data | cluster_ids,
346                                                   len(diff))
347                # Indices of new refcount blocks necessary to cover clusters
348                # in 'new'
349                diff = set([x / size for x in new]) - block_ids
350                cluster_ids |= new
351                block_ids |= diff
352            return cluster_ids, block_ids
353
354        def allocate_rfc_table(data, init_blocks, block_size):
355            """Return indices of clusters allocated for the refcount table
356            and updated indices of clusters allocated for blocks and indices
357            of blocks.
358            """
359            blocks = set(init_blocks)
360            clusters = set()
361            # Number of entries in one cluster of the refcount table
362            size = self.cluster_size / UINT64_S
363            # Number of clusters necessary for the refcount table based on
364            # the current number of refcount blocks
365            table_size = int(ceil((max(blocks) + 1) / float(size)))
366            # Index of the first cluster of the refcount table
367            table_start = self._get_adjacent_clusters(data, table_size + 1)
368            # Clusters allocated for the current length of the refcount table
369            table_clusters = set(range(table_start, table_start + table_size))
370            # Clusters allocated for the refcount table including
371            # last optional one for potential l1 growth
372            table_clusters_allocated = set(range(table_start, table_start +
373                                                 table_size + 1))
374            # New refcount blocks necessary for clusters occupied by the
375            # refcount table
376            diff = set([c / block_size for c in table_clusters]) - blocks
377            blocks |= diff
378            while len(diff) != 0:
379                # Allocate clusters for new refcount blocks
380                new = self._get_available_clusters((data | clusters) |
381                                                   table_clusters_allocated,
382                                                   len(diff))
383                # Indices of new refcount blocks necessary to cover
384                # clusters in 'new'
385                diff = set([x / block_size for x in new]) - blocks
386                clusters |= new
387                blocks |= diff
388                # Check if the refcount table needs one more cluster
389                if int(ceil((max(blocks) + 1) / float(size))) > table_size:
390                    new_block_id = (table_start + table_size) / block_size
391                    # Check if the additional table cluster needs
392                    # one more refcount block
393                    if new_block_id not in blocks:
394                        diff.add(new_block_id)
395                    table_clusters.add(table_start + table_size)
396                    table_size += 1
397            return table_clusters, blocks, clusters
398
399        def create_table_entry(table_offset, block_cluster, block_size,
400                               cluster):
401            """Generate a refcount table entry."""
402            offset = table_offset + UINT64_S * (cluster / block_size)
403            return ['>Q', offset, block_cluster * self.cluster_size,
404                    'refcount_table_entry']
405
406        def create_block_entry(block_cluster, block_size, cluster):
407            """Generate a list of entries for the current block."""
408            entry_size = self.cluster_size / block_size
409            offset = block_cluster * self.cluster_size
410            entry_offset = offset + entry_size * (cluster % block_size)
411            # While snapshots are not supported all refcounts are set to 1
412            return ['>H', entry_offset, 1, 'refcount_block_entry']
413        # Size of a block entry in bits
414        refcount_bits = 1 << self.header['refcount_order'][0].value
415        # Number of refcount entries per refcount block
416        # Convert self.cluster_size from bytes to bits to have the same
417        # base for the numerator and denominator
418        block_size = self.cluster_size * 8 / refcount_bits
419        meta_data = self._get_metadata()
420        if len(self.data_clusters) == 0:
421            # All metadata for an empty guest image needs 4 clusters:
422            # header, rfc table, rfc block, L1 table.
423            # Header takes cluster #0, other clusters ##1-3 can be used
424            block_clusters = set([random.choice(list(set(range(1, 4)) -
425                                                     meta_data))])
426            block_ids = set([0])
427            table_clusters = set([random.choice(list(set(range(1, 4)) -
428                                                     meta_data -
429                                                     block_clusters))])
430        else:
431            block_clusters, block_ids = \
432                                allocate_rfc_blocks(self.data_clusters |
433                                                    meta_data, block_size)
434            table_clusters, block_ids, new_clusters = \
435                                    allocate_rfc_table(self.data_clusters |
436                                                       meta_data |
437                                                       block_clusters,
438                                                       block_ids,
439                                                       block_size)
440            block_clusters |= new_clusters
441
442        meta_data |= block_clusters | table_clusters
443        table_offset = min(table_clusters) * self.cluster_size
444        block_id = None
445        # Clusters allocated for refcount blocks
446        block_clusters = list(block_clusters)
447        # Indices of refcount blocks
448        block_ids = list(block_ids)
449        # Refcount table entries
450        rfc_table = []
451        # Refcount entries
452        rfc_blocks = []
453
454        for cluster in sorted(self.data_clusters | meta_data):
455            if cluster / block_size != block_id:
456                block_id = cluster / block_size
457                block_cluster = block_clusters[block_ids.index(block_id)]
458                rfc_table.append(create_table_entry(table_offset,
459                                                    block_cluster,
460                                                    block_size, cluster))
461            rfc_blocks.append(create_block_entry(block_cluster, block_size,
462                                                 cluster))
463        self.refcount_table = FieldsList(rfc_table)
464        self.refcount_blocks = FieldsList(rfc_blocks)
465
466        self.header['refcount_table_offset'][0].value = table_offset
467        self.header['refcount_table_clusters'][0].value = len(table_clusters)
468
469    def fuzz(self, fields_to_fuzz=None):
470        """Fuzz an image by corrupting values of a random subset of its fields.
471
472        Without parameters the method fuzzes an entire image.
473
474        If 'fields_to_fuzz' is specified then only fields in this list will be
475        fuzzed. 'fields_to_fuzz' can contain both individual fields and more
476        general image elements as a header or tables.
477
478        In the first case the field will be fuzzed always.
479        In the second a random subset of fields will be selected and fuzzed.
480        """
481        def coin():
482            """Return boolean value proportional to a portion of fields to be
483            fuzzed.
484            """
485            return random.random() < self.bias
486
487        if fields_to_fuzz is None:
488            for field in self:
489                if coin():
490                    field.value = getattr(fuzz, field.name)(field.value)
491        else:
492            for item in fields_to_fuzz:
493                if len(item) == 1:
494                    for field in getattr(self, item[0]):
495                        if coin():
496                            field.value = getattr(fuzz,
497                                                  field.name)(field.value)
498                else:
499                    # If fields with the requested name were not generated
500                    # getattr(self, item[0])[item[1]] returns an empty list
501                    for field in getattr(self, item[0])[item[1]]:
502                        field.value = getattr(fuzz, field.name)(field.value)
503
504    def write(self, filename):
505        """Write an entire image to the file."""
506        image_file = open(filename, 'w')
507        for field in self:
508            image_file.seek(field.offset)
509            image_file.write(struct.pack(field.fmt, field.value))
510
511        for cluster in sorted(self.data_clusters):
512            image_file.seek(cluster * self.cluster_size)
513            image_file.write(urandom(self.cluster_size))
514
515        # Align the real image size to the cluster size
516        image_file.seek(0, 2)
517        size = image_file.tell()
518        rounded = (size + self.cluster_size - 1) & ~(self.cluster_size - 1)
519        if rounded > size:
520            image_file.seek(rounded - 1)
521            image_file.write("\0")
522        image_file.close()
523
524    @staticmethod
525    def _size_params():
526        """Generate a random image size aligned to a random correct
527        cluster size.
528        """
529        cluster_bits = random.randrange(9, 21)
530        cluster_size = 1 << cluster_bits
531        img_size = random.randrange(0, MAX_IMAGE_SIZE + 1, cluster_size)
532        return (cluster_bits, img_size)
533
534    @staticmethod
535    def _get_available_clusters(used, number):
536        """Return a set of indices of not allocated clusters.
537
538        'used' contains indices of currently allocated clusters.
539        All clusters that cannot be allocated between 'used' clusters will have
540        indices appended to the end of 'used'.
541        """
542        append_id = max(used) + 1
543        free = set(range(1, append_id)) - used
544        if len(free) >= number:
545            return set(random.sample(free, number))
546        else:
547            return free | set(range(append_id, append_id + number - len(free)))
548
549    @staticmethod
550    def _get_adjacent_clusters(used, size):
551        """Return an index of the first cluster in the sequence of free ones.
552
553        'used' contains indices of currently allocated clusters. 'size' is the
554        length of the sequence of free clusters.
555        If the sequence of 'size' is not available between 'used' clusters, its
556        first index will be append to the end of 'used'.
557        """
558        def get_cluster_id(lst, length):
559            """Return the first index of the sequence of the specified length
560            or None if the sequence cannot be inserted in the list.
561            """
562            if len(lst) != 0:
563                pairs = []
564                pair = (lst[0], 1)
565                for i in range(1, len(lst)):
566                    if lst[i] == lst[i-1] + 1:
567                        pair = (lst[i], pair[1] + 1)
568                    else:
569                        pairs.append(pair)
570                        pair = (lst[i], 1)
571                pairs.append(pair)
572                random.shuffle(pairs)
573                for x, s in pairs:
574                    if s >= length:
575                        return x - length + 1
576            return None
577
578        append_id = max(used) + 1
579        free = list(set(range(1, append_id)) - used)
580        idx = get_cluster_id(free, size)
581        if idx is None:
582            return append_id
583        else:
584            return idx
585
586    @staticmethod
587    def _alloc_data(img_size, cluster_size):
588        """Return a set of random indices of clusters allocated for guest data.
589        """
590        num_of_cls = img_size/cluster_size
591        return set(random.sample(range(1, num_of_cls + 1),
592                                 random.randint(0, num_of_cls)))
593
594    def _get_metadata(self):
595        """Return indices of clusters allocated for image metadata."""
596        ids = set()
597        for x in self:
598            ids.add(x.offset/self.cluster_size)
599        return ids
600
601
602def create_image(test_img_path, backing_file_name=None, backing_file_fmt=None,
603                 fields_to_fuzz=None):
604    """Create a fuzzed image and write it to the specified file."""
605    image = Image(backing_file_name)
606    image.set_backing_file_format(backing_file_fmt)
607    image.create_feature_name_table()
608    image.set_end_of_extension_area()
609    image.create_l_structures()
610    image.create_refcount_structures()
611    image.fuzz(fields_to_fuzz)
612    image.write(test_img_path)
613    return image.image_size
614