#!/usr/bin/env bash
# group: rw auto quick
#
# Test case for image corruption (overlapping data structures) in qcow2
#
# Copyright (C) 2013 Red Hat, Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
#
# xref: /qemu/tests/qemu-iotests/060 (revision b21e2380)

# creator
owner=mreitz@redhat.com

# Name under which this test was invoked; it labels the first output line.
seq="$(basename "$0")"
echo "QA output created by $seq"

status=1	# failure is the default!

# Clean up after the test by delegating to _cleanup_test_img.
# Takes no arguments.
_cleanup()
{
    _cleanup_test_img
}
# On shell exit (0) and on signals 1, 2, 3 and 15, run _cleanup and leave
# with whatever $status holds at that moment; the escaped \$ defers the
# expansion until the trap actually fires.
trap "_cleanup; exit \$status" 0 1 2 3 15

# Sometimes the error line might be dumped before/after an event
# randomly.  Mask it out for the specific test that may trigger this
# uncertainty, for now.
#
# Reads stdin and copies it to stdout, dropping every line that contains
# "Input/output error".
_filter_io_error()
{
    sed -e '/Input\/output error/d'
}

# get standard environment, filters and checks
. ./common.rc
. ./common.filter

# This tests qcow2-specific low-level functionality
_supported_fmt qcow2
_supported_proto file fuse
_supported_os Linux
# These tests only work for compat=1.1 images without an external
# data file with refcount_bits=16
# (The refcount_bits pattern is meant to match any value other than 16:
# a first character that is not '1', or a second character that is not
# '6' or is absent.)
_unsupported_imgopts 'compat=0.10' data_file \
    'refcount_bits=\([^1]\|.\([^6]\|$\)\)'

# The repair process will create a large file - so check for availability first
_require_large_file 64G

# Assumed byte offsets of the metadata structures that the tests below
# overwrite with poke_file: refcount table (rt), first refcount block (rb),
# L1 table (l1) and first L2 table (l2).
rt_offset=65536  # 0x10000 (XXX: just an assumption)
rb_offset=131072 # 0x20000 (XXX: just an assumption)
l1_offset=196608 # 0x30000 (XXX: just an assumption)
l2_offset=262144 # 0x40000 (XXX: just an assumption)
l2_offset_after_snapshot=524288 # 0x80000 (XXX: just an assumption)

# qemu-io "open" commands, passed via -c so that the image is opened with
# explicit options instead of being given as a positional argument.
OPEN_RW="open -o overlap-check=all $TEST_IMG"
# Overlap checks are done before write operations only, therefore opening an
# image read-only makes the overlap-check option irrelevant
OPEN_RO="open -r $TEST_IMG"

echo
echo "=== Testing L2 reference into L1 ==="
echo
_make_test_img 64M
# Link first L1 entry (first L2 table) onto itself
# (Note the MSb in the L1 entry is set, ensuring the refcount is one - else any
# later write will result in a COW operation, effectively ruining this attempt
# on image corruption)
# The offset written into the entry is 0x30000, i.e. $l1_offset.
poke_file "$TEST_IMG" "$l1_offset" "\x80\x00\x00\x00\x00\x03\x00\x00"
_check_test_img

# The corrupt bit should not be set anyway
_qcow2_dump_header | grep incompatible_features

# Try to write something, thereby forcing the corrupt bit to be set
$QEMU_IO -c "$OPEN_RW" -c "write -P 0x2a 0 512" | _filter_qemu_io

# The corrupt bit must now be set
_qcow2_dump_header | grep incompatible_features

# This information should be available through qemu-img info
_img_info --format-specific

# Try to open the image R/W (which should fail)
$QEMU_IO -c "$OPEN_RW" -c "read 0 512" 2>&1 | _filter_qemu_io \
                                            | _filter_testdir \
                                            | _filter_imgfmt

# Try to open it RO (which should succeed)
$QEMU_IO -c "$OPEN_RO" -c "read 0 512" | _filter_qemu_io

# We could now try to fix the image, but this would probably fail (how should an
# L2 table linked onto the L1 table be fixed?)

echo
echo "=== Testing cluster data reference into refcount block ==="
echo
_make_test_img 64M
# Allocate L2 table
# (grow the file by one 64k cluster past $l2_offset and point the first L1
# entry at 0x40000, i.e. $l2_offset)
truncate -s "$(($l2_offset+65536))" "$TEST_IMG"
poke_file "$TEST_IMG" "$l1_offset" "\x80\x00\x00\x00\x00\x04\x00\x00"
# Mark cluster as used
poke_file "$TEST_IMG" "$(($rb_offset+8))" "\x00\x01"
# Redirect new data cluster onto refcount block
# (the L2 entry's offset is 0x20000, i.e. $rb_offset)
poke_file "$TEST_IMG" "$l2_offset" "\x80\x00\x00\x00\x00\x02\x00\x00"
_check_test_img
_qcow2_dump_header | grep incompatible_features
$QEMU_IO -c "$OPEN_RW" -c "write -P 0x2a 0 512" | _filter_qemu_io
_qcow2_dump_header | grep incompatible_features

# Try to fix it
_check_test_img -r all

# The corrupt bit should be cleared
_qcow2_dump_header | grep incompatible_features

# Look if it's really really fixed
$QEMU_IO -c "$OPEN_RW" -c "write -P 0x2a 0 512" | _filter_qemu_io
_qcow2_dump_header | grep incompatible_features

echo
echo "=== Testing cluster data reference into inactive L2 table ==="
echo
_make_test_img 64M
# Write before and after taking a snapshot, so that the snapshot keeps its
# own L2 table while the active image uses a new one.
$QEMU_IO -c "$OPEN_RW" -c "write -P 1 0 512" | _filter_qemu_io
$QEMU_IMG snapshot -c foo "$TEST_IMG"
$QEMU_IO -c "$OPEN_RW" -c "write -P 2 0 512" | _filter_qemu_io
# The inactive L2 table remains at its old offset
# (so the first entry of the table at $l2_offset_after_snapshot is pointed
# at 0x40000, the value of $l2_offset)
poke_file "$TEST_IMG" "$l2_offset_after_snapshot" \
                      "\x80\x00\x00\x00\x00\x04\x00\x00"
_check_test_img
_qcow2_dump_header | grep incompatible_features
$QEMU_IO -c "$OPEN_RW" -c "write -P 3 0 512" | _filter_qemu_io
_qcow2_dump_header | grep incompatible_features
_check_test_img -r all
_qcow2_dump_header | grep incompatible_features
$QEMU_IO -c "$OPEN_RW" -c "write -P 4 0 512" | _filter_qemu_io
_qcow2_dump_header | grep incompatible_features

# Check data
# (pattern 4 in the active image, pattern 1 again after reverting to the
# snapshot)
$QEMU_IO -c "$OPEN_RO" -c "read -P 4 0 512" | _filter_qemu_io
$QEMU_IMG snapshot -a foo "$TEST_IMG"
_check_test_img
$QEMU_IO -c "$OPEN_RO" -c "read -P 1 0 512" | _filter_qemu_io

echo
echo "=== Testing overlap while COW is in flight ==="
echo
BACKING_IMG=$TEST_IMG.base
TEST_IMG=$BACKING_IMG _make_test_img 1G

$QEMU_IO -c 'write 0k 64k' "$BACKING_IMG" | _filter_qemu_io

_make_test_img -b "$BACKING_IMG" -F "$IMGFMT" 1G
# Write two clusters, the second one enforces creation of an L2 table after
# the first data cluster.
$QEMU_IO -c 'write 0k 64k' -c 'write 512M 64k' "$TEST_IMG" | _filter_qemu_io
# Free the first cluster. This cluster will soon enough be reallocated and
# used for COW.
poke_file "$TEST_IMG" "$l2_offset" "\x00\x00\x00\x00\x00\x00\x00\x00"
poke_file "$TEST_IMG" "$(($rb_offset+10))" "\x00\x00"
# Now, corrupt the image by marking the second L2 table cluster as free.
poke_file "$TEST_IMG" "$(($rb_offset+12))" "\x00\x00"
# Start a write operation requiring COW on the image stopping it right before
# doing the read; then, trigger the corruption prevention by writing anything to
# any unallocated cluster, leading to an attempt to overwrite the second L2
# table. Finally, resume the COW write and see it fail (but not crash).
# (The commands are fed to qemu-io on stdin, one per line.)
echo "open -o file.driver=blkdebug $TEST_IMG
break cow_read 0
aio_write 0k 1k
wait_break 0
write 64k 64k
resume 0" | $QEMU_IO | _filter_qemu_io

echo
echo "=== Testing unallocated image header ==="
echo
_make_test_img 64M
# Create L1/L2
$QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io
# Zero the first refcount entry (two bytes at $rb_offset), then write again
# so that a new cluster has to be allocated.
poke_file "$TEST_IMG" "$rb_offset" "\x00\x00"
$QEMU_IO -c "write 64k 64k" "$TEST_IMG" | _filter_qemu_io

echo
echo "=== Testing unaligned L1 entry ==="
echo
_make_test_img 64M
$QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io
# This will be masked with ~(512 - 1) = ~0x1ff, so whether the lower 9 bits are
# aligned or not does not matter
# (the entry is given the offset 0x42a00)
poke_file "$TEST_IMG" "$l1_offset" "\x80\x00\x00\x00\x00\x04\x2a\x00"
$QEMU_IO -c "read 0 64k" "$TEST_IMG" | _filter_qemu_io

# Test how well zero cluster expansion can cope with this
_make_test_img 64M
$QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io
poke_file "$TEST_IMG" "$l1_offset" "\x80\x00\x00\x00\x00\x04\x2a\x00"
$QEMU_IMG amend -o compat=0.10 "$TEST_IMG"

echo
echo "=== Testing unaligned L2 entry ==="
echo
_make_test_img 64M
$QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io
# Give the first L2 entry the unaligned offset 0x52a00, then read through it
poke_file "$TEST_IMG" "$l2_offset" "\x80\x00\x00\x00\x00\x05\x2a\x00"
$QEMU_IO -c "read 0 64k" "$TEST_IMG" | _filter_qemu_io

echo
echo "=== Testing unaligned pre-allocated zero cluster ==="
echo
_make_test_img 64M
$QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io
# Same unaligned offset as above, but with the lowest bit of the entry set
poke_file "$TEST_IMG" "$l2_offset" "\x80\x00\x00\x00\x00\x05\x2a\x01"
# zero cluster expansion
$QEMU_IMG amend -o compat=0.10 "$TEST_IMG"

echo
echo "=== Testing unaligned reftable entry ==="
echo
_make_test_img 64M
# Give the first reftable entry the unaligned offset 0x22a00, then write
poke_file "$TEST_IMG" "$rt_offset" "\x00\x00\x00\x00\x00\x02\x2a\x00"
$QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io

echo
echo "=== Testing non-fatal corruption on freeing ==="
echo
_make_test_img 64M
$QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io
# Unaligned first L2 entry; discarding the cluster has to free it
poke_file "$TEST_IMG" "$l2_offset" "\x80\x00\x00\x00\x00\x05\x2a\x00"
$QEMU_IO -c "discard 0 64k" "$TEST_IMG" | _filter_qemu_io

echo
echo "=== Testing read-only corruption report ==="
echo
_make_test_img 64M
$QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io
poke_file "$TEST_IMG" "$l2_offset" "\x80\x00\x00\x00\x00\x05\x2a\x00"
# Should only emit a single error message
$QEMU_IO -c "$OPEN_RO" -c "read 0 64k" -c "read 0 64k" | _filter_qemu_io

echo
echo "=== Testing non-fatal and then fatal corruption report ==="
echo
_make_test_img 64M
$QEMU_IO -c "write 0 128k" "$TEST_IMG" | _filter_qemu_io
# Make both the first and the second L2 entry unaligned
poke_file "$TEST_IMG" "$l2_offset"        "\x80\x00\x00\x00\x00\x05\x2a\x00"
poke_file "$TEST_IMG" "$(($l2_offset+8))" "\x80\x00\x00\x00\x00\x06\x2a\x00"
# Should emit two error messages
$QEMU_IO -c "discard 0 64k" -c "read 64k 64k" "$TEST_IMG" | _filter_qemu_io

echo
echo "=== Testing empty refcount table ==="
echo
_make_test_img 64M
# Zero the first reftable entry before anything has been written
poke_file "$TEST_IMG" "$rt_offset"        "\x00\x00\x00\x00\x00\x00\x00\x00"
$QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io
# Repair the image
_check_test_img -r all

echo
echo "=== Testing empty refcount table with valid L1 and L2 tables ==="
echo
_make_test_img 64M
$QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io
poke_file "$TEST_IMG" "$rt_offset"        "\x00\x00\x00\x00\x00\x00\x00\x00"
# Since the first data cluster is already allocated this triggers an
# allocation with an explicit offset (using qcow2_alloc_clusters_at())
# causing a refcount block to be allocated at offset 0
$QEMU_IO -c "write 0 128k" "$TEST_IMG" | _filter_qemu_io
# Repair the image
_check_test_img -r all

echo
echo "=== Testing empty refcount block ==="
echo
_make_test_img 64M
# Zero the first eight bytes of the refcount block
poke_file "$TEST_IMG" "$rb_offset"        "\x00\x00\x00\x00\x00\x00\x00\x00"
$QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io
# Repair the image
_check_test_img -r all

echo
echo "=== Testing empty refcount block with compressed write ==="
echo
_make_test_img 64M
$QEMU_IO -c "write 64k 64k" "$TEST_IMG" | _filter_qemu_io
poke_file "$TEST_IMG" "$rb_offset"        "\x00\x00\x00\x00\x00\x00\x00\x00"
# The previous write already allocated an L2 table, so now this new
# write will try to allocate a compressed data cluster at offset 0.
$QEMU_IO -c "write -c 0k 64k" "$TEST_IMG" | _filter_qemu_io
# Repair the image
_check_test_img -r all

echo
echo "=== Testing zero refcount table size ==="
echo
_make_test_img 64M
# Zero the four bytes at offset 56 of the image file
poke_file "$TEST_IMG" "56"                "\x00\x00\x00\x00"
$QEMU_IO -c "write 0 64k" "$TEST_IMG" 2>&1 | _filter_testdir | _filter_imgfmt
# Repair the image
_check_test_img -r all

echo
echo "=== Testing incorrect refcount table offset ==="
echo
_make_test_img 64M
# Zero the eight bytes at offset 48 of the image file
poke_file "$TEST_IMG" "48"                "\x00\x00\x00\x00\x00\x00\x00\x00"
$QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io

echo
echo "=== Testing dirty corrupt image ==="
echo

_make_test_img 64M

# Let the refblock appear unaligned
poke_file "$TEST_IMG" "$rt_offset"        "\x00\x00\x00\x00\xff\xff\x2a\x00"
# Mark the image dirty, thus forcing an automatic check when opening it
$PYTHON qcow2.py "$TEST_IMG" set-feature-bit incompatible 0
# Open the image (qemu should refuse to do so)
$QEMU_IO -c close "$TEST_IMG" 2>&1 | _filter_testdir | _filter_imgfmt

echo '--- Repairing ---'

# The actual repair should have happened (because of the dirty bit),
# but some cleanup may have failed (like freeing the old reftable)
# because the image was already marked corrupt by that point
_check_test_img -r all

echo
echo "=== Writing to an unaligned preallocated zero cluster ==="
echo

_make_test_img 64M

# Allocate the L2 table
$QEMU_IO -c "write 0 64k" -c "discard 0 64k" "$TEST_IMG" | _filter_qemu_io
# Pretend there is a preallocated zero cluster somewhere inside the
# image header
# (the entry's offset, 0x2a00, lies within the first 64k of the file)
poke_file "$TEST_IMG" "$l2_offset" "\x80\x00\x00\x00\x00\x00\x2a\x01"
# Let's write to it!
$QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io

echo '--- Repairing ---'
_check_test_img -r all

echo
echo '=== Discarding with an unaligned refblock ==='
echo

_make_test_img 64M

$QEMU_IO -c "write 0 128k" "$TEST_IMG" | _filter_qemu_io
# Make our refblock unaligned
poke_file "$TEST_IMG" "$(($rt_offset))" "\x00\x00\x00\x00\x00\x00\x2a\x00"
# Now try to discard something that will be submitted as two requests
# (main part + tail)
# (Unlike most qemu-io calls in this test, the output is not piped through
# _filter_qemu_io.)
$QEMU_IO -c "discard 0 65537" "$TEST_IMG"

echo '--- Repairing ---'
# Fails the first repair because the corruption prevents the check
# function from double-checking
# (Using -q for the first invocation, because otherwise the
#  double-check error message appears above the summary for some
#  reason -- so let's just hide the summary)
_check_test_img -q -r all
_check_test_img -r all

echo
echo "=== Discarding an out-of-bounds refblock ==="
echo

_make_test_img 64M

# Pretend there's a refblock really up high
# (second reftable entry, offset 0x00ffffff00000000)
poke_file "$TEST_IMG" "$(($rt_offset+8))" "\x00\xff\xff\xff\x00\x00\x00\x00"
# Let's try to shrink the qcow2 image so that the block driver tries
# to discard that refblock (and see what happens!)
$QEMU_IMG resize --shrink "$TEST_IMG" 32M

echo '--- Checking and retrying ---'
# Image should not be resized
_img_info | grep 'virtual size'
# But it should pass this check, because the "partial" resize has
# already overwritten refblocks past the end
_check_test_img -r all
# So let's try again
$QEMU_IMG resize --shrink "$TEST_IMG" 32M
_img_info | grep 'virtual size'

echo
echo "=== Discarding a non-covered in-bounds refblock ==="
echo

_make_test_img -o 'refcount_bits=1' 64M

# Pretend there's a refblock somewhere where there is no refblock to
# cover it (but the covering refblock has a valid index in the
# reftable)
# Every refblock covers 65536 * 8 * 65536 = 32 GB, so we have to point
# to 0x10_0000_0000 (64G) to point to the third refblock
poke_file "$TEST_IMG" "$(($rt_offset+8))" "\x00\x00\x00\x10\x00\x00\x00\x00"
$QEMU_IMG resize --shrink "$TEST_IMG" 32M

echo '--- Checking and retrying ---'
# Image should not be resized
_img_info | grep 'virtual size'
# But it should pass this check, because the "partial" resize has
# already overwritten refblocks past the end
_check_test_img -r all
# So let's try again
$QEMU_IMG resize --shrink "$TEST_IMG" 32M
_img_info | grep 'virtual size'

echo
echo "=== Discarding a refblock covered by an unaligned refblock ==="
echo

_make_test_img -o 'refcount_bits=1' 64M

# Same as above
# (the second reftable entry points to 0x10_0000_0000, i.e. 64G)
poke_file "$TEST_IMG" "$(($rt_offset+8))" "\x00\x00\x00\x10\x00\x00\x00\x00"
# But now we actually "create" an unaligned third refblock
# (third reftable entry, offset 0x200)
poke_file "$TEST_IMG" "$(($rt_offset+16))" "\x00\x00\x00\x00\x00\x00\x02\x00"
$QEMU_IMG resize --shrink "$TEST_IMG" 32M

echo '--- Repairing ---'
# Fails the first repair because the corruption prevents the check
# function from double-checking
# (Using -q for the first invocation, because otherwise the
#  double-check error message appears above the summary for some
#  reason -- so let's just hide the summary)
_check_test_img -q -r all
_check_test_img -r all

echo
echo "=== Testing the QEMU shutdown with a corrupted image ==="
echo
_make_test_img 64M
# Zero the first reftable entry
poke_file "$TEST_IMG" "$rt_offset"        "\x00\x00\x00\x00\x00\x00\x00\x00"
# Drive QEMU over QMP on stdio: issue one write to the node through the
# human monitor's qemu-io command, then quit.
echo "{'execute': 'qmp_capabilities'}
      {'execute': 'human-monitor-command',
       'arguments': {'command-line': 'qemu-io drive \"write 0 512\"'}}
      {'execute': 'quit'}" \
    | $QEMU -qmp stdio -nographic -nodefaults \
            -drive if=none,node-name=drive,file="$TEST_IMG",driver=qcow2 \
    | _filter_qmp | _filter_qemu_io

echo
echo "=== Testing incoming inactive corrupted image ==="
echo

_make_test_img 64M
# Create an unaligned L1 entry, so qemu will signal a corruption when
# reading from the covered area
poke_file "$TEST_IMG" "$l1_offset" "\x00\x00\x00\x00\x2a\x2a\x2a\x2a"

# Inactive images are effectively read-only images, so this should be a
# non-fatal corruption (which does not modify the image)
# (stderr is merged into stdout here, and "Input/output error" lines are
# dropped by _filter_io_error.)
echo "{'execute': 'qmp_capabilities'}
      {'execute': 'human-monitor-command',
       'arguments': {'command-line': 'qemu-io drive \"read 0 512\"'}}
      {'execute': 'quit'}" \
    | $QEMU -qmp stdio -nographic -nodefaults \
            -blockdev "{'node-name': 'drive',
                        'driver': 'qcow2',
                        'file': {
                            'driver': 'file',
                            'filename': '$TEST_IMG'
                        }}" \
            -incoming exec:'cat /dev/null' \
            2>&1 \
    | _filter_qmp | _filter_qemu_io | _filter_io_error

echo
# Image should not have been marked corrupt
_img_info --format-specific | grep 'corrupt:'

# success, all done
echo "*** done"
rm -f "$seq.full"
# Picked up by the exit trap, which exits with $status.
status=0
