xref: /qemu/tests/qemu-iotests/060 (revision 27a4a30e)
1#!/usr/bin/env bash
2#
3# Test case for image corruption (overlapping data structures) in qcow2
4#
5# Copyright (C) 2013 Red Hat, Inc.
6#
7# This program is free software; you can redistribute it and/or modify
8# it under the terms of the GNU General Public License as published by
9# the Free Software Foundation; either version 2 of the License, or
10# (at your option) any later version.
11#
12# This program is distributed in the hope that it will be useful,
13# but WITHOUT ANY WARRANTY; without even the implied warranty of
14# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15# GNU General Public License for more details.
16#
17# You should have received a copy of the GNU General Public License
18# along with this program.  If not, see <http://www.gnu.org/licenses/>.
19#
20
# creator
owner=mreitz@redhat.com

# Name of this test, taken from the script's file name; it labels the
# output below.  "$0" is quoted so that a path containing whitespace or
# glob characters reaches basename as a single argument.
seq="$(basename "$0")"
echo "QA output created by $seq"

status=1	# failure is the default!
28
# Cleanup hook installed by the trap below: hands off to
# _cleanup_test_img, which is not defined in this file (presumably it
# comes from the sourced common.rc).
_cleanup() {
	_cleanup_test_img
}
# Run _cleanup and exit with $status when the script exits or receives
# HUP, INT, QUIT or TERM.  Single quotes defer the expansion of $status
# until the trap actually fires.
trap '_cleanup; exit $status' EXIT HUP INT QUIT TERM
34
# The "Input/output error" line may be printed either before or after an
# event, in no deterministic order.  Filter it out, for now, in the
# specific tests that can trigger this nondeterminism.
_filter_io_error() {
    # Drop every line of stdin that contains "Input/output error" and
    # pass all other lines through to stdout unchanged.
    sed -e '/Input\/output error/d'
}
42
# Load the standard test environment, filters and checks
source ./common.rc
source ./common.filter

# This test exercises qcow2-specific low-level functionality
_supported_fmt qcow2
_supported_proto file
_supported_os Linux

# The test only works for compat=1.1 images with refcount_bits=16 and
# without an external data file
_unsupported_imgopts \
    'compat=0.10' \
    data_file \
    'refcount_bits=\([^1]\|.\([^6]\|$\)\)'

# The repair process will create a large file, so check up front that
# one is available
_require_large_file 64G
58
# Offsets of the metadata structures poked at below, in bytes.
# (XXX: every one of these is just an assumption)
rt_offset=$((0x10000))                  # 65536:  refcount table
rb_offset=$((0x20000))                  # 131072: refcount block
l1_offset=$((0x30000))                  # 196608: L1 table
l2_offset=$((0x40000))                  # 262144: L2 table
l2_offset_after_snapshot=$((0x80000))   # 524288: L2 table, after snapshot

# qemu-io "open" command for read/write access with all overlap checks
OPEN_RW="open -o overlap-check=all ${TEST_IMG}"
# Overlap checks are only performed before write operations, so the
# overlap-check option is irrelevant when opening the image read-only
OPEN_RO="open -r ${TEST_IMG}"
69
printf '\n%s\n\n' '=== Testing L2 reference into L1 ==='
_make_test_img 64M
# Point the first L1 entry (i.e. the first L2 table) at the L1 table
# itself.  The MSb of the entry is set so that the refcount is taken to
# be one; otherwise any later write would result in a COW operation and
# ruin this attempt at corrupting the image.
poke_file "$TEST_IMG" "$l1_offset" '\x80\x00\x00\x00\x00\x03\x00\x00'
_check_test_img

# Nothing has written to the image yet, so the corrupt bit should not
# be set
$PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features

# Attempt a write, which forces the corrupt bit to be set
$QEMU_IO -c "$OPEN_RW" -c 'write -P 0x2a 0 512' | _filter_qemu_io

# Now the corrupt bit must be set
$PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features

# qemu-img info should expose this information as well
_img_info --format-specific

# Opening the image R/W should fail
$QEMU_IO -c "$OPEN_RW" -c 'read 0 512' 2>&1 \
    | _filter_qemu_io \
    | _filter_testdir \
    | _filter_imgfmt

# Opening it RO should succeed
$QEMU_IO -c "$OPEN_RO" -c 'read 0 512' | _filter_qemu_io

# No repair is attempted here; it would probably fail (how should an L2
# table linked onto the L1 table be fixed?)
103
printf '\n%s\n\n' '=== Testing cluster data reference into refcount block ==='
_make_test_img 64M
# Allocate the L2 table: size the file to end 65536 bytes past
# $l2_offset and hook the table into the first L1 entry
truncate -s "$((l2_offset + 65536))" "$TEST_IMG"
poke_file "$TEST_IMG" "$l1_offset" '\x80\x00\x00\x00\x00\x04\x00\x00'
# Mark cluster as used
poke_file "$TEST_IMG" "$((rb_offset + 8))" '\x00\x01'
# Redirect the new data cluster onto the refcount block
poke_file "$TEST_IMG" "$l2_offset" '\x80\x00\x00\x00\x00\x02\x00\x00'
_check_test_img
# Dump the incompatible_features header field before and after a write
$PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
$QEMU_IO -c "$OPEN_RW" -c 'write -P 0x2a 0 512' | _filter_qemu_io
$PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features

# Attempt a repair
_check_test_img -r all

# After the repair the corrupt bit should be cleared
$PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features

# Write again to see whether it is really, really fixed
$QEMU_IO -c "$OPEN_RW" -c 'write -P 0x2a 0 512' | _filter_qemu_io
$PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
129
printf '\n%s\n\n' '=== Testing cluster data reference into inactive L2 table ==='
_make_test_img 64M
# Write pattern 1, snapshot it as "foo", then overwrite with pattern 2
$QEMU_IO -c "$OPEN_RW" -c 'write -P 1 0 512' | _filter_qemu_io
$QEMU_IMG snapshot -c foo "$TEST_IMG"
$QEMU_IO -c "$OPEN_RW" -c 'write -P 2 0 512' | _filter_qemu_io
# The inactive L2 table remains at its old offset
poke_file "$TEST_IMG" "$l2_offset_after_snapshot" \
    '\x80\x00\x00\x00\x00\x04\x00\x00'
_check_test_img
# Dump incompatible_features around a write, a repair and another write
$PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
$QEMU_IO -c "$OPEN_RW" -c 'write -P 3 0 512' | _filter_qemu_io
$PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
_check_test_img -r all
$PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
$QEMU_IO -c "$OPEN_RW" -c 'write -P 4 0 512' | _filter_qemu_io
$PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features

# Verify the data: pattern 4 in the active state, pattern 1 once
# snapshot "foo" has been applied
$QEMU_IO -c "$OPEN_RO" -c 'read -P 4 0 512' | _filter_qemu_io
$QEMU_IMG snapshot -a foo "$TEST_IMG"
_check_test_img
$QEMU_IO -c "$OPEN_RO" -c 'read -P 1 0 512' | _filter_qemu_io
154
printf '\n%s\n\n' '=== Testing overlap while COW is in flight ==='
# Create a backing image next to the test image and fill its first 64k
BACKING_IMG="$TEST_IMG.base"
TEST_IMG="$BACKING_IMG" _make_test_img 1G

$QEMU_IO -c "write 0k 64k" "$BACKING_IMG" | _filter_qemu_io

# compat=0.10 is needed so that the discard below really unallocates
# the sector instead of turning it into a zero sector - we want COW,
# after all.
_make_test_img -o "compat=0.10" -b "$BACKING_IMG" 1G
# Write two clusters; the second one enforces creation of an L2 table
# after the first data cluster.
$QEMU_IO -c "write 0k 64k" -c "write 512M 64k" "$TEST_IMG" | _filter_qemu_io
# Discard the first cluster.  It will soon enough be reallocated and
# used for COW.
$QEMU_IO -c "discard 0k 64k" "$TEST_IMG" | _filter_qemu_io
# Now corrupt the image by marking the second L2 table cluster as free.
poke_file "$TEST_IMG" 131084 '\x00\x00' # 0x2000c
# Start a write that requires COW and stop it right before the read;
# then trigger the corruption prevention by writing anything to any
# unallocated cluster, which leads to an attempt to overwrite the second
# L2 table.  Finally, resume the COW write and see it fail (but not
# crash).
printf '%s\n' \
    "open -o file.driver=blkdebug $TEST_IMG" \
    'break cow_read 0' \
    'aio_write 0k 1k' \
    'wait_break 0' \
    'write 64k 64k' \
    'resume 0' \
    | $QEMU_IO | _filter_qemu_io
185
printf '\n%s\n\n' '=== Testing unallocated image header ==='
_make_test_img 64M
# Create L1/L2
$QEMU_IO -c 'write 0 64k' "$TEST_IMG" | _filter_qemu_io
# Zero the two bytes at $rb_offset, then write to another cluster
poke_file "$TEST_IMG" "$rb_offset" '\x00\x00'
$QEMU_IO -c 'write 64k 64k' "$TEST_IMG" | _filter_qemu_io
194
printf '\n%s\n\n' '=== Testing unaligned L1 entry ==='
_make_test_img 64M
$QEMU_IO -c 'write 0 64k' "$TEST_IMG" | _filter_qemu_io
# The entry will be masked with ~(512 - 1) = ~0x1ff, so it does not
# matter whether its lower 9 bits are aligned or not
poke_file "$TEST_IMG" "$l1_offset" '\x80\x00\x00\x00\x00\x04\x2a\x00'
$QEMU_IO -c 'read 0 64k' "$TEST_IMG" | _filter_qemu_io

# See how well zero cluster expansion copes with the same corruption
_make_test_img 64M
$QEMU_IO -c 'write 0 64k' "$TEST_IMG" | _filter_qemu_io
poke_file "$TEST_IMG" "$l1_offset" '\x80\x00\x00\x00\x00\x04\x2a\x00'
$QEMU_IMG amend -o compat=0.10 "$TEST_IMG"
210
printf '\n%s\n\n' '=== Testing unaligned L2 entry ==='
_make_test_img 64M
$QEMU_IO -c 'write 0 64k' "$TEST_IMG" | _filter_qemu_io
# Overwrite the first L2 entry with an unaligned offset, then read
# through it
poke_file "$TEST_IMG" "$l2_offset" '\x80\x00\x00\x00\x00\x05\x2a\x00'
$QEMU_IO -c 'read 0 64k' "$TEST_IMG" | _filter_qemu_io
218
printf '\n%s\n\n' '=== Testing unaligned pre-allocated zero cluster ==='
_make_test_img 64M
$QEMU_IO -c 'write 0 64k' "$TEST_IMG" | _filter_qemu_io
# Same unaligned first L2 entry as in the previous section, but with
# the lowest bit set as well
poke_file "$TEST_IMG" "$l2_offset" '\x80\x00\x00\x00\x00\x05\x2a\x01'
# Downgrading to compat=0.10 triggers zero cluster expansion
$QEMU_IMG amend -o compat=0.10 "$TEST_IMG"
227
printf '\n%s\n\n' '=== Testing unaligned reftable entry ==='
_make_test_img 64M
# Overwrite the first reftable entry with an unaligned offset, then
# attempt a write
poke_file "$TEST_IMG" "$rt_offset" '\x00\x00\x00\x00\x00\x02\x2a\x00'
$QEMU_IO -c 'write 0 64k' "$TEST_IMG" | _filter_qemu_io
234
printf '\n%s\n\n' '=== Testing non-fatal corruption on freeing ==='
_make_test_img 64M
$QEMU_IO -c 'write 0 64k' "$TEST_IMG" | _filter_qemu_io
# Make the first L2 entry unaligned, then discard the cluster it maps
poke_file "$TEST_IMG" "$l2_offset" '\x80\x00\x00\x00\x00\x05\x2a\x00'
$QEMU_IO -c 'discard 0 64k' "$TEST_IMG" | _filter_qemu_io
242
printf '\n%s\n\n' '=== Testing read-only corruption report ==='
_make_test_img 64M
$QEMU_IO -c 'write 0 64k' "$TEST_IMG" | _filter_qemu_io
poke_file "$TEST_IMG" "$l2_offset" '\x80\x00\x00\x00\x00\x05\x2a\x00'
# Two reads of the same cluster; only a single error message should be
# emitted
$QEMU_IO -c "$OPEN_RO" -c 'read 0 64k' -c 'read 0 64k' | _filter_qemu_io
251
printf '\n%s\n\n' '=== Testing non-fatal and then fatal corruption report ==='
_make_test_img 64M
$QEMU_IO -c 'write 0 128k' "$TEST_IMG" | _filter_qemu_io
# Make the first two L2 entries unaligned
poke_file "$TEST_IMG" "$l2_offset" '\x80\x00\x00\x00\x00\x05\x2a\x00'
poke_file "$TEST_IMG" "$((l2_offset + 8))" '\x80\x00\x00\x00\x00\x06\x2a\x00'
# The discard and the read should each emit an error message, i.e. two
# in total
$QEMU_IO -c 'discard 0 64k' -c 'read 64k 64k' "$TEST_IMG" | _filter_qemu_io
261
printf '\n%s\n\n' '=== Testing empty refcount table ==='
_make_test_img 64M
# Zero the first reftable entry, then attempt a write
poke_file "$TEST_IMG" "$rt_offset" '\x00\x00\x00\x00\x00\x00\x00\x00'
$QEMU_IO -c 'write 0 64k' "$TEST_IMG" | _filter_qemu_io
# Repair the image
_check_test_img -r all
270
printf '\n%s\n\n' '=== Testing empty refcount table with valid L1 and L2 tables ==='
_make_test_img 64M
$QEMU_IO -c 'write 0 64k' "$TEST_IMG" | _filter_qemu_io
poke_file "$TEST_IMG" "$rt_offset" '\x00\x00\x00\x00\x00\x00\x00\x00'
# The first data cluster is already allocated, so this write triggers an
# allocation with an explicit offset (using qcow2_alloc_clusters_at()),
# which causes a refcount block to be allocated at offset 0
$QEMU_IO -c 'write 0 128k' "$TEST_IMG" | _filter_qemu_io
# Repair the image
_check_test_img -r all
283
printf '\n%s\n\n' '=== Testing empty refcount block ==='
_make_test_img 64M
# Zero the first eight bytes of the refcount block, then attempt a write
poke_file "$TEST_IMG" "$rb_offset" '\x00\x00\x00\x00\x00\x00\x00\x00'
$QEMU_IO -c 'write 0 64k' "$TEST_IMG" | _filter_qemu_io
# Repair the image
_check_test_img -r all
292
printf '\n%s\n\n' '=== Testing empty refcount block with compressed write ==='
_make_test_img 64M
$QEMU_IO -c 'write 64k 64k' "$TEST_IMG" | _filter_qemu_io
poke_file "$TEST_IMG" "$rb_offset" '\x00\x00\x00\x00\x00\x00\x00\x00'
# An L2 table was already allocated by the write above, so this new
# write will try to allocate a compressed data cluster at offset 0.
$QEMU_IO -c 'write -c 0k 64k' "$TEST_IMG" | _filter_qemu_io
# Repair the image
_check_test_img -r all
304
printf '\n%s\n\n' '=== Testing zero refcount table size ==='
_make_test_img 64M
# Zero the four bytes at file offset 56 (going by the section title,
# the refcount table size field of the header)
poke_file "$TEST_IMG" 56 '\x00\x00\x00\x00'
$QEMU_IO -c 'write 0 64k' "$TEST_IMG" 2>&1 \
    | _filter_testdir \
    | _filter_imgfmt
# Repair the image
_check_test_img -r all
313
printf '\n%s\n\n' '=== Testing incorrect refcount table offset ==='
_make_test_img 64M
# Zero the eight bytes at file offset 48 (going by the section title,
# the refcount table offset field of the header)
poke_file "$TEST_IMG" 48 '\x00\x00\x00\x00\x00\x00\x00\x00'
$QEMU_IO -c 'write 0 64k' "$TEST_IMG" | _filter_qemu_io
320
printf '\n%s\n\n' '=== Testing dirty corrupt image ==='

_make_test_img 64M

# Make the refblock appear unaligned
poke_file "$TEST_IMG" "$rt_offset" '\x00\x00\x00\x00\xff\xff\x2a\x00'
# Mark the image dirty, which forces an automatic check when it is
# opened
poke_file "$TEST_IMG" 72 '\x00\x00\x00\x00\x00\x00\x00\x01'
# Open the image (qemu should refuse to do so)
$QEMU_IO -c close "$TEST_IMG" 2>&1 \
    | _filter_testdir \
    | _filter_imgfmt

printf '%s\n' '--- Repairing ---'

# Because of the dirty bit the actual repair should already have
# happened, but some cleanup (like freeing the old reftable) may have
# failed because the image was already marked corrupt by that point
_check_test_img -r all
340
printf '\n%s\n\n' '=== Writing to an unaligned preallocated zero cluster ==='

_make_test_img 64M

# Allocate the L2 table
$QEMU_IO -c 'write 0 64k' -c 'discard 0 64k' "$TEST_IMG" | _filter_qemu_io
# Pretend that a preallocated zero cluster lives somewhere inside the
# image header
poke_file "$TEST_IMG" "$l2_offset" '\x80\x00\x00\x00\x00\x00\x2a\x01'
# Let's write to it!
$QEMU_IO -c 'write 0 64k' "$TEST_IMG" | _filter_qemu_io

printf '%s\n' '--- Repairing ---'
_check_test_img -r all
357
printf '\n%s\n\n' '=== Discarding with an unaligned refblock ==='

_make_test_img 64M

$QEMU_IO -c 'write 0 128k' "$TEST_IMG" | _filter_qemu_io
# Make our refblock unaligned
poke_file "$TEST_IMG" "$((rt_offset))" '\x00\x00\x00\x00\x00\x00\x2a\x00'
# Now discard a range that will be submitted as two requests
# (main part + tail); the output is deliberately left unfiltered
$QEMU_IO -c 'discard 0 65537' "$TEST_IMG"

printf '%s\n' '--- Repairing ---'
# The first repair fails because the corruption prevents the check
# function from double-checking.
# (The first invocation uses -q because otherwise the double-check
#  error message appears above the summary for some reason -- so let's
#  just hide the summary)
_check_test_img -q -r all
_check_test_img -r all
379
printf '\n%s\n\n' '=== Discarding an out-of-bounds refblock ==='

_make_test_img 64M

# Pretend there's a refblock really up high
poke_file "$TEST_IMG" "$((rt_offset + 8))" '\x00\xff\xff\xff\x00\x00\x00\x00'
# Shrink the qcow2 image so that the block driver tries to discard that
# refblock (and see what happens!)
$QEMU_IMG resize --shrink "$TEST_IMG" 32M

printf '%s\n' '--- Checking and retrying ---'
# The image should not have been resized
_img_info | grep 'virtual size'
# It should pass this check nonetheless, because the "partial" resize
# has already overwritten refblocks past the end
_check_test_img -r all
# So let's try again
$QEMU_IMG resize --shrink "$TEST_IMG" 32M
_img_info | grep 'virtual size'
401
printf '\n%s\n\n' '=== Discarding a non-covered in-bounds refblock ==='

_make_test_img -o 'refcount_bits=1' 64M

# Pretend there's a refblock at a place that no refblock covers (even
# though the covering refblock has a valid index in the reftable).
# Every refblock covers 65536 * 8 * 65536 = 32 GB, so the entry has to
# point to 0x10_0000_0000 (64G) to land in the third refblock
poke_file "$TEST_IMG" "$((rt_offset + 8))" '\x00\x00\x00\x10\x00\x00\x00\x00'
$QEMU_IMG resize --shrink "$TEST_IMG" 32M

printf '%s\n' '--- Checking and retrying ---'
# The image should not have been resized
_img_info | grep 'virtual size'
# It should pass this check nonetheless, because the "partial" resize
# has already overwritten refblocks past the end
_check_test_img -r all
# So let's try again
$QEMU_IMG resize --shrink "$TEST_IMG" 32M
_img_info | grep 'virtual size'
425
printf '\n%s\n\n' '=== Discarding a refblock covered by an unaligned refblock ==='

_make_test_img -o 'refcount_bits=1' 64M

# Second reftable entry points to 0x10_0000_0000 (64G), exactly as in
# the "non-covered in-bounds refblock" section
poke_file "$TEST_IMG" "$((rt_offset + 8))" '\x00\x00\x00\x10\x00\x00\x00\x00'
# But this time we actually "create" an unaligned third refblock
poke_file "$TEST_IMG" "$((rt_offset + 16))" '\x00\x00\x00\x00\x00\x00\x02\x00'
$QEMU_IMG resize --shrink "$TEST_IMG" 32M

printf '%s\n' '--- Repairing ---'
# The first repair fails because the corruption prevents the check
# function from double-checking.
# (The first invocation uses -q because otherwise the double-check
#  error message appears above the summary for some reason -- so let's
#  just hide the summary)
_check_test_img -q -r all
_check_test_img -r all
446
printf '\n%s\n\n' '=== Testing the QEMU shutdown with a corrupted image ==='
_make_test_img 64M
# Zero the first reftable entry
poke_file "$TEST_IMG" "$rt_offset" '\x00\x00\x00\x00\x00\x00\x00\x00'
# Feed QEMU a QMP session on stdin: negotiate capabilities, issue a
# qemu-io write on node "drive" through the human monitor, then quit
printf '%s\n' "{'execute': 'qmp_capabilities'}
      {'execute': 'human-monitor-command',
       'arguments': {'command-line': 'qemu-io drive \"write 0 512\"'}}
      {'execute': 'quit'}" \
    | $QEMU -qmp stdio -nographic -nodefaults \
            -drive if=none,node-name=drive,file="$TEST_IMG",driver=qcow2 \
    | _filter_qmp \
    | _filter_qemu_io
459
printf '\n%s\n\n' '=== Testing incoming inactive corrupted image ==='

_make_test_img 64M
# Make the first L1 entry unaligned, so that qemu signals a corruption
# when reading from the area it covers
poke_file "$TEST_IMG" "$l1_offset" '\x00\x00\x00\x00\x2a\x2a\x2a\x2a'

# An inactive image is effectively a read-only image, so this should be
# a non-fatal corruption (one that does not modify the image)
printf '%s\n' "{'execute': 'qmp_capabilities'}
      {'execute': 'human-monitor-command',
       'arguments': {'command-line': 'qemu-io drive \"read 0 512\"'}}
      {'execute': 'quit'}" \
    | $QEMU -qmp stdio -nographic -nodefaults \
            -blockdev "{'node-name': 'drive',
                        'driver': 'qcow2',
                        'file': {
                            'driver': 'file',
                            'filename': '$TEST_IMG'
                        }}" \
            -incoming exec:'cat /dev/null' \
            2>&1 \
    | _filter_qmp \
    | _filter_qemu_io \
    | _filter_io_error

printf '\n'
# The image should not have been marked corrupt
_img_info --format-specific | grep 'corrupt:'
489
# success, all done
echo "*** done"
# Remove the $seq.full file of this test.  The name is quoted so that
# whitespace or glob characters in the test name cannot make it expand
# to several rm arguments.
rm -f "$seq.full"
# Report success through the exit trap
status=0
494