xref: /qemu/tests/qemu-iotests/060 (revision ab9056ff)
#!/usr/bin/env bash
#
# Test case for image corruption (overlapping data structures) in qcow2
#
# Copyright (C) 2013 Red Hat, Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
#
20
# creator
owner=mreitz@redhat.com

# Test name, taken from the script's own file name.  "$0" is quoted so that a
# path containing whitespace is not word-split before basename sees it.
seq="$(basename "$0")"
echo "QA output created by $seq"

# Exit status reported by the exit trap; only the very last statement of the
# script flips it to 0.
status=1	# failure is the default!
28
# Tear down whatever this test created.  Delegates to _cleanup_test_img, which
# is not defined in this file (presumably supplied by the sourced common.rc).
_cleanup()
{
    _cleanup_test_img
}

# Run the cleanup on normal exit (0) and on signals 1, 2, 3 and 15, then exit
# with the current value of $status.  The \$ defers expansion of $status until
# the trap actually fires.
trap "_cleanup; exit \$status" 0 1 2 3 15
34
# Filter: drop every line containing "Input/output error" from stdin and pass
# all other lines through unchanged.
#
# The error line may be printed before or after an event, in no fixed order.
# Tests whose output is affected by that ordering pipe their output through
# this filter to mask the line out.
_filter_io_error()
{
    sed '/Input\/output error/d'
}
42
# get standard environment, filters and checks
. ./common.rc
. ./common.filter

# This tests qcow2-specific low-level functionality, so restrict the test to
# the qcow2 format, the file protocol and Linux.
_supported_fmt qcow2
_supported_proto file
_supported_os Linux
51
# Byte offsets of the metadata structures that the test cases below overwrite.
# They are assumptions about where a freshly created image places them.
rt_offset=65536  # 0x10000 (XXX: just an assumption)
rb_offset=131072 # 0x20000 (XXX: just an assumption)
l1_offset=196608 # 0x30000 (XXX: just an assumption)
l2_offset=262144 # 0x40000 (XXX: just an assumption)
l2_offset_after_snapshot=524288 # 0x80000 (XXX: just an assumption)

IMGOPTS="compat=1.1"

# qemu-io "open" commands shared by the test cases below.
OPEN_RW="open -o overlap-check=all $TEST_IMG"
# Overlap checks are done before write operations only, therefore opening an
# image read-only makes the overlap-check option irrelevant
OPEN_RO="open -r $TEST_IMG"
64
# Case: the first L1 entry is pointed back at the L1 table itself, then the
# corrupt flag is inspected before and after a write attempt.
echo
echo "=== Testing L2 reference into L1 ==="
echo
_make_test_img 64M
# Link first L1 entry (first L2 table) onto itself
# (Note the MSb in the L1 entry is set, ensuring the refcount is one - else any
# later write will result in a COW operation, effectively ruining this attempt
# on image corruption)
poke_file "$TEST_IMG" "$l1_offset" "\x80\x00\x00\x00\x00\x03\x00\x00"
_check_test_img

# The corrupt bit should not be set anyway
$PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features

# Try to write something, thereby forcing the corrupt bit to be set
$QEMU_IO -c "$OPEN_RW" -c "write -P 0x2a 0 512" | _filter_qemu_io

# The corrupt bit must now be set
$PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features

# This information should be available through qemu-img info
_img_info --format-specific

# Try to open the image R/W (which should fail)
$QEMU_IO -c "$OPEN_RW" -c "read 0 512" 2>&1 \
    | _filter_qemu_io \
    | _filter_testdir \
    | _filter_imgfmt

# Try to open it RO (which should succeed)
$QEMU_IO -c "$OPEN_RO" -c "read 0 512" | _filter_qemu_io

# We could now try to fix the image, but this would probably fail (how should an
# L2 table linked onto the L1 table be fixed?)
98
# Case: an L2 entry is redirected onto the refcount block; the image is then
# written to, repaired with "check -r all" and written to again.
echo
echo "=== Testing cluster data reference into refcount block ==="
echo
_make_test_img 64M
# Allocate L2 table
truncate -s "$((l2_offset + 65536))" "$TEST_IMG"
poke_file "$TEST_IMG" "$l1_offset" "\x80\x00\x00\x00\x00\x04\x00\x00"
# Mark cluster as used
poke_file "$TEST_IMG" "$((rb_offset + 8))" "\x00\x01"
# Redirect new data cluster onto refcount block
poke_file "$TEST_IMG" "$l2_offset" "\x80\x00\x00\x00\x00\x02\x00\x00"
_check_test_img
$PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
$QEMU_IO -c "$OPEN_RW" -c "write -P 0x2a 0 512" | _filter_qemu_io
$PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features

# Try to fix it
_check_test_img -r all

# The corrupt bit should be cleared
$PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features

# Look if it's really really fixed
$QEMU_IO -c "$OPEN_RW" -c "write -P 0x2a 0 512" | _filter_qemu_io
$PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
124
# Case: after taking a snapshot, the L2 table at the post-snapshot offset gets
# an entry pointing at 0x40000 (the original L2 table offset assumed above).
# The header is dumped after each step to follow the incompatible_features
# field through write, repair and a second write.
echo
echo "=== Testing cluster data reference into inactive L2 table ==="
echo
_make_test_img 64M
$QEMU_IO -c "$OPEN_RW" -c "write -P 1 0 512" | _filter_qemu_io
$QEMU_IMG snapshot -c foo "$TEST_IMG"
$QEMU_IO -c "$OPEN_RW" -c "write -P 2 0 512" | _filter_qemu_io
# The inactive L2 table remains at its old offset
poke_file "$TEST_IMG" "$l2_offset_after_snapshot" \
    "\x80\x00\x00\x00\x00\x04\x00\x00"
_check_test_img
$PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
$QEMU_IO -c "$OPEN_RW" -c "write -P 3 0 512" | _filter_qemu_io
$PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
_check_test_img -r all
$PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
$QEMU_IO -c "$OPEN_RW" -c "write -P 4 0 512" | _filter_qemu_io
$PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features

# Check data: the active layer must hold pattern 4, and after reverting to
# snapshot "foo" the original pattern 1 must be readable again.
$QEMU_IO -c "$OPEN_RO" -c "read -P 4 0 512" | _filter_qemu_io
$QEMU_IMG snapshot -a foo "$TEST_IMG"
_check_test_img
$QEMU_IO -c "$OPEN_RO" -c "read -P 1 0 512" | _filter_qemu_io
149
# Case: a corruption is detected while a copy-on-write request is suspended at
# a blkdebug breakpoint; resuming the request must fail without crashing.
echo
echo "=== Testing overlap while COW is in flight ==="
echo
BACKING_IMG=$TEST_IMG.base
TEST_IMG=$BACKING_IMG _make_test_img 1G

$QEMU_IO -c 'write 0k 64k' "$BACKING_IMG" | _filter_qemu_io

# compat=0.10 is required in order to make the following discard actually
# unallocate the sector rather than make it a zero sector - we want COW, after
# all.
IMGOPTS='compat=0.10' _make_test_img -b "$BACKING_IMG" 1G
# Write two clusters, the second one enforces creation of an L2 table after
# the first data cluster.
$QEMU_IO -c 'write 0k 64k' -c 'write 512M 64k' "$TEST_IMG" | _filter_qemu_io
# Discard the first cluster. This cluster will soon enough be reallocated and
# used for COW.
$QEMU_IO -c 'discard 0k 64k' "$TEST_IMG" | _filter_qemu_io
# Now, corrupt the image by marking the second L2 table cluster as free.
poke_file "$TEST_IMG" '131084' "\x00\x00" # 0x2000c
# Start a write operation requiring COW on the image stopping it right before
# doing the read; then, trigger the corruption prevention by writing anything to
# any unallocated cluster, leading to an attempt to overwrite the second L2
# table. Finally, resume the COW write and see it fail (but not crash).
# The command list is fed to qemu-io on stdin, one command per line.
echo "open -o file.driver=blkdebug $TEST_IMG
break cow_read 0
aio_write 0k 1k
wait_break 0
write 64k 64k
resume 0" | $QEMU_IO | _filter_qemu_io
180
# Case: the first entry of the refcount block is zeroed (per the section
# title, the one covering the image header) and a new cluster is then written.
echo
echo "=== Testing unallocated image header ==="
echo
_make_test_img 64M
# Create L1/L2
$QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io
poke_file "$TEST_IMG" "$rb_offset" "\x00\x00"
$QEMU_IO -c "write 64k 64k" "$TEST_IMG" | _filter_qemu_io
189
# Case: the first L1 entry is given an offset that is not cluster-aligned.
# It is exercised once through a read and once through "amend".
echo
echo "=== Testing unaligned L1 entry ==="
echo
_make_test_img 64M
$QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io
# This will be masked with ~(512 - 1) = ~0x1ff, so whether the lower 9 bits are
# aligned or not does not matter
poke_file "$TEST_IMG" "$l1_offset" "\x80\x00\x00\x00\x00\x04\x2a\x00"
$QEMU_IO -c "read 0 64k" "$TEST_IMG" | _filter_qemu_io

# Test how well zero cluster expansion can cope with this
_make_test_img 64M
$QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io
poke_file "$TEST_IMG" "$l1_offset" "\x80\x00\x00\x00\x00\x04\x2a\x00"
$QEMU_IMG amend -o compat=0.10 "$TEST_IMG"
205
# Three cases, each on a fresh image, that plant an unaligned offset in a
# single metadata entry and then run one operation on the image.

# Unaligned data cluster offset in the first L2 entry, hit by a read.
echo
echo "=== Testing unaligned L2 entry ==="
echo
_make_test_img 64M
$QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io
poke_file "$TEST_IMG" "$l2_offset" "\x80\x00\x00\x00\x00\x05\x2a\x00"
$QEMU_IO -c "read 0 64k" "$TEST_IMG" | _filter_qemu_io

# Same unaligned offset, but with the lowest bit of the entry set as well
# (per the section title, a pre-allocated zero cluster).
echo
echo "=== Testing unaligned pre-allocated zero cluster ==="
echo
_make_test_img 64M
$QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io
poke_file "$TEST_IMG" "$l2_offset" "\x80\x00\x00\x00\x00\x05\x2a\x01"
# zero cluster expansion
$QEMU_IMG amend -o compat=0.10 "$TEST_IMG"

# Unaligned refcount block offset in the first reftable entry, hit by a write.
echo
echo "=== Testing unaligned reftable entry ==="
echo
_make_test_img 64M
poke_file "$TEST_IMG" "$rt_offset" "\x00\x00\x00\x00\x00\x02\x2a\x00"
$QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io
229
# Three cases about how often and how severely a corruption is reported.
# Each one plants unaligned data cluster offsets in L2 entries.

# A discard of the cluster behind the corrupted entry.
echo
echo "=== Testing non-fatal corruption on freeing ==="
echo
_make_test_img 64M
$QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io
poke_file "$TEST_IMG" "$l2_offset" "\x80\x00\x00\x00\x00\x05\x2a\x00"
$QEMU_IO -c "discard 0 64k" "$TEST_IMG" | _filter_qemu_io

# Two reads of the same corrupted cluster on a read-only image.
echo
echo "=== Testing read-only corruption report ==="
echo
_make_test_img 64M
$QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io
poke_file "$TEST_IMG" "$l2_offset" "\x80\x00\x00\x00\x00\x05\x2a\x00"
# Should only emit a single error message
$QEMU_IO -c "$OPEN_RO" -c "read 0 64k" -c "read 0 64k" | _filter_qemu_io

# Two corrupted L2 entries: the first is hit by a discard, the second by a
# read.
echo
echo "=== Testing non-fatal and then fatal corruption report ==="
echo
_make_test_img 64M
$QEMU_IO -c "write 0 128k" "$TEST_IMG" | _filter_qemu_io
poke_file "$TEST_IMG" "$l2_offset" "\x80\x00\x00\x00\x00\x05\x2a\x00"
poke_file "$TEST_IMG" "$((l2_offset + 8))" "\x80\x00\x00\x00\x00\x06\x2a\x00"
# Should emit two error messages
$QEMU_IO -c "discard 0 64k" -c "read 64k 64k" "$TEST_IMG" | _filter_qemu_io
256
# Four cases that zero the first entry of the refcount table or of the
# refcount block, write to the image, and then repair it with "check -r all".

echo
echo "=== Testing empty refcount table ==="
echo
_make_test_img 64M
poke_file "$TEST_IMG" "$rt_offset" "\x00\x00\x00\x00\x00\x00\x00\x00"
$QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io
# Repair the image
_check_test_img -r all

echo
echo "=== Testing empty refcount table with valid L1 and L2 tables ==="
echo
_make_test_img 64M
$QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io
poke_file "$TEST_IMG" "$rt_offset" "\x00\x00\x00\x00\x00\x00\x00\x00"
# Since the first data cluster is already allocated this triggers an
# allocation with an explicit offset (using qcow2_alloc_clusters_at())
# causing a refcount block to be allocated at offset 0
$QEMU_IO -c "write 0 128k" "$TEST_IMG" | _filter_qemu_io
# Repair the image
_check_test_img -r all

echo
echo "=== Testing empty refcount block ==="
echo
_make_test_img 64M
poke_file "$TEST_IMG" "$rb_offset" "\x00\x00\x00\x00\x00\x00\x00\x00"
$QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io
# Repair the image
_check_test_img -r all

echo
echo "=== Testing empty refcount block with compressed write ==="
echo
_make_test_img 64M
$QEMU_IO -c "write 64k 64k" "$TEST_IMG" | _filter_qemu_io
poke_file "$TEST_IMG" "$rb_offset" "\x00\x00\x00\x00\x00\x00\x00\x00"
# The previous write already allocated an L2 table, so now this new
# write will try to allocate a compressed data cluster at offset 0.
$QEMU_IO -c "write -c 0k 64k" "$TEST_IMG" | _filter_qemu_io
# Repair the image
_check_test_img -r all
299
# Two cases that zero a field of the image header itself.  Going by the
# section titles, byte offset 56 holds the refcount table size and byte
# offset 48 the refcount table offset.

echo
echo "=== Testing zero refcount table size ==="
echo
_make_test_img 64M
poke_file "$TEST_IMG" "56" "\x00\x00\x00\x00"
# stderr is captured too; the path and format name are filtered out of it.
$QEMU_IO -c "write 0 64k" "$TEST_IMG" 2>&1 | _filter_testdir | _filter_imgfmt
# Repair the image
_check_test_img -r all

echo
echo "=== Testing incorrect refcount table offset ==="
echo
_make_test_img 64M
poke_file "$TEST_IMG" "48" "\x00\x00\x00\x00\x00\x00\x00\x00"
$QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io
315
# Case: an image that is both corrupt (unaligned refblock offset) and marked
# dirty, so that merely opening it triggers an automatic check.
echo
echo "=== Testing dirty corrupt image ==="
echo

_make_test_img 64M

# Let the refblock appear unaligned
poke_file "$TEST_IMG" "$rt_offset" "\x00\x00\x00\x00\xff\xff\x2a\x00"
# Mark the image dirty, thus forcing an automatic check when opening it
poke_file "$TEST_IMG" 72 "\x00\x00\x00\x00\x00\x00\x00\x01"
# Open the image (qemu should refuse to do so)
$QEMU_IO -c close "$TEST_IMG" 2>&1 | _filter_testdir | _filter_imgfmt

echo '--- Repairing ---'

# The actual repair should have happened (because of the dirty bit),
# but some cleanup may have failed (like freeing the old reftable)
# because the image was already marked corrupt by that point
_check_test_img -r all
335
# Case: a write to a cluster whose L2 entry claims to be a preallocated zero
# cluster at an unaligned offset inside the image header; repaired afterwards.
echo
echo "=== Writing to an unaligned preallocated zero cluster ==="
echo

_make_test_img 64M

# Allocate the L2 table
$QEMU_IO -c "write 0 64k" -c "discard 0 64k" "$TEST_IMG" | _filter_qemu_io
# Pretend there is a preallocated zero cluster somewhere inside the
# image header
poke_file "$TEST_IMG" "$l2_offset" "\x80\x00\x00\x00\x00\x00\x2a\x01"
# Let's write to it!
$QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io

echo '--- Repairing ---'
_check_test_img -r all
352
# Case: a discard that is split into two requests while the refblock offset in
# the reftable is unaligned; the image is then repaired in two passes.
echo
echo '=== Discarding with an unaligned refblock ==='
echo

_make_test_img 64M

$QEMU_IO -c "write 0 128k" "$TEST_IMG" | _filter_qemu_io
# Make our refblock unaligned
poke_file "$TEST_IMG" "$((rt_offset))" "\x00\x00\x00\x00\x00\x00\x2a\x00"
# Now try to discard something that will be submitted as two requests
# (main part + tail)
# NOTE(review): unlike its neighbours this call is not piped through
# _filter_qemu_io; left as found.
$QEMU_IO -c "discard 0 65537" "$TEST_IMG"

echo '--- Repairing ---'
# Fails the first repair because the corruption prevents the check
# function from double-checking
# (Using -q for the first invocation, because otherwise the
#  double-check error message appears above the summary for some
#  reason -- so let's just hide the summary)
_check_test_img -q -r all
_check_test_img -r all
374
# Case: the second reftable entry points to a refblock far beyond the end of
# the image file, and the image is then shrunk.
echo
echo "=== Discarding an out-of-bounds refblock ==="
echo

_make_test_img 64M

# Pretend there's a refblock really up high
poke_file "$TEST_IMG" "$((rt_offset + 8))" "\x00\xff\xff\xff\x00\x00\x00\x00"
# Let's try to shrink the qcow2 image so that the block driver tries
# to discard that refblock (and see what happens!)
$QEMU_IMG resize --shrink "$TEST_IMG" 32M

echo '--- Checking and retrying ---'
# Image should not be resized
_img_info | grep 'virtual size'
# But it should pass this check, because the "partial" resize has
# already overwritten refblocks past the end
_check_test_img -r all
# So let's try again
$QEMU_IMG resize --shrink "$TEST_IMG" 32M
_img_info | grep 'virtual size'
396
# Case: with 1-bit refcounts, the second reftable entry points to a refblock
# at an offset that no existing refblock covers, and the image is then shrunk.
echo
echo "=== Discarding a non-covered in-bounds refblock ==="
echo

IMGOPTS='refcount_bits=1' _make_test_img 64M

# Pretend there's a refblock somewhere where there is no refblock to
# cover it (but the covering refblock has a valid index in the
# reftable)
# Every refblock covers 65536 * 8 * 65536 = 32 GB, so we have to point
# to 0x10_0000_0000 (64G) to point to the third refblock
poke_file "$TEST_IMG" "$((rt_offset + 8))" "\x00\x00\x00\x10\x00\x00\x00\x00"
$QEMU_IMG resize --shrink "$TEST_IMG" 32M

echo '--- Checking and retrying ---'
# Image should not be resized
_img_info | grep 'virtual size'
# But it should pass this check, because the "partial" resize has
# already overwritten refblocks past the end
_check_test_img -r all
# So let's try again
$QEMU_IMG resize --shrink "$TEST_IMG" 32M
_img_info | grep 'virtual size'
420
# Case: as the non-covered refblock case, except that the third reftable
# entry now exists and holds an unaligned refblock offset.
echo
echo "=== Discarding a refblock covered by an unaligned refblock ==="
echo

IMGOPTS='refcount_bits=1' _make_test_img 64M

# Point the second reftable entry at 0x10_0000_0000 (64G), which falls into
# the range covered by the third refblock
poke_file "$TEST_IMG" "$((rt_offset + 8))" "\x00\x00\x00\x10\x00\x00\x00\x00"
# But now we actually "create" an unaligned third refblock
poke_file "$TEST_IMG" "$((rt_offset + 16))" "\x00\x00\x00\x00\x00\x00\x02\x00"
$QEMU_IMG resize --shrink "$TEST_IMG" 32M

echo '--- Repairing ---'
# Fails the first repair because the corruption prevents the check
# function from double-checking
# (Using -q for the first invocation, because otherwise the
#  double-check error message appears above the summary for some
#  reason -- so let's just hide the summary)
_check_test_img -q -r all
_check_test_img -r all
441
# Case: a full QEMU process is started on an image with a zeroed first
# reftable entry, performs one write through the monitor, and then quits.
echo
echo "=== Testing the QEMU shutdown with a corrupted image ==="
echo
_make_test_img 64M
poke_file "$TEST_IMG" "$rt_offset" "\x00\x00\x00\x00\x00\x00\x00\x00"
# The QMP commands are fed to QEMU on stdin.
echo "{'execute': 'qmp_capabilities'}
      {'execute': 'human-monitor-command',
       'arguments': {'command-line': 'qemu-io drive \"write 0 512\"'}}
      {'execute': 'quit'}" \
    | $QEMU -qmp stdio -nographic -nodefaults \
            -drive if=none,node-name=drive,file="$TEST_IMG",driver=qcow2 \
    | _filter_qmp | _filter_qemu_io
454
# Case: QEMU is started with -incoming and reads from an area covered by a
# corrupted L1 entry; afterwards the image must not be flagged as corrupt.
echo
echo "=== Testing incoming inactive corrupted image ==="
echo

_make_test_img 64M
# Create an unaligned L1 entry, so qemu will signal a corruption when
# reading from the covered area
poke_file "$TEST_IMG" "$l1_offset" "\x00\x00\x00\x00\x2a\x2a\x2a\x2a"

# Inactive images are effectively read-only images, so this should be a
# non-fatal corruption (which does not modify the image)
# stderr is merged into the pipeline, and _filter_io_error removes the
# "Input/output error" line from the output.
echo "{'execute': 'qmp_capabilities'}
      {'execute': 'human-monitor-command',
       'arguments': {'command-line': 'qemu-io drive \"read 0 512\"'}}
      {'execute': 'quit'}" \
    | $QEMU -qmp stdio -nographic -nodefaults \
            -blockdev "{'node-name': 'drive',
                        'driver': 'qcow2',
                        'file': {
                            'driver': 'file',
                            'filename': '$TEST_IMG'
                        }}" \
            -incoming exec:'cat /dev/null' \
            2>&1 \
    | _filter_qmp | _filter_qemu_io | _filter_io_error

echo
# Image should not have been marked corrupt
_img_info --format-specific | grep 'corrupt:'
484
# success, all done
echo "*** done"
# Quoted so that a test name containing whitespace or glob characters removes
# exactly one file.
rm -f "$seq.full"
# Setting status to 0 is what the exit trap reports as success.
status=0
489