25
#include "qemu/osdep.h"
28
#include "block/block-io.h"
29
#include "qapi/error.h"
31
#include "qemu/bswap.h"
32
#include "qemu/memalign.h"
35
int coroutine_fn qcow2_shrink_l1_table(BlockDriverState *bs,
38
BDRVQcow2State *s = bs->opaque;
39
int new_l1_size, i, ret;
41
if (exact_size >= s->l1_size) {
45
new_l1_size = exact_size;
48
fprintf(stderr, "shrink l1_table from %d to %d\n", s->l1_size, new_l1_size);
51
BLKDBG_CO_EVENT(bs->file, BLKDBG_L1_SHRINK_WRITE_TABLE);
52
ret = bdrv_co_pwrite_zeroes(bs->file,
53
s->l1_table_offset + new_l1_size * L1E_SIZE,
54
(s->l1_size - new_l1_size) * L1E_SIZE, 0);
59
ret = bdrv_co_flush(bs->file->bs);
64
BLKDBG_CO_EVENT(bs->file, BLKDBG_L1_SHRINK_FREE_L2_CLUSTERS);
65
for (i = s->l1_size - 1; i > new_l1_size - 1; i--) {
66
if ((s->l1_table[i] & L1E_OFFSET_MASK) == 0) {
69
qcow2_free_clusters(bs, s->l1_table[i] & L1E_OFFSET_MASK,
70
s->cluster_size, QCOW2_DISCARD_ALWAYS);
81
memset(s->l1_table + new_l1_size, 0,
82
(s->l1_size - new_l1_size) * L1E_SIZE);
86
int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size,
89
BDRVQcow2State *s = bs->opaque;
90
int new_l1_size2, ret, i;
91
uint64_t *new_l1_table;
92
int64_t old_l1_table_offset, old_l1_size;
93
int64_t new_l1_table_offset, new_l1_size;
96
if (min_size <= s->l1_size)
102
if (min_size > INT_MAX / L1E_SIZE) {
107
new_l1_size = min_size;
110
new_l1_size = s->l1_size;
111
if (new_l1_size == 0) {
114
while (min_size > new_l1_size) {
115
new_l1_size = DIV_ROUND_UP(new_l1_size * 3, 2);
119
QEMU_BUILD_BUG_ON(QCOW_MAX_L1_SIZE > INT_MAX);
120
if (new_l1_size > QCOW_MAX_L1_SIZE / L1E_SIZE) {
125
fprintf(stderr, "grow l1_table from %d to %" PRId64 "\n",
126
s->l1_size, new_l1_size);
129
new_l1_size2 = L1E_SIZE * new_l1_size;
130
new_l1_table = qemu_try_blockalign(bs->file->bs, new_l1_size2);
131
if (new_l1_table == NULL) {
134
memset(new_l1_table, 0, new_l1_size2);
137
memcpy(new_l1_table, s->l1_table, s->l1_size * L1E_SIZE);
141
BLKDBG_EVENT(bs->file, BLKDBG_L1_GROW_ALLOC_TABLE);
142
new_l1_table_offset = qcow2_alloc_clusters(bs, new_l1_size2);
143
if (new_l1_table_offset < 0) {
144
qemu_vfree(new_l1_table);
145
return new_l1_table_offset;
148
ret = qcow2_cache_flush(bs, s->refcount_block_cache);
155
ret = qcow2_pre_write_overlap_check(bs, 0, new_l1_table_offset,
156
new_l1_size2, false);
161
BLKDBG_EVENT(bs->file, BLKDBG_L1_GROW_WRITE_TABLE);
162
for(i = 0; i < s->l1_size; i++)
163
new_l1_table[i] = cpu_to_be64(new_l1_table[i]);
164
ret = bdrv_pwrite_sync(bs->file, new_l1_table_offset, new_l1_size2,
168
for(i = 0; i < s->l1_size; i++)
169
new_l1_table[i] = be64_to_cpu(new_l1_table[i]);
172
BLKDBG_EVENT(bs->file, BLKDBG_L1_GROW_ACTIVATE_TABLE);
173
stl_be_p(data, new_l1_size);
174
stq_be_p(data + 4, new_l1_table_offset);
175
ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, l1_size),
176
sizeof(data), data, 0);
180
qemu_vfree(s->l1_table);
181
old_l1_table_offset = s->l1_table_offset;
182
s->l1_table_offset = new_l1_table_offset;
183
s->l1_table = new_l1_table;
184
old_l1_size = s->l1_size;
185
s->l1_size = new_l1_size;
186
qcow2_free_clusters(bs, old_l1_table_offset, old_l1_size * L1E_SIZE,
187
QCOW2_DISCARD_OTHER);
190
qemu_vfree(new_l1_table);
191
qcow2_free_clusters(bs, new_l1_table_offset, new_l1_size2,
192
QCOW2_DISCARD_OTHER);
210
static int GRAPH_RDLOCK
211
l2_load(BlockDriverState *bs, uint64_t offset,
212
uint64_t l2_offset, uint64_t **l2_slice)
214
BDRVQcow2State *s = bs->opaque;
215
int start_of_slice = l2_entry_size(s) *
216
(offset_to_l2_index(s, offset) - offset_to_l2_slice_index(s, offset));
218
return qcow2_cache_get(bs, s->l2_table_cache, l2_offset + start_of_slice,
227
int qcow2_write_l1_entry(BlockDriverState *bs, int l1_index)
229
BDRVQcow2State *s = bs->opaque;
232
int bufsize = MAX(L1E_SIZE,
233
MIN(bs->file->bs->bl.request_alignment, s->cluster_size));
234
int nentries = bufsize / L1E_SIZE;
235
g_autofree uint64_t *buf = g_try_new0(uint64_t, nentries);
241
l1_start_index = QEMU_ALIGN_DOWN(l1_index, nentries);
242
for (i = 0; i < MIN(nentries, s->l1_size - l1_start_index); i++) {
243
buf[i] = cpu_to_be64(s->l1_table[l1_start_index + i]);
246
ret = qcow2_pre_write_overlap_check(bs, QCOW2_OL_ACTIVE_L1,
247
s->l1_table_offset + L1E_SIZE * l1_start_index, bufsize, false);
252
BLKDBG_EVENT(bs->file, BLKDBG_L1_UPDATE);
253
ret = bdrv_pwrite_sync(bs->file,
254
s->l1_table_offset + L1E_SIZE * l1_start_index,
273
static int GRAPH_RDLOCK l2_allocate(BlockDriverState *bs, int l1_index)
275
BDRVQcow2State *s = bs->opaque;
276
uint64_t old_l2_offset;
277
uint64_t *l2_slice = NULL;
278
unsigned slice, slice_size2, n_slices;
282
old_l2_offset = s->l1_table[l1_index];
284
trace_qcow2_l2_allocate(bs, l1_index);
288
l2_offset = qcow2_alloc_clusters(bs, s->l2_size * l2_entry_size(s));
295
assert((l2_offset & L1E_OFFSET_MASK) == l2_offset);
298
if (l2_offset == 0) {
299
qcow2_signal_corruption(bs, true, -1, -1, "Preventing invalid "
300
"allocation of L2 table at offset 0");
305
ret = qcow2_cache_flush(bs, s->refcount_block_cache);
312
slice_size2 = s->l2_slice_size * l2_entry_size(s);
313
n_slices = s->cluster_size / slice_size2;
315
trace_qcow2_l2_allocate_get_empty(bs, l1_index);
316
for (slice = 0; slice < n_slices; slice++) {
317
ret = qcow2_cache_get_empty(bs, s->l2_table_cache,
318
l2_offset + slice * slice_size2,
319
(void **) &l2_slice);
324
if ((old_l2_offset & L1E_OFFSET_MASK) == 0) {
326
memset(l2_slice, 0, slice_size2);
329
uint64_t old_l2_slice_offset =
330
(old_l2_offset & L1E_OFFSET_MASK) + slice * slice_size2;
333
BLKDBG_EVENT(bs->file, BLKDBG_L2_ALLOC_COW_READ);
334
ret = qcow2_cache_get(bs, s->l2_table_cache, old_l2_slice_offset,
335
(void **) &old_slice);
340
memcpy(l2_slice, old_slice, slice_size2);
342
qcow2_cache_put(s->l2_table_cache, (void **) &old_slice);
346
BLKDBG_EVENT(bs->file, BLKDBG_L2_ALLOC_WRITE);
348
trace_qcow2_l2_allocate_write_l2(bs, l1_index);
349
qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_slice);
350
qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice);
353
ret = qcow2_cache_flush(bs, s->l2_table_cache);
359
trace_qcow2_l2_allocate_write_l1(bs, l1_index);
360
s->l1_table[l1_index] = l2_offset | QCOW_OFLAG_COPIED;
361
ret = qcow2_write_l1_entry(bs, l1_index);
366
trace_qcow2_l2_allocate_done(bs, l1_index, 0);
370
trace_qcow2_l2_allocate_done(bs, l1_index, ret);
371
if (l2_slice != NULL) {
372
qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice);
374
s->l1_table[l1_index] = old_l2_offset;
376
qcow2_free_clusters(bs, l2_offset, s->l2_size * l2_entry_size(s),
377
QCOW2_DISCARD_ALWAYS);
394
static int GRAPH_RDLOCK
395
qcow2_get_subcluster_range_type(BlockDriverState *bs, uint64_t l2_entry,
396
uint64_t l2_bitmap, unsigned sc_from,
397
QCow2SubclusterType *type)
399
BDRVQcow2State *s = bs->opaque;
402
*type = qcow2_get_subcluster_type(bs, l2_entry, l2_bitmap, sc_from);
404
if (*type == QCOW2_SUBCLUSTER_INVALID) {
406
} else if (!has_subclusters(s) || *type == QCOW2_SUBCLUSTER_COMPRESSED) {
407
return s->subclusters_per_cluster - sc_from;
411
case QCOW2_SUBCLUSTER_NORMAL:
412
val = l2_bitmap | QCOW_OFLAG_SUB_ALLOC_RANGE(0, sc_from);
413
return cto32(val) - sc_from;
415
case QCOW2_SUBCLUSTER_ZERO_PLAIN:
416
case QCOW2_SUBCLUSTER_ZERO_ALLOC:
417
val = (l2_bitmap | QCOW_OFLAG_SUB_ZERO_RANGE(0, sc_from)) >> 32;
418
return cto32(val) - sc_from;
420
case QCOW2_SUBCLUSTER_UNALLOCATED_PLAIN:
421
case QCOW2_SUBCLUSTER_UNALLOCATED_ALLOC:
422
val = ((l2_bitmap >> 32) | l2_bitmap)
423
& ~QCOW_OFLAG_SUB_ALLOC_RANGE(0, sc_from);
424
return ctz32(val) - sc_from;
427
g_assert_not_reached();
444
static int GRAPH_RDLOCK
445
count_contiguous_subclusters(BlockDriverState *bs, int nb_clusters,
446
unsigned sc_index, uint64_t *l2_slice,
449
BDRVQcow2State *s = bs->opaque;
451
bool check_offset = false;
452
uint64_t expected_offset = 0;
453
QCow2SubclusterType expected_type = QCOW2_SUBCLUSTER_NORMAL, type;
455
assert(*l2_index + nb_clusters <= s->l2_slice_size);
457
for (i = 0; i < nb_clusters; i++) {
458
unsigned first_sc = (i == 0) ? sc_index : 0;
459
uint64_t l2_entry = get_l2_entry(s, l2_slice, *l2_index + i);
460
uint64_t l2_bitmap = get_l2_bitmap(s, l2_slice, *l2_index + i);
461
int ret = qcow2_get_subcluster_range_type(bs, l2_entry, l2_bitmap,
468
if (type == QCOW2_SUBCLUSTER_COMPRESSED) {
472
expected_type = type;
473
expected_offset = l2_entry & L2E_OFFSET_MASK;
474
check_offset = (type == QCOW2_SUBCLUSTER_NORMAL ||
475
type == QCOW2_SUBCLUSTER_ZERO_ALLOC ||
476
type == QCOW2_SUBCLUSTER_UNALLOCATED_ALLOC);
477
} else if (type != expected_type) {
479
} else if (check_offset) {
480
expected_offset += s->cluster_size;
481
if (expected_offset != (l2_entry & L2E_OFFSET_MASK)) {
487
if (first_sc + ret < s->subclusters_per_cluster) {
495
static int coroutine_fn GRAPH_RDLOCK
496
do_perform_cow_read(BlockDriverState *bs, uint64_t src_cluster_offset,
497
unsigned offset_in_cluster, QEMUIOVector *qiov)
501
if (qiov->size == 0) {
505
BLKDBG_CO_EVENT(bs->file, BLKDBG_COW_READ);
517
assert(src_cluster_offset <= INT64_MAX);
518
assert(src_cluster_offset + offset_in_cluster <= INT64_MAX);
520
assert((uint64_t)qiov->size <= INT64_MAX);
521
bdrv_check_qiov_request(src_cluster_offset + offset_in_cluster, qiov->size,
522
qiov, 0, &error_abort);
528
ret = bs->drv->bdrv_co_preadv_part(bs,
529
src_cluster_offset + offset_in_cluster,
530
qiov->size, qiov, 0, 0);
538
static int coroutine_fn GRAPH_RDLOCK
539
do_perform_cow_write(BlockDriverState *bs, uint64_t cluster_offset,
540
unsigned offset_in_cluster, QEMUIOVector *qiov)
542
BDRVQcow2State *s = bs->opaque;
545
if (qiov->size == 0) {
549
ret = qcow2_pre_write_overlap_check(bs, 0,
550
cluster_offset + offset_in_cluster, qiov->size, true);
555
BLKDBG_CO_EVENT(bs->file, BLKDBG_COW_WRITE);
556
ret = bdrv_co_pwritev(s->data_file, cluster_offset + offset_in_cluster,
557
qiov->size, qiov, 0);
586
int qcow2_get_host_offset(BlockDriverState *bs, uint64_t offset,
587
unsigned int *bytes, uint64_t *host_offset,
588
QCow2SubclusterType *subcluster_type)
590
BDRVQcow2State *s = bs->opaque;
591
unsigned int l2_index, sc_index;
592
uint64_t l1_index, l2_offset, *l2_slice, l2_entry, l2_bitmap;
594
unsigned int offset_in_cluster;
595
uint64_t bytes_available, bytes_needed, nb_clusters;
596
QCow2SubclusterType type;
599
offset_in_cluster = offset_into_cluster(s, offset);
600
bytes_needed = (uint64_t) *bytes + offset_in_cluster;
606
((uint64_t) (s->l2_slice_size - offset_to_l2_slice_index(s, offset)))
609
if (bytes_needed > bytes_available) {
610
bytes_needed = bytes_available;
617
l1_index = offset_to_l1_index(s, offset);
618
if (l1_index >= s->l1_size) {
619
type = QCOW2_SUBCLUSTER_UNALLOCATED_PLAIN;
623
l2_offset = s->l1_table[l1_index] & L1E_OFFSET_MASK;
625
type = QCOW2_SUBCLUSTER_UNALLOCATED_PLAIN;
629
if (offset_into_cluster(s, l2_offset)) {
630
qcow2_signal_corruption(bs, true, -1, -1, "L2 table offset %#" PRIx64
631
" unaligned (L1 index: %#" PRIx64 ")",
632
l2_offset, l1_index);
638
ret = l2_load(bs, offset, l2_offset, &l2_slice);
645
l2_index = offset_to_l2_slice_index(s, offset);
646
sc_index = offset_to_sc_index(s, offset);
647
l2_entry = get_l2_entry(s, l2_slice, l2_index);
648
l2_bitmap = get_l2_bitmap(s, l2_slice, l2_index);
650
nb_clusters = size_to_clusters(s, bytes_needed);
654
assert(nb_clusters <= INT_MAX);
656
type = qcow2_get_subcluster_type(bs, l2_entry, l2_bitmap, sc_index);
657
if (s->qcow_version < 3 && (type == QCOW2_SUBCLUSTER_ZERO_PLAIN ||
658
type == QCOW2_SUBCLUSTER_ZERO_ALLOC)) {
659
qcow2_signal_corruption(bs, true, -1, -1, "Zero cluster entry found"
660
" in pre-v3 image (L2 offset: %#" PRIx64
661
", L2 index: %#x)", l2_offset, l2_index);
666
case QCOW2_SUBCLUSTER_INVALID:
668
case QCOW2_SUBCLUSTER_COMPRESSED:
669
if (has_data_file(bs)) {
670
qcow2_signal_corruption(bs, true, -1, -1, "Compressed cluster "
671
"entry found in image with external data "
672
"file (L2 offset: %#" PRIx64 ", L2 index: "
673
"%#x)", l2_offset, l2_index);
677
*host_offset = l2_entry;
679
case QCOW2_SUBCLUSTER_ZERO_PLAIN:
680
case QCOW2_SUBCLUSTER_UNALLOCATED_PLAIN:
682
case QCOW2_SUBCLUSTER_ZERO_ALLOC:
683
case QCOW2_SUBCLUSTER_NORMAL:
684
case QCOW2_SUBCLUSTER_UNALLOCATED_ALLOC: {
685
uint64_t host_cluster_offset = l2_entry & L2E_OFFSET_MASK;
686
*host_offset = host_cluster_offset + offset_in_cluster;
687
if (offset_into_cluster(s, host_cluster_offset)) {
688
qcow2_signal_corruption(bs, true, -1, -1,
689
"Cluster allocation offset %#"
690
PRIx64 " unaligned (L2 offset: %#" PRIx64
691
", L2 index: %#x)", host_cluster_offset,
692
l2_offset, l2_index);
696
if (has_data_file(bs) && *host_offset != offset) {
697
qcow2_signal_corruption(bs, true, -1, -1,
698
"External data file host cluster offset %#"
699
PRIx64 " does not match guest cluster "
701
", L2 index: %#x)", host_cluster_offset,
702
offset - offset_in_cluster, l2_index);
712
sc = count_contiguous_subclusters(bs, nb_clusters, sc_index,
713
l2_slice, &l2_index);
715
qcow2_signal_corruption(bs, true, -1, -1, "Invalid cluster entry found "
716
" (L2 offset: %#" PRIx64 ", L2 index: %#x)",
717
l2_offset, l2_index);
721
qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice);
723
bytes_available = ((int64_t)sc + sc_index) << s->subcluster_bits;
726
if (bytes_available > bytes_needed) {
727
bytes_available = bytes_needed;
733
assert(bytes_available - offset_in_cluster <= UINT_MAX);
734
*bytes = bytes_available - offset_in_cluster;
736
*subcluster_type = type;
741
qcow2_cache_put(s->l2_table_cache, (void **)&l2_slice);
755
static int GRAPH_RDLOCK
756
get_cluster_table(BlockDriverState *bs, uint64_t offset,
757
uint64_t **new_l2_slice, int *new_l2_index)
759
BDRVQcow2State *s = bs->opaque;
760
unsigned int l2_index;
761
uint64_t l1_index, l2_offset;
762
uint64_t *l2_slice = NULL;
767
l1_index = offset_to_l1_index(s, offset);
768
if (l1_index >= s->l1_size) {
769
ret = qcow2_grow_l1_table(bs, l1_index + 1, false);
775
assert(l1_index < s->l1_size);
776
l2_offset = s->l1_table[l1_index] & L1E_OFFSET_MASK;
777
if (offset_into_cluster(s, l2_offset)) {
778
qcow2_signal_corruption(bs, true, -1, -1, "L2 table offset %#" PRIx64
779
" unaligned (L1 index: %#" PRIx64 ")",
780
l2_offset, l1_index);
784
if (!(s->l1_table[l1_index] & QCOW_OFLAG_COPIED)) {
786
ret = l2_allocate(bs, l1_index);
793
qcow2_free_clusters(bs, l2_offset, s->l2_size * l2_entry_size(s),
794
QCOW2_DISCARD_OTHER);
798
l2_offset = s->l1_table[l1_index] & L1E_OFFSET_MASK;
799
assert(offset_into_cluster(s, l2_offset) == 0);
803
ret = l2_load(bs, offset, l2_offset, &l2_slice);
810
l2_index = offset_to_l2_slice_index(s, offset);
812
*new_l2_slice = l2_slice;
813
*new_l2_index = l2_index;
827
int coroutine_fn GRAPH_RDLOCK
828
qcow2_alloc_compressed_cluster_offset(BlockDriverState *bs, uint64_t offset,
829
int compressed_size, uint64_t *host_offset)
831
BDRVQcow2State *s = bs->opaque;
834
int64_t cluster_offset;
837
if (has_data_file(bs)) {
841
ret = get_cluster_table(bs, offset, &l2_slice, &l2_index);
848
cluster_offset = get_l2_entry(s, l2_slice, l2_index);
849
if (cluster_offset & L2E_OFFSET_MASK) {
850
qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice);
854
cluster_offset = qcow2_alloc_bytes(bs, compressed_size);
855
if (cluster_offset < 0) {
856
qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice);
857
return cluster_offset;
861
(cluster_offset + compressed_size - 1) / QCOW2_COMPRESSED_SECTOR_SIZE -
862
(cluster_offset / QCOW2_COMPRESSED_SECTOR_SIZE);
865
assert((cluster_offset & s->cluster_offset_mask) == cluster_offset);
866
assert((nb_csectors & s->csize_mask) == nb_csectors);
868
cluster_offset |= QCOW_OFLAG_COMPRESSED |
869
((uint64_t)nb_csectors << s->csize_shift);
875
BLKDBG_CO_EVENT(bs->file, BLKDBG_L2_UPDATE_COMPRESSED);
876
qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_slice);
877
set_l2_entry(s, l2_slice, l2_index, cluster_offset);
878
if (has_subclusters(s)) {
879
set_l2_bitmap(s, l2_slice, l2_index, 0);
881
qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice);
883
*host_offset = cluster_offset & s->cluster_offset_mask;
887
static int coroutine_fn GRAPH_RDLOCK
888
perform_cow(BlockDriverState *bs, QCowL2Meta *m)
890
BDRVQcow2State *s = bs->opaque;
891
Qcow2COWRegion *start = &m->cow_start;
892
Qcow2COWRegion *end = &m->cow_end;
893
unsigned buffer_size;
894
unsigned data_bytes = end->offset - (start->offset + start->nb_bytes);
896
uint8_t *start_buffer, *end_buffer;
900
assert(start->nb_bytes <= UINT_MAX - end->nb_bytes);
901
assert(start->nb_bytes + end->nb_bytes <= UINT_MAX - data_bytes);
902
assert(start->offset + start->nb_bytes <= end->offset);
904
if ((start->nb_bytes == 0 && end->nb_bytes == 0) || m->skip_cow) {
911
merge_reads = start->nb_bytes && end->nb_bytes && data_bytes <= 16384;
913
buffer_size = start->nb_bytes + data_bytes + end->nb_bytes;
918
size_t align = bdrv_opt_mem_align(bs);
919
assert(align > 0 && align <= UINT_MAX);
920
assert(QEMU_ALIGN_UP(start->nb_bytes, align) <=
921
UINT_MAX - end->nb_bytes);
922
buffer_size = QEMU_ALIGN_UP(start->nb_bytes, align) + end->nb_bytes;
927
start_buffer = qemu_try_blockalign(bs, buffer_size);
928
if (start_buffer == NULL) {
932
end_buffer = start_buffer + buffer_size - end->nb_bytes;
934
qemu_iovec_init(&qiov, 2 + (m->data_qiov ?
935
qemu_iovec_subvec_niov(m->data_qiov,
940
qemu_co_mutex_unlock(&s->lock);
945
qemu_iovec_add(&qiov, start_buffer, buffer_size);
946
ret = do_perform_cow_read(bs, m->offset, start->offset, &qiov);
948
qemu_iovec_add(&qiov, start_buffer, start->nb_bytes);
949
ret = do_perform_cow_read(bs, m->offset, start->offset, &qiov);
954
qemu_iovec_reset(&qiov);
955
qemu_iovec_add(&qiov, end_buffer, end->nb_bytes);
956
ret = do_perform_cow_read(bs, m->offset, end->offset, &qiov);
964
ret = qcow2_co_encrypt(bs,
965
m->alloc_offset + start->offset,
966
m->offset + start->offset,
967
start_buffer, start->nb_bytes);
972
ret = qcow2_co_encrypt(bs,
973
m->alloc_offset + end->offset,
974
m->offset + end->offset,
975
end_buffer, end->nb_bytes);
984
qemu_iovec_reset(&qiov);
985
if (start->nb_bytes) {
986
qemu_iovec_add(&qiov, start_buffer, start->nb_bytes);
988
qemu_iovec_concat(&qiov, m->data_qiov, m->data_qiov_offset, data_bytes);
990
qemu_iovec_add(&qiov, end_buffer, end->nb_bytes);
995
BLKDBG_CO_EVENT(bs->file, BLKDBG_WRITE_AIO);
996
ret = do_perform_cow_write(bs, m->alloc_offset, start->offset, &qiov);
999
qemu_iovec_reset(&qiov);
1000
qemu_iovec_add(&qiov, start_buffer, start->nb_bytes);
1001
ret = do_perform_cow_write(bs, m->alloc_offset, start->offset, &qiov);
1006
qemu_iovec_reset(&qiov);
1007
qemu_iovec_add(&qiov, end_buffer, end->nb_bytes);
1008
ret = do_perform_cow_write(bs, m->alloc_offset, end->offset, &qiov);
1012
qemu_co_mutex_lock(&s->lock);
1020
qcow2_cache_depends_on_flush(s->l2_table_cache);
1023
qemu_vfree(start_buffer);
1024
qemu_iovec_destroy(&qiov);
1028
int coroutine_fn qcow2_alloc_cluster_link_l2(BlockDriverState *bs,
1031
BDRVQcow2State *s = bs->opaque;
1032
int i, j = 0, l2_index, ret;
1033
uint64_t *old_cluster, *l2_slice;
1034
uint64_t cluster_offset = m->alloc_offset;
1036
trace_qcow2_cluster_link_l2(qemu_coroutine_self(), m->nb_clusters);
1037
assert(m->nb_clusters > 0);
1039
old_cluster = g_try_new(uint64_t, m->nb_clusters);
1040
if (old_cluster == NULL) {
1046
ret = perform_cow(bs, m);
1052
if (s->use_lazy_refcounts) {
1053
qcow2_mark_dirty(bs);
1055
if (qcow2_need_accurate_refcounts(s)) {
1056
qcow2_cache_set_dependency(bs, s->l2_table_cache,
1057
s->refcount_block_cache);
1060
ret = get_cluster_table(bs, m->offset, &l2_slice, &l2_index);
1064
qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_slice);
1066
assert(l2_index + m->nb_clusters <= s->l2_slice_size);
1067
assert(m->cow_end.offset + m->cow_end.nb_bytes <=
1068
m->nb_clusters << s->cluster_bits);
1069
for (i = 0; i < m->nb_clusters; i++) {
1070
uint64_t offset = cluster_offset + ((uint64_t)i << s->cluster_bits);
1077
if (get_l2_entry(s, l2_slice, l2_index + i) != 0) {
1078
old_cluster[j++] = get_l2_entry(s, l2_slice, l2_index + i);
1082
assert((offset & L2E_OFFSET_MASK) == offset);
1084
set_l2_entry(s, l2_slice, l2_index + i, offset | QCOW_OFLAG_COPIED);
1087
if (has_subclusters(s) && !m->prealloc) {
1088
uint64_t l2_bitmap = get_l2_bitmap(s, l2_slice, l2_index + i);
1089
unsigned written_from = m->cow_start.offset;
1090
unsigned written_to = m->cow_end.offset + m->cow_end.nb_bytes;
1091
int first_sc, last_sc;
1093
written_from = MAX(written_from, i << s->cluster_bits);
1094
written_to = MIN(written_to, (i + 1) << s->cluster_bits);
1095
assert(written_from < written_to);
1096
first_sc = offset_to_sc_index(s, written_from);
1097
last_sc = offset_to_sc_index(s, written_to - 1);
1098
l2_bitmap |= QCOW_OFLAG_SUB_ALLOC_RANGE(first_sc, last_sc + 1);
1099
l2_bitmap &= ~QCOW_OFLAG_SUB_ZERO_RANGE(first_sc, last_sc + 1);
1100
set_l2_bitmap(s, l2_slice, l2_index + i, l2_bitmap);
1105
qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice);
1113
if (!m->keep_old_clusters && j != 0) {
1114
for (i = 0; i < j; i++) {
1115
qcow2_free_any_cluster(bs, old_cluster[i], QCOW2_DISCARD_NEVER);
1121
g_free(old_cluster);
1129
void coroutine_fn qcow2_alloc_cluster_abort(BlockDriverState *bs, QCowL2Meta *m)
1131
BDRVQcow2State *s = bs->opaque;
1132
if (!has_data_file(bs) && !m->keep_old_clusters) {
1133
qcow2_free_clusters(bs, m->alloc_offset,
1134
m->nb_clusters << s->cluster_bits,
1135
QCOW2_DISCARD_NEVER);
1159
static int coroutine_fn GRAPH_RDLOCK
1160
calculate_l2_meta(BlockDriverState *bs, uint64_t host_cluster_offset,
1161
uint64_t guest_offset, unsigned bytes, uint64_t *l2_slice,
1162
QCowL2Meta **m, bool keep_old)
1164
BDRVQcow2State *s = bs->opaque;
1165
int sc_index, l2_index = offset_to_l2_slice_index(s, guest_offset);
1166
uint64_t l2_entry, l2_bitmap;
1167
unsigned cow_start_from, cow_end_to;
1168
unsigned cow_start_to = offset_into_cluster(s, guest_offset);
1169
unsigned cow_end_from = cow_start_to + bytes;
1170
unsigned nb_clusters = size_to_clusters(s, cow_end_from);
1171
QCowL2Meta *old_m = *m;
1172
QCow2SubclusterType type;
1174
bool skip_cow = keep_old;
1176
assert(nb_clusters <= s->l2_slice_size - l2_index);
1179
for (i = 0; i < nb_clusters; i++) {
1180
l2_entry = get_l2_entry(s, l2_slice, l2_index + i);
1181
l2_bitmap = get_l2_bitmap(s, l2_slice, l2_index + i);
1183
unsigned write_from = MAX(cow_start_to, i << s->cluster_bits);
1184
unsigned write_to = MIN(cow_end_from, (i + 1) << s->cluster_bits);
1185
int first_sc = offset_to_sc_index(s, write_from);
1186
int last_sc = offset_to_sc_index(s, write_to - 1);
1187
int cnt = qcow2_get_subcluster_range_type(bs, l2_entry, l2_bitmap,
1190
if (type != QCOW2_SUBCLUSTER_NORMAL || first_sc + cnt <= last_sc) {
1195
type = qcow2_get_subcluster_type(bs, l2_entry, l2_bitmap, 0);
1197
if (type == QCOW2_SUBCLUSTER_INVALID) {
1198
int l1_index = offset_to_l1_index(s, guest_offset);
1199
uint64_t l2_offset = s->l1_table[l1_index] & L1E_OFFSET_MASK;
1200
qcow2_signal_corruption(bs, true, -1, -1, "Invalid cluster "
1201
"entry found (L2 offset: %#" PRIx64
1203
l2_offset, l2_index + i);
1213
l2_entry = get_l2_entry(s, l2_slice, l2_index);
1214
l2_bitmap = get_l2_bitmap(s, l2_slice, l2_index);
1215
sc_index = offset_to_sc_index(s, guest_offset);
1216
type = qcow2_get_subcluster_type(bs, l2_entry, l2_bitmap, sc_index);
1220
case QCOW2_SUBCLUSTER_COMPRESSED:
1223
case QCOW2_SUBCLUSTER_NORMAL:
1224
case QCOW2_SUBCLUSTER_ZERO_ALLOC:
1225
case QCOW2_SUBCLUSTER_UNALLOCATED_ALLOC:
1226
if (has_subclusters(s)) {
1228
uint32_t alloc_bitmap = l2_bitmap & QCOW_L2_BITMAP_ALL_ALLOC;
1230
MIN(sc_index, ctz32(alloc_bitmap)) << s->subcluster_bits;
1235
case QCOW2_SUBCLUSTER_ZERO_PLAIN:
1236
case QCOW2_SUBCLUSTER_UNALLOCATED_PLAIN:
1237
cow_start_from = sc_index << s->subcluster_bits;
1240
g_assert_not_reached();
1244
case QCOW2_SUBCLUSTER_NORMAL:
1245
cow_start_from = cow_start_to;
1247
case QCOW2_SUBCLUSTER_ZERO_ALLOC:
1248
case QCOW2_SUBCLUSTER_UNALLOCATED_ALLOC:
1249
cow_start_from = sc_index << s->subcluster_bits;
1252
g_assert_not_reached();
1257
l2_index += nb_clusters - 1;
1258
l2_entry = get_l2_entry(s, l2_slice, l2_index);
1259
l2_bitmap = get_l2_bitmap(s, l2_slice, l2_index);
1260
sc_index = offset_to_sc_index(s, guest_offset + bytes - 1);
1261
type = qcow2_get_subcluster_type(bs, l2_entry, l2_bitmap, sc_index);
1265
case QCOW2_SUBCLUSTER_COMPRESSED:
1266
cow_end_to = ROUND_UP(cow_end_from, s->cluster_size);
1268
case QCOW2_SUBCLUSTER_NORMAL:
1269
case QCOW2_SUBCLUSTER_ZERO_ALLOC:
1270
case QCOW2_SUBCLUSTER_UNALLOCATED_ALLOC:
1271
cow_end_to = ROUND_UP(cow_end_from, s->cluster_size);
1272
if (has_subclusters(s)) {
1274
uint32_t alloc_bitmap = l2_bitmap & QCOW_L2_BITMAP_ALL_ALLOC;
1276
MIN(s->subclusters_per_cluster - sc_index - 1,
1277
clz32(alloc_bitmap)) << s->subcluster_bits;
1280
case QCOW2_SUBCLUSTER_ZERO_PLAIN:
1281
case QCOW2_SUBCLUSTER_UNALLOCATED_PLAIN:
1282
cow_end_to = ROUND_UP(cow_end_from, s->subcluster_size);
1285
g_assert_not_reached();
1289
case QCOW2_SUBCLUSTER_NORMAL:
1290
cow_end_to = cow_end_from;
1292
case QCOW2_SUBCLUSTER_ZERO_ALLOC:
1293
case QCOW2_SUBCLUSTER_UNALLOCATED_ALLOC:
1294
cow_end_to = ROUND_UP(cow_end_from, s->subcluster_size);
1297
g_assert_not_reached();
1301
*m = g_malloc0(sizeof(**m));
1302
**m = (QCowL2Meta) {
1305
.alloc_offset = host_cluster_offset,
1306
.offset = start_of_cluster(s, guest_offset),
1307
.nb_clusters = nb_clusters,
1309
.keep_old_clusters = keep_old,
1312
.offset = cow_start_from,
1313
.nb_bytes = cow_start_to - cow_start_from,
1316
.offset = cow_end_from,
1317
.nb_bytes = cow_end_to - cow_end_from,
1321
qemu_co_queue_init(&(*m)->dependent_requests);
1322
QLIST_INSERT_HEAD(&s->cluster_allocs, *m, next_in_flight);
1332
static bool GRAPH_RDLOCK
1333
cluster_needs_new_alloc(BlockDriverState *bs, uint64_t l2_entry)
1335
switch (qcow2_get_cluster_type(bs, l2_entry)) {
1336
case QCOW2_CLUSTER_NORMAL:
1337
case QCOW2_CLUSTER_ZERO_ALLOC:
1338
if (l2_entry & QCOW_OFLAG_COPIED) {
1342
case QCOW2_CLUSTER_UNALLOCATED:
1343
case QCOW2_CLUSTER_COMPRESSED:
1344
case QCOW2_CLUSTER_ZERO_PLAIN:
1364
static int GRAPH_RDLOCK
1365
count_single_write_clusters(BlockDriverState *bs, int nb_clusters,
1366
uint64_t *l2_slice, int l2_index, bool new_alloc)
1368
BDRVQcow2State *s = bs->opaque;
1369
uint64_t l2_entry = get_l2_entry(s, l2_slice, l2_index);
1370
uint64_t expected_offset = l2_entry & L2E_OFFSET_MASK;
1373
for (i = 0; i < nb_clusters; i++) {
1374
l2_entry = get_l2_entry(s, l2_slice, l2_index + i);
1375
if (cluster_needs_new_alloc(bs, l2_entry) != new_alloc) {
1379
if (expected_offset != (l2_entry & L2E_OFFSET_MASK)) {
1382
expected_offset += s->cluster_size;
1386
assert(i <= nb_clusters);
1404
static int coroutine_fn handle_dependencies(BlockDriverState *bs,
1405
uint64_t guest_offset,
1406
uint64_t *cur_bytes, QCowL2Meta **m)
1408
BDRVQcow2State *s = bs->opaque;
1409
QCowL2Meta *old_alloc;
1410
uint64_t bytes = *cur_bytes;
1412
QLIST_FOREACH(old_alloc, &s->cluster_allocs, next_in_flight) {
1414
uint64_t start = guest_offset;
1415
uint64_t end = start + bytes;
1416
uint64_t old_start = start_of_cluster(s, l2meta_cow_start(old_alloc));
1417
uint64_t old_end = ROUND_UP(l2meta_cow_end(old_alloc), s->cluster_size);
1419
if (end <= old_start || start >= old_end) {
1424
if (old_alloc->keep_old_clusters &&
1425
(end <= l2meta_cow_start(old_alloc) ||
1426
start >= l2meta_cow_end(old_alloc)))
1437
if (start < old_start) {
1439
bytes = old_start - start;
1450
if (bytes == 0 && *m) {
1460
qemu_co_queue_wait(&old_alloc->dependent_requests, &s->lock);
1494
static int coroutine_fn GRAPH_RDLOCK
1495
handle_copied(BlockDriverState *bs, uint64_t guest_offset,
1496
uint64_t *host_offset, uint64_t *bytes, QCowL2Meta **m)
1498
BDRVQcow2State *s = bs->opaque;
1500
uint64_t l2_entry, cluster_offset;
1502
uint64_t nb_clusters;
1503
unsigned int keep_clusters;
1506
trace_qcow2_handle_copied(qemu_coroutine_self(), guest_offset, *host_offset,
1509
assert(*host_offset == INV_OFFSET || offset_into_cluster(s, guest_offset)
1510
== offset_into_cluster(s, *host_offset));
1517
size_to_clusters(s, offset_into_cluster(s, guest_offset) + *bytes);
1519
l2_index = offset_to_l2_slice_index(s, guest_offset);
1520
nb_clusters = MIN(nb_clusters, s->l2_slice_size - l2_index);
1522
nb_clusters = MIN(nb_clusters, BDRV_REQUEST_MAX_BYTES >> s->cluster_bits);
1525
ret = get_cluster_table(bs, guest_offset, &l2_slice, &l2_index);
1530
l2_entry = get_l2_entry(s, l2_slice, l2_index);
1531
cluster_offset = l2_entry & L2E_OFFSET_MASK;
1533
if (!cluster_needs_new_alloc(bs, l2_entry)) {
1534
if (offset_into_cluster(s, cluster_offset)) {
1535
qcow2_signal_corruption(bs, true, -1, -1, "%s cluster offset "
1536
"%#" PRIx64 " unaligned (guest offset: %#"
1537
PRIx64 ")", l2_entry & QCOW_OFLAG_ZERO ?
1538
"Preallocated zero" : "Data",
1539
cluster_offset, guest_offset);
1545
if (*host_offset != INV_OFFSET && cluster_offset != *host_offset) {
1552
keep_clusters = count_single_write_clusters(bs, nb_clusters, l2_slice,
1554
assert(keep_clusters <= nb_clusters);
1556
*bytes = MIN(*bytes,
1557
keep_clusters * s->cluster_size
1558
- offset_into_cluster(s, guest_offset));
1559
assert(*bytes != 0);
1561
ret = calculate_l2_meta(bs, cluster_offset, guest_offset,
1562
*bytes, l2_slice, m, true);
1574
qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice);
1579
*host_offset = cluster_offset + offset_into_cluster(s, guest_offset);
1604
static int coroutine_fn GRAPH_RDLOCK
1605
do_alloc_cluster_offset(BlockDriverState *bs, uint64_t guest_offset,
1606
uint64_t *host_offset, uint64_t *nb_clusters)
1608
BDRVQcow2State *s = bs->opaque;
1610
trace_qcow2_do_alloc_clusters_offset(qemu_coroutine_self(), guest_offset,
1611
*host_offset, *nb_clusters);
1613
if (has_data_file(bs)) {
1614
assert(*host_offset == INV_OFFSET ||
1615
*host_offset == start_of_cluster(s, guest_offset));
1616
*host_offset = start_of_cluster(s, guest_offset);
1621
trace_qcow2_cluster_alloc_phys(qemu_coroutine_self());
1622
if (*host_offset == INV_OFFSET) {
1623
int64_t cluster_offset =
1624
qcow2_alloc_clusters(bs, *nb_clusters * s->cluster_size);
1625
if (cluster_offset < 0) {
1626
return cluster_offset;
1628
*host_offset = cluster_offset;
1631
int64_t ret = qcow2_alloc_clusters_at(bs, *host_offset, *nb_clusters);
1661
static int coroutine_fn GRAPH_RDLOCK
1662
handle_alloc(BlockDriverState *bs, uint64_t guest_offset,
1663
uint64_t *host_offset, uint64_t *bytes, QCowL2Meta **m)
1665
BDRVQcow2State *s = bs->opaque;
1668
uint64_t nb_clusters;
1671
uint64_t alloc_cluster_offset;
1673
trace_qcow2_handle_alloc(qemu_coroutine_self(), guest_offset, *host_offset,
1682
size_to_clusters(s, offset_into_cluster(s, guest_offset) + *bytes);
1684
l2_index = offset_to_l2_slice_index(s, guest_offset);
1685
nb_clusters = MIN(nb_clusters, s->l2_slice_size - l2_index);
1687
nb_clusters = MIN(nb_clusters, BDRV_REQUEST_MAX_BYTES >> s->cluster_bits);
1690
ret = get_cluster_table(bs, guest_offset, &l2_slice, &l2_index);
1695
nb_clusters = count_single_write_clusters(bs, nb_clusters,
1696
l2_slice, l2_index, true);
1701
assert(nb_clusters > 0);
1704
alloc_cluster_offset = *host_offset == INV_OFFSET ? INV_OFFSET :
1705
start_of_cluster(s, *host_offset);
1706
ret = do_alloc_cluster_offset(bs, guest_offset, &alloc_cluster_offset,
1713
if (nb_clusters == 0) {
1719
assert(alloc_cluster_offset != INV_OFFSET);
1735
uint64_t requested_bytes = *bytes + offset_into_cluster(s, guest_offset);
1736
int avail_bytes = nb_clusters << s->cluster_bits;
1737
int nb_bytes = MIN(requested_bytes, avail_bytes);
1739
*host_offset = alloc_cluster_offset + offset_into_cluster(s, guest_offset);
1740
*bytes = MIN(*bytes, nb_bytes - offset_into_cluster(s, guest_offset));
1741
assert(*bytes != 0);
1743
ret = calculate_l2_meta(bs, alloc_cluster_offset, guest_offset, *bytes,
1744
l2_slice, m, false);
1752
qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice);
1783
/*
 * Determines the host offset for the guest range of up to *bytes bytes
 * starting at offset.
 *
 * *host_offset starts out as INV_OFFSET and is set to the first valid
 * cluster_offset produced. The range is handed to handle_dependencies(),
 * handle_copied() and handle_alloc(), in that order. *bytes is finally
 * reduced by whatever remained unprocessed, and the result is asserted to
 * be valid and to have the same in-cluster offset as the guest offset.
 */
int coroutine_fn qcow2_alloc_host_offset(BlockDriverState *bs, uint64_t offset,
1784
unsigned int *bytes,
1785
uint64_t *host_offset,
1788
BDRVQcow2State *s = bs->opaque;
1789
uint64_t start, remaining;
1790
uint64_t cluster_offset;
1794
trace_qcow2_alloc_clusters_offset(qemu_coroutine_self(), offset, *bytes);
1799
/* No host offset is known yet. */
cluster_offset = INV_OFFSET;
1800
*host_offset = INV_OFFSET;
1806
/* Record the first valid cluster offset as the result. */
if (*host_offset == INV_OFFSET && cluster_offset != INV_OFFSET) {
1807
*host_offset = cluster_offset;
1810
assert(remaining >= cur_bytes);
1813
/* Advance past the cur_bytes that have been accounted for. */
remaining -= cur_bytes;
1815
if (cluster_offset != INV_OFFSET) {
1816
cluster_offset += cur_bytes;
1819
if (remaining == 0) {
1823
cur_bytes = remaining;
1843
/* handle_dependencies() may adjust cur_bytes and may return -EAGAIN. */
ret = handle_dependencies(bs, start, &cur_bytes, m);
1844
if (ret == -EAGAIN) {
1850
} else if (ret < 0) {
1852
} else if (cur_bytes == 0) {
1863
ret = handle_copied(bs, start, &cluster_offset, &cur_bytes, m);
1868
} else if (cur_bytes == 0) {
1876
ret = handle_alloc(bs, start, &cluster_offset, &cur_bytes, m);
1882
assert(cur_bytes == 0);
1887
/* Report only the bytes that were actually covered. */
*bytes -= remaining;
1889
assert(*host_offset != INV_OFFSET);
1890
assert(offset_into_cluster(s, *host_offset) ==
1891
offset_into_cluster(s, offset));
1901
/*
 * Discards up to nb_clusters clusters starting at guest offset 'offset',
 * limited to the L2 slice that contains 'offset'.
 *
 * For each cluster a new L2 entry (and, with subclusters, a new L2 bitmap)
 * is computed; entries that actually change are written back to the slice
 * and the slice is marked dirty. The old cluster is then either freed or,
 * if its reference is kept, optionally discarded on the data file.
 */
static int GRAPH_RDLOCK
1902
discard_in_l2_slice(BlockDriverState *bs, uint64_t offset, uint64_t nb_clusters,
1903
enum qcow2_discard_type type, bool full_discard)
1905
BDRVQcow2State *s = bs->opaque;
1911
ret = get_cluster_table(bs, offset, &l2_slice, &l2_index);
1917
/* Do not run past the end of this L2 slice. */
nb_clusters = MIN(nb_clusters, s->l2_slice_size - l2_index);
1918
assert(nb_clusters <= INT_MAX);
1920
for (i = 0; i < nb_clusters; i++) {
1921
uint64_t old_l2_entry = get_l2_entry(s, l2_slice, l2_index + i);
1922
uint64_t old_l2_bitmap = get_l2_bitmap(s, l2_slice, l2_index + i);
1923
uint64_t new_l2_entry = old_l2_entry;
1924
uint64_t new_l2_bitmap = old_l2_bitmap;
1925
QCow2ClusterType cluster_type =
1926
qcow2_get_cluster_type(bs, old_l2_entry);
1927
/*
 * The reference is kept only for non-compressed clusters, and only when
 * s->discard_no_unref is set and this is a QCOW2_DISCARD_REQUEST discard.
 */
bool keep_reference = (cluster_type != QCOW2_CLUSTER_COMPRESSED) &&
1929
(s->discard_no_unref &&
1930
type == QCOW2_DISCARD_REQUEST);
1946
new_l2_entry = new_l2_bitmap = 0;
1947
} else if (bs->backing || qcow2_cluster_is_allocated(cluster_type)) {
1948
if (has_subclusters(s)) {
1949
if (keep_reference) {
1950
new_l2_entry = old_l2_entry;
1954
new_l2_bitmap = QCOW_L2_BITMAP_ALL_ZEROES;
1956
if (s->qcow_version >= 3) {
1957
if (keep_reference) {
1958
new_l2_entry |= QCOW_OFLAG_ZERO;
1960
new_l2_entry = QCOW_OFLAG_ZERO;
1968
/* Detect the case where neither the L2 entry nor the bitmap changes. */
if (old_l2_entry == new_l2_entry && old_l2_bitmap == new_l2_bitmap) {
1973
qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_slice);
1974
set_l2_entry(s, l2_slice, l2_index + i, new_l2_entry);
1975
if (has_subclusters(s)) {
1976
set_l2_bitmap(s, l2_slice, l2_index + i, new_l2_bitmap);
1978
/*
 * Without keep_reference the old cluster is freed. Otherwise, if discards
 * of this type are passed through, normal and zero-allocated clusters are
 * discarded on the data file.
 */
if (!keep_reference) {
1980
qcow2_free_any_cluster(bs, old_l2_entry, type);
1981
} else if (s->discard_passthrough[type] &&
1982
(cluster_type == QCOW2_CLUSTER_NORMAL ||
1983
cluster_type == QCOW2_CLUSTER_ZERO_ALLOC)) {
1985
bdrv_pdiscard(s->data_file, old_l2_entry & L2E_OFFSET_MASK,
1990
/* Hand the L2 slice back to the L2 table cache. */
qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice);
1995
/*
 * Discards the guest range [offset, offset + bytes).
 *
 * offset must be cluster-aligned; the end of the range must be
 * cluster-aligned too unless it equals
 * bs->total_sectors << BDRV_SECTOR_BITS. The work is done through repeated
 * calls to discard_in_l2_slice(). s->cache_discards is set while this runs
 * and qcow2_process_discards() is called afterwards with ret.
 */
int qcow2_cluster_discard(BlockDriverState *bs, uint64_t offset,
1996
uint64_t bytes, enum qcow2_discard_type type,
1999
BDRVQcow2State *s = bs->opaque;
2000
uint64_t end_offset = offset + bytes;
2001
uint64_t nb_clusters;
2006
assert(QEMU_IS_ALIGNED(offset, s->cluster_size));
2007
assert(QEMU_IS_ALIGNED(end_offset, s->cluster_size) ||
2008
end_offset == bs->total_sectors << BDRV_SECTOR_BITS);
2010
nb_clusters = size_to_clusters(s, bytes);
2012
s->cache_discards = true;
2015
while (nb_clusters > 0) {
2016
cleared = discard_in_l2_slice(bs, offset, nb_clusters, type,
2023
/* 'cleared' is used as the number of clusters covered; advance by it. */
nb_clusters -= cleared;
2024
offset += (cleared * s->cluster_size);
2029
s->cache_discards = false;
2030
qcow2_process_discards(bs, ret);
2040
/*
 * Marks up to nb_clusters clusters starting at guest offset 'offset' as
 * zero, limited to the L2 slice that contains 'offset'.
 *
 * For each cluster the new L2 entry gets QCOW_OFLAG_ZERO (or, with
 * subclusters, the bitmap becomes QCOW_L2_BITMAP_ALL_ZEROES). Entries that
 * change are written back and the slice is marked dirty. The old cluster is
 * then either freed or, if its reference is kept, optionally discarded on
 * the data file.
 */
static int coroutine_fn GRAPH_RDLOCK
2041
zero_in_l2_slice(BlockDriverState *bs, uint64_t offset,
2042
uint64_t nb_clusters, int flags)
2044
BDRVQcow2State *s = bs->opaque;
2050
ret = get_cluster_table(bs, offset, &l2_slice, &l2_index);
2056
/* Do not run past the end of this L2 slice. */
nb_clusters = MIN(nb_clusters, s->l2_slice_size - l2_index);
2057
assert(nb_clusters <= INT_MAX);
2059
for (i = 0; i < nb_clusters; i++) {
2060
uint64_t old_l2_entry = get_l2_entry(s, l2_slice, l2_index + i);
2061
uint64_t old_l2_bitmap = get_l2_bitmap(s, l2_slice, l2_index + i);
2062
QCow2ClusterType type = qcow2_get_cluster_type(bs, old_l2_entry);
2063
/*
 * Compressed clusters are always unmapped; other allocated clusters only
 * when the caller passed BDRV_REQ_MAY_UNMAP.
 */
bool unmap = (type == QCOW2_CLUSTER_COMPRESSED) ||
2064
((flags & BDRV_REQ_MAY_UNMAP) && qcow2_cluster_is_allocated(type));
2065
/* s->discard_no_unref never applies to compressed clusters. */
bool keep_reference =
2066
(s->discard_no_unref && type != QCOW2_CLUSTER_COMPRESSED);
2067
uint64_t new_l2_entry = old_l2_entry;
2068
uint64_t new_l2_bitmap = old_l2_bitmap;
2070
if (unmap && !keep_reference) {
2074
if (has_subclusters(s)) {
2075
new_l2_bitmap = QCOW_L2_BITMAP_ALL_ZEROES;
2077
new_l2_entry |= QCOW_OFLAG_ZERO;
2080
/* Detect the case where neither the L2 entry nor the bitmap changes. */
if (old_l2_entry == new_l2_entry && old_l2_bitmap == new_l2_bitmap) {
2085
qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_slice);
2086
set_l2_entry(s, l2_slice, l2_index + i, new_l2_entry);
2087
if (has_subclusters(s)) {
2088
set_l2_bitmap(s, l2_slice, l2_index + i, new_l2_bitmap);
2092
/*
 * Without keep_reference the old cluster is freed. Otherwise, if
 * QCOW2_DISCARD_REQUEST discards are passed through, normal and
 * zero-allocated clusters are discarded on the data file.
 */
if (!keep_reference) {
2094
qcow2_free_any_cluster(bs, old_l2_entry, QCOW2_DISCARD_REQUEST);
2095
} else if (s->discard_passthrough[QCOW2_DISCARD_REQUEST] &&
2096
(type == QCOW2_CLUSTER_NORMAL ||
2097
type == QCOW2_CLUSTER_ZERO_ALLOC)) {
2099
bdrv_pdiscard(s->data_file, old_l2_entry & L2E_OFFSET_MASK,
2105
/* Hand the L2 slice back to the L2 table cache. */
qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice);
2110
/*
 * Marks nb_subclusters subclusters, starting at guest offset 'offset', as
 * zero in the L2 bitmap of the cluster containing 'offset'.
 *
 * 'offset' must be subcluster-aligned, the range must cover at least one
 * subcluster but fewer than a whole cluster, and it must not extend past the
 * end of the cluster (see the assertions). The bitmap is only written, and
 * the slice only marked dirty, if the bitmap value actually changes.
 */
static int coroutine_fn GRAPH_RDLOCK
2111
zero_l2_subclusters(BlockDriverState *bs, uint64_t offset,
2112
unsigned nb_subclusters)
2114
BDRVQcow2State *s = bs->opaque;
2116
uint64_t old_l2_bitmap, l2_bitmap;
2117
/* sc is the index of the first affected subcluster within its cluster. */
int l2_index, ret, sc = offset_to_sc_index(s, offset);
2120
assert(nb_subclusters > 0 && nb_subclusters < s->subclusters_per_cluster);
2121
assert(sc + nb_subclusters <= s->subclusters_per_cluster);
2122
assert(offset_into_subcluster(s, offset) == 0);
2124
ret = get_cluster_table(bs, offset, &l2_slice, &l2_index);
2129
/* Dispatch on the type of the cluster that owns these subclusters. */
switch (qcow2_get_cluster_type(bs, get_l2_entry(s, l2_slice, l2_index))) {
2130
case QCOW2_CLUSTER_COMPRESSED:
2133
case QCOW2_CLUSTER_NORMAL:
2134
case QCOW2_CLUSTER_UNALLOCATED:
2137
g_assert_not_reached();
2140
old_l2_bitmap = l2_bitmap = get_l2_bitmap(s, l2_slice, l2_index);
2142
/* Set the "zero" bits and clear the "allocated" bits for the range. */
l2_bitmap |= QCOW_OFLAG_SUB_ZERO_RANGE(sc, sc + nb_subclusters);
2143
l2_bitmap &= ~QCOW_OFLAG_SUB_ALLOC_RANGE(sc, sc + nb_subclusters);
2145
if (old_l2_bitmap != l2_bitmap) {
2146
set_l2_bitmap(s, l2_slice, l2_index, l2_bitmap);
2147
qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_slice);
2152
/* Hand the L2 slice back to the L2 table cache. */
qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice);
2157
/*
 * Zeroizes the guest range [offset, offset + bytes).
 *
 * offset must be subcluster-aligned; the end must be subcluster-aligned or
 * lie at/after bs->total_sectors << BDRV_SECTOR_BITS. With a raw data file,
 * zeroes are also written to s->data_file. Images with qcow_version < 3 are
 * handled by qcow2_cluster_discard() instead. Otherwise the partial clusters
 * at the head and tail go through zero_l2_subclusters() and the clusters in
 * between through zero_in_l2_slice(). s->cache_discards is set while this
 * runs and qcow2_process_discards() is called afterwards with ret.
 */
int coroutine_fn qcow2_subcluster_zeroize(BlockDriverState *bs, uint64_t offset,
2158
uint64_t bytes, int flags)
2160
BDRVQcow2State *s = bs->opaque;
2161
uint64_t end_offset = offset + bytes;
2162
uint64_t nb_clusters;
2163
unsigned head, tail;
2169
if (data_file_is_raw(bs)) {
2170
assert(has_data_file(bs));
2171
ret = bdrv_co_pwrite_zeroes(s->data_file, offset, bytes, flags);
2178
assert(offset_into_subcluster(s, offset) == 0);
2179
assert(offset_into_subcluster(s, end_offset) == 0 ||
2180
end_offset >= bs->total_sectors << BDRV_SECTOR_BITS);
2186
if (s->qcow_version < 3) {
2188
return qcow2_cluster_discard(bs, offset, bytes,
2189
QCOW2_DISCARD_REQUEST, false);
2194
/*
 * head: bytes from offset up to the next cluster boundary, or up to
 * end_offset if that comes first.
 */
head = MIN(end_offset, ROUND_UP(offset, s->cluster_size)) - offset;
2197
/*
 * tail: bytes from the start of end_offset's cluster (or from offset, if
 * that is later) up to end_offset; 0 when end_offset is at or beyond
 * bs->total_sectors << BDRV_SECTOR_BITS.
 */
tail = (end_offset >= bs->total_sectors << BDRV_SECTOR_BITS) ? 0 :
2198
end_offset - MAX(offset, start_of_cluster(s, end_offset));
2201
s->cache_discards = true;
2204
ret = zero_l2_subclusters(bs, offset - head,
2205
size_to_subclusters(s, head));
2212
nb_clusters = size_to_clusters(s, end_offset - offset);
2214
while (nb_clusters > 0) {
2215
cleared = zero_in_l2_slice(bs, offset, nb_clusters, flags);
2221
/* 'cleared' is used as the number of clusters covered; advance by it. */
nb_clusters -= cleared;
2222
offset += (cleared * s->cluster_size);
2226
ret = zero_l2_subclusters(bs, end_offset, size_to_subclusters(s, tail));
2234
s->cache_discards = false;
2235
qcow2_process_discards(bs, ret);
2248
/*
 * Walks the L2 tables referenced by the l1_size entries of l1_table and
 * processes the zero clusters (QCOW2_CLUSTER_ZERO_PLAIN and
 * QCOW2_CLUSTER_ZERO_ALLOC) found in them, writing explicit zeroes to the
 * data file and replacing the L2 entry with a plain offset.
 *
 * Must not be used on images with subclusters (asserted). l1_table may be
 * the active L1 table (s->l1_table) or another one; in the latter case a
 * temporary slice buffer is allocated here and freed at the end, while the
 * active case finishes with qcow2_cache_put() on the L2 table cache.
 * *visited_l1_entries is incremented as entries are handled and reported
 * through status_cb together with l1_entries.
 */
static int GRAPH_RDLOCK
2249
expand_zero_clusters_in_l1(BlockDriverState *bs, uint64_t *l1_table,
2250
int l1_size, int64_t *visited_l1_entries,
2252
BlockDriverAmendStatusCB *status_cb,
2255
BDRVQcow2State *s = bs->opaque;
2256
bool is_active_l1 = (l1_table == s->l1_table);
2257
uint64_t *l2_slice = NULL;
2258
unsigned slice, slice_size2, n_slices;
2263
assert(!has_subclusters(s));
2265
/* Size of one L2 slice in bytes, and the number of slices per cluster. */
slice_size2 = s->l2_slice_size * l2_entry_size(s);
2266
n_slices = s->cluster_size / slice_size2;
2268
if (!is_active_l1) {
2271
l2_slice = qemu_try_blockalign(bs->file->bs, slice_size2);
2272
if (l2_slice == NULL) {
2277
for (i = 0; i < l1_size; i++) {
2278
uint64_t l2_offset = l1_table[i] & L1E_OFFSET_MASK;
2279
uint64_t l2_refcount;
2283
(*visited_l1_entries)++;
2285
status_cb(bs, *visited_l1_entries, l1_entries, cb_opaque);
2290
/* An L2 table offset that is not cluster-aligned is reported as corruption. */
if (offset_into_cluster(s, l2_offset)) {
2291
qcow2_signal_corruption(bs, true, -1, -1, "L2 table offset %#"
2292
PRIx64 " unaligned (L1 index: %#x)",
2298
ret = qcow2_get_refcount(bs, l2_offset >> s->cluster_bits,
2304
for (slice = 0; slice < n_slices; slice++) {
2305
uint64_t slice_offset = l2_offset + slice * slice_size2;
2306
bool l2_dirty = false;
2309
ret = qcow2_cache_get(bs, s->l2_table_cache, slice_offset,
2310
(void **)&l2_slice);
2313
ret = bdrv_pread(bs->file, slice_offset, slice_size2,
2320
for (j = 0; j < s->l2_slice_size; j++) {
2321
uint64_t l2_entry = get_l2_entry(s, l2_slice, j);
2322
int64_t offset = l2_entry & L2E_OFFSET_MASK;
2323
QCow2ClusterType cluster_type =
2324
qcow2_get_cluster_type(bs, l2_entry);
2326
/* Only the two zero cluster types are of interest here. */
if (cluster_type != QCOW2_CLUSTER_ZERO_PLAIN &&
2327
cluster_type != QCOW2_CLUSTER_ZERO_ALLOC) {
2331
if (cluster_type == QCOW2_CLUSTER_ZERO_PLAIN) {
2338
set_l2_entry(s, l2_slice, j, 0);
2343
offset = qcow2_alloc_clusters(bs, s->cluster_size);
2350
assert((offset & L2E_OFFSET_MASK) == offset);
2352
if (l2_refcount > 1) {
2355
ret = qcow2_update_cluster_refcount(
2356
bs, offset >> s->cluster_bits,
2357
refcount_diff(1, l2_refcount), false,
2358
QCOW2_DISCARD_OTHER);
2360
qcow2_free_clusters(bs, offset, s->cluster_size,
2361
QCOW2_DISCARD_OTHER);
2367
/* A data cluster offset that is not cluster-aligned is reported as corruption. */
if (offset_into_cluster(s, offset)) {
2368
int l2_index = slice * s->l2_slice_size + j;
2369
qcow2_signal_corruption(
2371
"Cluster allocation offset "
2372
"%#" PRIx64 " unaligned (L2 offset: %#"
2373
PRIx64 ", L2 index: %#x)", offset,
2374
l2_offset, l2_index);
2375
/* The cluster is freed again only in the ZERO_PLAIN case. */
if (cluster_type == QCOW2_CLUSTER_ZERO_PLAIN) {
2376
qcow2_free_clusters(bs, offset, s->cluster_size,
2377
QCOW2_DISCARD_ALWAYS);
2383
ret = qcow2_pre_write_overlap_check(bs, 0, offset,
2384
s->cluster_size, true);
2386
if (cluster_type == QCOW2_CLUSTER_ZERO_PLAIN) {
2387
qcow2_free_clusters(bs, offset, s->cluster_size,
2388
QCOW2_DISCARD_ALWAYS);
2393
/* Write one cluster of explicit zeroes to the data file. */
ret = bdrv_pwrite_zeroes(s->data_file, offset,
2394
s->cluster_size, 0);
2396
if (cluster_type == QCOW2_CLUSTER_ZERO_PLAIN) {
2397
qcow2_free_clusters(bs, offset, s->cluster_size,
2398
QCOW2_DISCARD_ALWAYS);
2403
/* QCOW_OFLAG_COPIED is set only when the L2 table's refcount is exactly 1. */
if (l2_refcount == 1) {
2404
set_l2_entry(s, l2_slice, j, offset | QCOW_OFLAG_COPIED);
2406
set_l2_entry(s, l2_slice, j, offset);
2417
qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_slice);
2418
qcow2_cache_depends_on_flush(s->l2_table_cache);
2420
qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice);
2423
/* Overlap check before the slice is written directly to bs->file. */
ret = qcow2_pre_write_overlap_check(
2424
bs, QCOW2_OL_INACTIVE_L2 | QCOW2_OL_ACTIVE_L2,
2425
slice_offset, slice_size2, false);
2430
ret = bdrv_pwrite(bs->file, slice_offset, slice_size2,
2439
(*visited_l1_entries)++;
2441
status_cb(bs, *visited_l1_entries, l1_entries, cb_opaque);
2449
/* Release the slice the same way it was obtained: buffer vs. cache entry. */
if (!is_active_l1) {
2450
qemu_vfree(l2_slice);
2452
qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice);
2464
/*
 * Runs expand_zero_clusters_in_l1() on the active L1 table and then on the
 * L1 table of every snapshot.
 *
 * l1_entries is the total number of L1 entries over the active table and all
 * snapshots; it is passed along with a running visited_l1_entries counter,
 * status_cb and cb_opaque. Each snapshot L1 table is validated with
 * qcow2_validate_table(), read from bs->file into a reusable buffer and
 * converted with be64_to_cpus() before being processed.
 */
int qcow2_expand_zero_clusters(BlockDriverState *bs,
2465
BlockDriverAmendStatusCB *status_cb,
2468
BDRVQcow2State *s = bs->opaque;
2469
uint64_t *l1_table = NULL;
2470
int64_t l1_entries = 0, visited_l1_entries = 0;
2475
/* Total number of L1 entries: the active table plus every snapshot's. */
l1_entries = s->l1_size;
2476
for (i = 0; i < s->nb_snapshots; i++) {
2477
l1_entries += s->snapshots[i].l1_size;
2481
ret = expand_zero_clusters_in_l1(bs, s->l1_table, s->l1_size,
2482
&visited_l1_entries, l1_entries,
2483
status_cb, cb_opaque);
2495
ret = qcow2_cache_empty(bs, s->l2_table_cache);
2500
for (i = 0; i < s->nb_snapshots; i++) {
2502
uint64_t *new_l1_table;
2503
Error *local_err = NULL;
2505
ret = qcow2_validate_table(bs, s->snapshots[i].l1_table_offset,
2506
s->snapshots[i].l1_size, L1E_SIZE,
2507
QCOW_MAX_L1_SIZE, "Snapshot L1 table",
2510
error_report_err(local_err);
2514
/*
 * Resize the shared buffer to this snapshot's L1 table size. The result
 * goes into a temporary first so that l1_table is not overwritten when
 * g_try_realloc() fails.
 */
l1_size2 = s->snapshots[i].l1_size * L1E_SIZE;
2515
new_l1_table = g_try_realloc(l1_table, l1_size2);
2517
if (!new_l1_table) {
2522
l1_table = new_l1_table;
2524
ret = bdrv_pread(bs->file, s->snapshots[i].l1_table_offset, l1_size2,
2530
/* Convert the entries that were just read, in place. */
for (j = 0; j < s->snapshots[i].l1_size; j++) {
2531
be64_to_cpus(&l1_table[j]);
2534
ret = expand_zero_clusters_in_l1(bs, l1_table, s->snapshots[i].l1_size,
2535
&visited_l1_entries, l1_entries,
2536
status_cb, cb_opaque);
2549
/*
 * Splits the L2 entry of a compressed cluster into its offset and size.
 *
 * l2_entry must describe a QCOW2_CLUSTER_COMPRESSED cluster (asserted).
 * *coffset receives l2_entry masked with s->cluster_offset_mask. *csize
 * receives the sector count times QCOW2_COMPRESSED_SECTOR_SIZE, minus the
 * offset of *coffset within its compressed sector.
 */
void qcow2_parse_compressed_l2_entry(BlockDriverState *bs, uint64_t l2_entry,
2550
uint64_t *coffset, int *csize)
2552
BDRVQcow2State *s = bs->opaque;
2555
assert(qcow2_get_cluster_type(bs, l2_entry) == QCOW2_CLUSTER_COMPRESSED);
2557
*coffset = l2_entry & s->cluster_offset_mask;
2559
/* The field extracted from the entry is one less than the sector count. */
nb_csectors = ((l2_entry >> s->csize_shift) & s->csize_mask) + 1;
2560
*csize = nb_csectors * QCOW2_COMPRESSED_SECTOR_SIZE -
2561
(*coffset & (QCOW2_COMPRESSED_SECTOR_SIZE - 1));