|
|
1.1 root 1: /*
2: * Block driver for the QCOW version 2 format
3: *
4: * Copyright (c) 2004-2006 Fabrice Bellard
5: *
6: * Permission is hereby granted, free of charge, to any person obtaining a copy
7: * of this software and associated documentation files (the "Software"), to deal
8: * in the Software without restriction, including without limitation the rights
9: * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10: * copies of the Software, and to permit persons to whom the Software is
11: * furnished to do so, subject to the following conditions:
12: *
13: * The above copyright notice and this permission notice shall be included in
14: * all copies or substantial portions of the Software.
15: *
16: * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17: * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18: * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19: * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20: * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21: * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22: * THE SOFTWARE.
23: */
24:
25: #include "qemu-common.h"
26: #include "block_int.h"
27: #include "block/qcow2.h"
28:
29: static int64_t alloc_clusters_noref(BlockDriverState *bs, int64_t size);
1.1.1.4 ! root 30: static int QEMU_WARN_UNUSED_RESULT update_refcount(BlockDriverState *bs,
1.1 root 31: int64_t offset, int64_t length,
32: int addend);
33:
34:
35: static int cache_refcount_updates = 0;
36:
37: static int write_refcount_block(BDRVQcowState *s)
38: {
39: size_t size = s->cluster_size;
40:
41: if (s->refcount_block_cache_offset == 0) {
42: return 0;
43: }
44:
45: if (bdrv_pwrite(s->hd, s->refcount_block_cache_offset,
46: s->refcount_block_cache, size) != size)
47: {
48: return -EIO;
49: }
50:
51: return 0;
52: }
53:
54: /*********************************************************/
55: /* refcount handling */
56:
57: int qcow2_refcount_init(BlockDriverState *bs)
58: {
59: BDRVQcowState *s = bs->opaque;
60: int ret, refcount_table_size2, i;
61:
62: s->refcount_block_cache = qemu_malloc(s->cluster_size);
63: refcount_table_size2 = s->refcount_table_size * sizeof(uint64_t);
64: s->refcount_table = qemu_malloc(refcount_table_size2);
65: if (s->refcount_table_size > 0) {
66: ret = bdrv_pread(s->hd, s->refcount_table_offset,
67: s->refcount_table, refcount_table_size2);
68: if (ret != refcount_table_size2)
69: goto fail;
70: for(i = 0; i < s->refcount_table_size; i++)
71: be64_to_cpus(&s->refcount_table[i]);
72: }
73: return 0;
74: fail:
75: return -ENOMEM;
76: }
77:
78: void qcow2_refcount_close(BlockDriverState *bs)
79: {
80: BDRVQcowState *s = bs->opaque;
81: qemu_free(s->refcount_block_cache);
82: qemu_free(s->refcount_table);
83: }
84:
85:
86: static int load_refcount_block(BlockDriverState *bs,
87: int64_t refcount_block_offset)
88: {
89: BDRVQcowState *s = bs->opaque;
90: int ret;
91:
92: if (cache_refcount_updates) {
93: write_refcount_block(s);
94: }
95:
96: ret = bdrv_pread(s->hd, refcount_block_offset, s->refcount_block_cache,
97: s->cluster_size);
98: if (ret != s->cluster_size)
99: return -EIO;
100: s->refcount_block_cache_offset = refcount_block_offset;
101: return 0;
102: }
103:
104: static int get_refcount(BlockDriverState *bs, int64_t cluster_index)
105: {
106: BDRVQcowState *s = bs->opaque;
107: int refcount_table_index, block_index;
108: int64_t refcount_block_offset;
109:
110: refcount_table_index = cluster_index >> (s->cluster_bits - REFCOUNT_SHIFT);
111: if (refcount_table_index >= s->refcount_table_size)
112: return 0;
113: refcount_block_offset = s->refcount_table[refcount_table_index];
114: if (!refcount_block_offset)
115: return 0;
116: if (refcount_block_offset != s->refcount_block_cache_offset) {
117: /* better than nothing: return allocated if read error */
118: if (load_refcount_block(bs, refcount_block_offset) < 0)
119: return 1;
120: }
121: block_index = cluster_index &
122: ((1 << (s->cluster_bits - REFCOUNT_SHIFT)) - 1);
123: return be16_to_cpu(s->refcount_block_cache[block_index]);
124: }
125:
1.1.1.4 ! root 126: /*
! 127: * Rounds the refcount table size up to avoid growing the table for each single
! 128: * refcount block that is allocated.
! 129: */
! 130: static unsigned int next_refcount_table_size(BDRVQcowState *s,
! 131: unsigned int min_size)
! 132: {
! 133: unsigned int min_clusters = (min_size >> (s->cluster_bits - 3)) + 1;
! 134: unsigned int refcount_table_clusters =
! 135: MAX(1, s->refcount_table_size >> (s->cluster_bits - 3));
! 136:
! 137: while (min_clusters > refcount_table_clusters) {
! 138: refcount_table_clusters = (refcount_table_clusters * 3 + 1) / 2;
! 139: }
! 140:
! 141: return refcount_table_clusters << (s->cluster_bits - 3);
! 142: }
! 143:
! 144:
! 145: /* Checks if two offsets are described by the same refcount block */
! 146: static int in_same_refcount_block(BDRVQcowState *s, uint64_t offset_a,
! 147: uint64_t offset_b)
! 148: {
! 149: uint64_t block_a = offset_a >> (2 * s->cluster_bits - REFCOUNT_SHIFT);
! 150: uint64_t block_b = offset_b >> (2 * s->cluster_bits - REFCOUNT_SHIFT);
! 151:
! 152: return (block_a == block_b);
! 153: }
! 154:
! 155: /*
! 156: * Loads a refcount block. If it doesn't exist yet, it is allocated first
! 157: * (including growing the refcount table if needed).
! 158: *
! 159: * Returns the offset of the refcount block on success or -errno in error case
! 160: */
! 161: static int64_t alloc_refcount_block(BlockDriverState *bs, int64_t cluster_index)
1.1 root 162: {
163: BDRVQcowState *s = bs->opaque;
1.1.1.4 ! root 164: unsigned int refcount_table_index;
! 165: int ret;
1.1 root 166:
1.1.1.4 ! root 167: /* Find the refcount block for the given cluster */
! 168: refcount_table_index = cluster_index >> (s->cluster_bits - REFCOUNT_SHIFT);
! 169:
! 170: if (refcount_table_index < s->refcount_table_size) {
! 171:
! 172: uint64_t refcount_block_offset =
! 173: s->refcount_table[refcount_table_index];
! 174:
! 175: /* If it's already there, we're done */
! 176: if (refcount_block_offset) {
! 177: if (refcount_block_offset != s->refcount_block_cache_offset) {
! 178: ret = load_refcount_block(bs, refcount_block_offset);
! 179: if (ret < 0) {
! 180: return ret;
! 181: }
! 182: }
! 183: return refcount_block_offset;
! 184: }
! 185: }
! 186:
! 187: /*
! 188: * If we came here, we need to allocate something. Something is at least
! 189: * a cluster for the new refcount block. It may also include a new refcount
! 190: * table if the old refcount table is too small.
! 191: *
! 192: * Note that allocating clusters here needs some special care:
! 193: *
! 194: * - We can't use the normal qcow2_alloc_clusters(), it would try to
! 195: * increase the refcount and very likely we would end up with an endless
! 196: * recursion. Instead we must place the refcount blocks in a way that
! 197: * they can describe them themselves.
! 198: *
! 199: * - We need to consider that at this point we are inside update_refcounts
! 200: * and doing the initial refcount increase. This means that some clusters
! 201: * have already been allocated by the caller, but their refcount isn't
! 202: * accurate yet. free_cluster_index tells us where this allocation ends
! 203: * as long as we don't overwrite it by freeing clusters.
! 204: *
! 205: * - alloc_clusters_noref and qcow2_free_clusters may load a different
! 206: * refcount block into the cache
! 207: */
! 208:
! 209: if (cache_refcount_updates) {
! 210: ret = write_refcount_block(s);
! 211: if (ret < 0) {
! 212: return ret;
! 213: }
! 214: }
! 215:
! 216: /* Allocate the refcount block itself and mark it as used */
! 217: uint64_t new_block = alloc_clusters_noref(bs, s->cluster_size);
! 218: memset(s->refcount_block_cache, 0, s->cluster_size);
! 219: s->refcount_block_cache_offset = new_block;
! 220:
! 221: #ifdef DEBUG_ALLOC2
! 222: fprintf(stderr, "qcow2: Allocate refcount block %d for %" PRIx64
! 223: " at %" PRIx64 "\n",
! 224: refcount_table_index, cluster_index << s->cluster_bits, new_block);
! 225: #endif
! 226:
! 227: if (in_same_refcount_block(s, new_block, cluster_index << s->cluster_bits)) {
! 228: /* The block describes itself, need to update the cache */
! 229: int block_index = (new_block >> s->cluster_bits) &
! 230: ((1 << (s->cluster_bits - REFCOUNT_SHIFT)) - 1);
! 231: s->refcount_block_cache[block_index] = cpu_to_be16(1);
! 232: } else {
! 233: /* Described somewhere else. This can recurse at most twice before we
! 234: * arrive at a block that describes itself. */
! 235: ret = update_refcount(bs, new_block, s->cluster_size, 1);
! 236: if (ret < 0) {
! 237: goto fail_block;
! 238: }
! 239: }
! 240:
! 241: /* Now the new refcount block needs to be written to disk */
! 242: ret = bdrv_pwrite(s->hd, new_block, s->refcount_block_cache,
! 243: s->cluster_size);
! 244: if (ret < 0) {
! 245: goto fail_block;
! 246: }
! 247:
! 248: /* If the refcount table is big enough, just hook the block up there */
! 249: if (refcount_table_index < s->refcount_table_size) {
! 250: uint64_t data64 = cpu_to_be64(new_block);
! 251: ret = bdrv_pwrite(s->hd,
! 252: s->refcount_table_offset + refcount_table_index * sizeof(uint64_t),
! 253: &data64, sizeof(data64));
! 254: if (ret < 0) {
! 255: goto fail_block;
1.1 root 256: }
1.1.1.4 ! root 257:
! 258: s->refcount_table[refcount_table_index] = new_block;
! 259: return new_block;
1.1 root 260: }
1.1.1.4 ! root 261:
! 262: /*
! 263: * If we come here, we need to grow the refcount table. Again, a new
! 264: * refcount table needs some space and we can't simply allocate to avoid
! 265: * endless recursion.
! 266: *
! 267: * Therefore let's grab new refcount blocks at the end of the image, which
! 268: * will describe themselves and the new refcount table. This way we can
! 269: * reference them only in the new table and do the switch to the new
! 270: * refcount table at once without producing an inconsistent state in
! 271: * between.
! 272: */
! 273: /* Calculate the number of refcount blocks needed so far */
! 274: uint64_t refcount_block_clusters = 1 << (s->cluster_bits - REFCOUNT_SHIFT);
! 275: uint64_t blocks_used = (s->free_cluster_index +
! 276: refcount_block_clusters - 1) / refcount_block_clusters;
! 277:
! 278: /* And now we need at least one block more for the new metadata */
! 279: uint64_t table_size = next_refcount_table_size(s, blocks_used + 1);
! 280: uint64_t last_table_size;
! 281: uint64_t blocks_clusters;
! 282: do {
! 283: uint64_t table_clusters = size_to_clusters(s, table_size);
! 284: blocks_clusters = 1 +
! 285: ((table_clusters + refcount_block_clusters - 1)
! 286: / refcount_block_clusters);
! 287: uint64_t meta_clusters = table_clusters + blocks_clusters;
! 288:
! 289: last_table_size = table_size;
! 290: table_size = next_refcount_table_size(s, blocks_used +
! 291: ((meta_clusters + refcount_block_clusters - 1)
! 292: / refcount_block_clusters));
! 293:
! 294: } while (last_table_size != table_size);
! 295:
1.1 root 296: #ifdef DEBUG_ALLOC2
1.1.1.4 ! root 297: fprintf(stderr, "qcow2: Grow refcount table %" PRId32 " => %" PRId64 "\n",
! 298: s->refcount_table_size, table_size);
1.1 root 299: #endif
1.1.1.4 ! root 300:
! 301: /* Create the new refcount table and blocks */
! 302: uint64_t meta_offset = (blocks_used * refcount_block_clusters) *
! 303: s->cluster_size;
! 304: uint64_t table_offset = meta_offset + blocks_clusters * s->cluster_size;
! 305: uint16_t *new_blocks = qemu_mallocz(blocks_clusters * s->cluster_size);
! 306: uint64_t *new_table = qemu_mallocz(table_size * sizeof(uint64_t));
! 307:
! 308: assert(meta_offset >= (s->free_cluster_index * s->cluster_size));
! 309:
! 310: /* Fill the new refcount table */
1.1 root 311: memcpy(new_table, s->refcount_table,
1.1.1.4 ! root 312: s->refcount_table_size * sizeof(uint64_t));
! 313: new_table[refcount_table_index] = new_block;
! 314:
! 315: int i;
! 316: for (i = 0; i < blocks_clusters; i++) {
! 317: new_table[blocks_used + i] = meta_offset + (i * s->cluster_size);
! 318: }
! 319:
! 320: /* Fill the refcount blocks */
! 321: uint64_t table_clusters = size_to_clusters(s, table_size * sizeof(uint64_t));
! 322: int block = 0;
! 323: for (i = 0; i < table_clusters + blocks_clusters; i++) {
! 324: new_blocks[block++] = cpu_to_be16(1);
! 325: }
! 326:
! 327: /* Write refcount blocks to disk */
! 328: ret = bdrv_pwrite(s->hd, meta_offset, new_blocks,
! 329: blocks_clusters * s->cluster_size);
! 330: qemu_free(new_blocks);
! 331: if (ret < 0) {
! 332: goto fail_table;
! 333: }
! 334:
! 335: /* Write refcount table to disk */
! 336: for(i = 0; i < table_size; i++) {
1.1 root 337: cpu_to_be64s(&new_table[i]);
1.1.1.4 ! root 338: }
! 339:
! 340: ret = bdrv_pwrite(s->hd, table_offset, new_table,
! 341: table_size * sizeof(uint64_t));
! 342: if (ret < 0) {
! 343: goto fail_table;
! 344: }
! 345:
! 346: for(i = 0; i < table_size; i++) {
! 347: cpu_to_be64s(&new_table[i]);
! 348: }
1.1 root 349:
1.1.1.4 ! root 350: /* Hook up the new refcount table in the qcow2 header */
! 351: uint8_t data[12];
1.1 root 352: cpu_to_be64w((uint64_t*)data, table_offset);
1.1.1.4 ! root 353: cpu_to_be32w((uint32_t*)(data + 8), table_clusters);
1.1.1.3 root 354: ret = bdrv_pwrite(s->hd, offsetof(QCowHeader, refcount_table_offset),
1.1.1.4 ! root 355: data, sizeof(data));
! 356: if (ret < 0) {
! 357: goto fail_table;
1.1.1.3 root 358: }
359:
1.1.1.4 ! root 360: /* And switch it in memory */
! 361: uint64_t old_table_offset = s->refcount_table_offset;
! 362: uint64_t old_table_size = s->refcount_table_size;
! 363:
1.1 root 364: qemu_free(s->refcount_table);
365: s->refcount_table = new_table;
1.1.1.4 ! root 366: s->refcount_table_size = table_size;
1.1 root 367: s->refcount_table_offset = table_offset;
368:
1.1.1.4 ! root 369: /* Free old table. Remember, we must not change free_cluster_index */
! 370: uint64_t old_free_cluster_index = s->free_cluster_index;
1.1 root 371: qcow2_free_clusters(bs, old_table_offset, old_table_size * sizeof(uint64_t));
1.1.1.4 ! root 372: s->free_cluster_index = old_free_cluster_index;
1.1 root 373:
1.1.1.4 ! root 374: ret = load_refcount_block(bs, new_block);
! 375: if (ret < 0) {
! 376: goto fail_block;
1.1 root 377: }
378:
1.1.1.4 ! root 379: return new_block;
1.1 root 380:
1.1.1.4 ! root 381: fail_table:
! 382: qemu_free(new_table);
! 383: fail_block:
! 384: s->refcount_block_cache_offset = 0;
! 385: return ret;
1.1 root 386: }
387:
388: #define REFCOUNTS_PER_SECTOR (512 >> REFCOUNT_SHIFT)
389: static int write_refcount_block_entries(BDRVQcowState *s,
390: int64_t refcount_block_offset, int first_index, int last_index)
391: {
392: size_t size;
393:
394: if (cache_refcount_updates) {
395: return 0;
396: }
397:
398: first_index &= ~(REFCOUNTS_PER_SECTOR - 1);
399: last_index = (last_index + REFCOUNTS_PER_SECTOR)
400: & ~(REFCOUNTS_PER_SECTOR - 1);
401:
402: size = (last_index - first_index) << REFCOUNT_SHIFT;
403: if (bdrv_pwrite(s->hd,
404: refcount_block_offset + (first_index << REFCOUNT_SHIFT),
405: &s->refcount_block_cache[first_index], size) != size)
406: {
407: return -EIO;
408: }
409:
410: return 0;
411: }
412:
413: /* XXX: cache several refcount block clusters ? */
1.1.1.3 root 414: static int QEMU_WARN_UNUSED_RESULT update_refcount(BlockDriverState *bs,
415: int64_t offset, int64_t length, int addend)
1.1 root 416: {
417: BDRVQcowState *s = bs->opaque;
418: int64_t start, last, cluster_offset;
419: int64_t refcount_block_offset = 0;
420: int64_t table_index = -1, old_table_index;
421: int first_index = -1, last_index = -1;
1.1.1.3 root 422: int ret;
1.1 root 423:
424: #ifdef DEBUG_ALLOC2
425: printf("update_refcount: offset=%" PRId64 " size=%" PRId64 " addend=%d\n",
426: offset, length, addend);
427: #endif
1.1.1.3 root 428: if (length < 0) {
1.1 root 429: return -EINVAL;
1.1.1.3 root 430: } else if (length == 0) {
431: return 0;
432: }
433:
1.1 root 434: start = offset & ~(s->cluster_size - 1);
435: last = (offset + length - 1) & ~(s->cluster_size - 1);
436: for(cluster_offset = start; cluster_offset <= last;
437: cluster_offset += s->cluster_size)
438: {
439: int block_index, refcount;
440: int64_t cluster_index = cluster_offset >> s->cluster_bits;
1.1.1.3 root 441: int64_t new_block;
1.1 root 442:
443: /* Only write refcount block to disk when we are done with it */
444: old_table_index = table_index;
445: table_index = cluster_index >> (s->cluster_bits - REFCOUNT_SHIFT);
446: if ((old_table_index >= 0) && (table_index != old_table_index)) {
447:
448: if (write_refcount_block_entries(s, refcount_block_offset,
449: first_index, last_index) < 0)
450: {
451: return -EIO;
452: }
453:
454: first_index = -1;
455: last_index = -1;
456: }
457:
458: /* Load the refcount block and allocate it if needed */
1.1.1.3 root 459: new_block = alloc_refcount_block(bs, cluster_index);
460: if (new_block < 0) {
461: ret = new_block;
462: goto fail;
1.1 root 463: }
1.1.1.3 root 464: refcount_block_offset = new_block;
1.1 root 465:
466: /* we can update the count and save it */
467: block_index = cluster_index &
468: ((1 << (s->cluster_bits - REFCOUNT_SHIFT)) - 1);
469: if (first_index == -1 || block_index < first_index) {
470: first_index = block_index;
471: }
472: if (block_index > last_index) {
473: last_index = block_index;
474: }
475:
476: refcount = be16_to_cpu(s->refcount_block_cache[block_index]);
477: refcount += addend;
1.1.1.3 root 478: if (refcount < 0 || refcount > 0xffff) {
479: ret = -EINVAL;
480: goto fail;
481: }
1.1 root 482: if (refcount == 0 && cluster_index < s->free_cluster_index) {
483: s->free_cluster_index = cluster_index;
484: }
485: s->refcount_block_cache[block_index] = cpu_to_be16(refcount);
486: }
487:
1.1.1.3 root 488: ret = 0;
489: fail:
490:
1.1 root 491: /* Write last changed block to disk */
492: if (refcount_block_offset != 0) {
493: if (write_refcount_block_entries(s, refcount_block_offset,
494: first_index, last_index) < 0)
495: {
1.1.1.3 root 496: return ret < 0 ? ret : -EIO;
1.1 root 497: }
498: }
499:
1.1.1.3 root 500: /*
501: * Try do undo any updates if an error is returned (This may succeed in
502: * some cases like ENOSPC for allocating a new refcount block)
503: */
504: if (ret < 0) {
505: int dummy;
506: dummy = update_refcount(bs, offset, cluster_offset - offset, -addend);
507: }
508:
509: return ret;
1.1 root 510: }
511:
512: /* addend must be 1 or -1 */
513: static int update_cluster_refcount(BlockDriverState *bs,
514: int64_t cluster_index,
515: int addend)
516: {
517: BDRVQcowState *s = bs->opaque;
518: int ret;
519:
520: ret = update_refcount(bs, cluster_index << s->cluster_bits, 1, addend);
521: if (ret < 0) {
522: return ret;
523: }
524:
525: return get_refcount(bs, cluster_index);
526: }
527:
528:
529:
530: /*********************************************************/
531: /* cluster allocation functions */
532:
533:
534:
535: /* return < 0 if error */
536: static int64_t alloc_clusters_noref(BlockDriverState *bs, int64_t size)
537: {
538: BDRVQcowState *s = bs->opaque;
539: int i, nb_clusters;
540:
541: nb_clusters = size_to_clusters(s, size);
542: retry:
543: for(i = 0; i < nb_clusters; i++) {
544: int64_t i = s->free_cluster_index++;
545: if (get_refcount(bs, i) != 0)
546: goto retry;
547: }
548: #ifdef DEBUG_ALLOC2
549: printf("alloc_clusters: size=%" PRId64 " -> %" PRId64 "\n",
550: size,
551: (s->free_cluster_index - nb_clusters) << s->cluster_bits);
552: #endif
553: return (s->free_cluster_index - nb_clusters) << s->cluster_bits;
554: }
555:
556: int64_t qcow2_alloc_clusters(BlockDriverState *bs, int64_t size)
557: {
558: int64_t offset;
1.1.1.3 root 559: int ret;
1.1 root 560:
561: offset = alloc_clusters_noref(bs, size);
1.1.1.3 root 562: ret = update_refcount(bs, offset, size, 1);
563: if (ret < 0) {
564: return ret;
565: }
1.1 root 566: return offset;
567: }
568:
569: /* only used to allocate compressed sectors. We try to allocate
570: contiguous sectors. size must be <= cluster_size */
571: int64_t qcow2_alloc_bytes(BlockDriverState *bs, int size)
572: {
573: BDRVQcowState *s = bs->opaque;
574: int64_t offset, cluster_offset;
575: int free_in_cluster;
576:
577: assert(size > 0 && size <= s->cluster_size);
578: if (s->free_byte_offset == 0) {
579: s->free_byte_offset = qcow2_alloc_clusters(bs, s->cluster_size);
1.1.1.3 root 580: if (s->free_byte_offset < 0) {
581: return s->free_byte_offset;
582: }
1.1 root 583: }
584: redo:
585: free_in_cluster = s->cluster_size -
586: (s->free_byte_offset & (s->cluster_size - 1));
587: if (size <= free_in_cluster) {
588: /* enough space in current cluster */
589: offset = s->free_byte_offset;
590: s->free_byte_offset += size;
591: free_in_cluster -= size;
592: if (free_in_cluster == 0)
593: s->free_byte_offset = 0;
594: if ((offset & (s->cluster_size - 1)) != 0)
595: update_cluster_refcount(bs, offset >> s->cluster_bits, 1);
596: } else {
597: offset = qcow2_alloc_clusters(bs, s->cluster_size);
1.1.1.3 root 598: if (offset < 0) {
599: return offset;
600: }
1.1 root 601: cluster_offset = s->free_byte_offset & ~(s->cluster_size - 1);
602: if ((cluster_offset + s->cluster_size) == offset) {
603: /* we are lucky: contiguous data */
604: offset = s->free_byte_offset;
605: update_cluster_refcount(bs, offset >> s->cluster_bits, 1);
606: s->free_byte_offset += size;
607: } else {
608: s->free_byte_offset = offset;
609: goto redo;
610: }
611: }
612: return offset;
613: }
614:
615: void qcow2_free_clusters(BlockDriverState *bs,
616: int64_t offset, int64_t size)
617: {
1.1.1.3 root 618: int ret;
619:
620: ret = update_refcount(bs, offset, size, -1);
621: if (ret < 0) {
622: fprintf(stderr, "qcow2_free_clusters failed: %s\n", strerror(-ret));
623: abort();
624: }
1.1 root 625: }
626:
627: /*
628: * free_any_clusters
629: *
630: * free clusters according to its type: compressed or not
631: *
632: */
633:
634: void qcow2_free_any_clusters(BlockDriverState *bs,
635: uint64_t cluster_offset, int nb_clusters)
636: {
637: BDRVQcowState *s = bs->opaque;
638:
639: /* free the cluster */
640:
641: if (cluster_offset & QCOW_OFLAG_COMPRESSED) {
642: int nb_csectors;
643: nb_csectors = ((cluster_offset >> s->csize_shift) &
644: s->csize_mask) + 1;
645: qcow2_free_clusters(bs,
646: (cluster_offset & s->cluster_offset_mask) & ~511,
647: nb_csectors * 512);
648: return;
649: }
650:
651: qcow2_free_clusters(bs, cluster_offset, nb_clusters << s->cluster_bits);
652:
653: return;
654: }
655:
656:
657:
658: /*********************************************************/
659: /* snapshots and image creation */
660:
661:
662:
663: void qcow2_create_refcount_update(QCowCreateState *s, int64_t offset,
664: int64_t size)
665: {
666: int refcount;
667: int64_t start, last, cluster_offset;
668: uint16_t *p;
669:
670: start = offset & ~(s->cluster_size - 1);
671: last = (offset + size - 1) & ~(s->cluster_size - 1);
672: for(cluster_offset = start; cluster_offset <= last;
673: cluster_offset += s->cluster_size) {
674: p = &s->refcount_block[cluster_offset >> s->cluster_bits];
675: refcount = be16_to_cpu(*p);
676: refcount++;
677: *p = cpu_to_be16(refcount);
678: }
679: }
680:
681: /* update the refcounts of snapshots and the copied flag */
682: int qcow2_update_snapshot_refcount(BlockDriverState *bs,
683: int64_t l1_table_offset, int l1_size, int addend)
684: {
685: BDRVQcowState *s = bs->opaque;
686: uint64_t *l1_table, *l2_table, l2_offset, offset, l1_size2, l1_allocated;
687: int64_t old_offset, old_l2_offset;
688: int l2_size, i, j, l1_modified, l2_modified, nb_csectors, refcount;
689:
690: qcow2_l2_cache_reset(bs);
691: cache_refcount_updates = 1;
692:
693: l2_table = NULL;
694: l1_table = NULL;
695: l1_size2 = l1_size * sizeof(uint64_t);
696: l1_allocated = 0;
697: if (l1_table_offset != s->l1_table_offset) {
1.1.1.2 root 698: if (l1_size2 != 0) {
699: l1_table = qemu_mallocz(align_offset(l1_size2, 512));
700: } else {
701: l1_table = NULL;
702: }
1.1 root 703: l1_allocated = 1;
704: if (bdrv_pread(s->hd, l1_table_offset,
705: l1_table, l1_size2) != l1_size2)
706: goto fail;
707: for(i = 0;i < l1_size; i++)
708: be64_to_cpus(&l1_table[i]);
709: } else {
710: assert(l1_size == s->l1_size);
711: l1_table = s->l1_table;
712: l1_allocated = 0;
713: }
714:
715: l2_size = s->l2_size * sizeof(uint64_t);
716: l2_table = qemu_malloc(l2_size);
717: l1_modified = 0;
718: for(i = 0; i < l1_size; i++) {
719: l2_offset = l1_table[i];
720: if (l2_offset) {
721: old_l2_offset = l2_offset;
722: l2_offset &= ~QCOW_OFLAG_COPIED;
723: l2_modified = 0;
724: if (bdrv_pread(s->hd, l2_offset, l2_table, l2_size) != l2_size)
725: goto fail;
726: for(j = 0; j < s->l2_size; j++) {
727: offset = be64_to_cpu(l2_table[j]);
728: if (offset != 0) {
729: old_offset = offset;
730: offset &= ~QCOW_OFLAG_COPIED;
731: if (offset & QCOW_OFLAG_COMPRESSED) {
732: nb_csectors = ((offset >> s->csize_shift) &
733: s->csize_mask) + 1;
1.1.1.3 root 734: if (addend != 0) {
735: int ret;
736: ret = update_refcount(bs,
737: (offset & s->cluster_offset_mask) & ~511,
738: nb_csectors * 512, addend);
739: if (ret < 0) {
740: goto fail;
741: }
742: }
1.1 root 743: /* compressed clusters are never modified */
744: refcount = 2;
745: } else {
746: if (addend != 0) {
747: refcount = update_cluster_refcount(bs, offset >> s->cluster_bits, addend);
748: } else {
749: refcount = get_refcount(bs, offset >> s->cluster_bits);
750: }
751: }
752:
753: if (refcount == 1) {
754: offset |= QCOW_OFLAG_COPIED;
755: }
756: if (offset != old_offset) {
757: l2_table[j] = cpu_to_be64(offset);
758: l2_modified = 1;
759: }
760: }
761: }
762: if (l2_modified) {
763: if (bdrv_pwrite(s->hd,
764: l2_offset, l2_table, l2_size) != l2_size)
765: goto fail;
766: }
767:
768: if (addend != 0) {
769: refcount = update_cluster_refcount(bs, l2_offset >> s->cluster_bits, addend);
770: } else {
771: refcount = get_refcount(bs, l2_offset >> s->cluster_bits);
772: }
773: if (refcount == 1) {
774: l2_offset |= QCOW_OFLAG_COPIED;
775: }
776: if (l2_offset != old_l2_offset) {
777: l1_table[i] = l2_offset;
778: l1_modified = 1;
779: }
780: }
781: }
782: if (l1_modified) {
783: for(i = 0; i < l1_size; i++)
784: cpu_to_be64s(&l1_table[i]);
785: if (bdrv_pwrite(s->hd, l1_table_offset, l1_table,
786: l1_size2) != l1_size2)
787: goto fail;
788: for(i = 0; i < l1_size; i++)
789: be64_to_cpus(&l1_table[i]);
790: }
791: if (l1_allocated)
792: qemu_free(l1_table);
793: qemu_free(l2_table);
794: cache_refcount_updates = 0;
795: write_refcount_block(s);
796: return 0;
797: fail:
798: if (l1_allocated)
799: qemu_free(l1_table);
800: qemu_free(l2_table);
801: cache_refcount_updates = 0;
802: write_refcount_block(s);
803: return -EIO;
804: }
805:
806:
807:
808:
809: /*********************************************************/
810: /* refcount checking functions */
811:
812:
813:
814: /*
815: * Increases the refcount for a range of clusters in a given refcount table.
816: * This is used to construct a temporary refcount table out of L1 and L2 tables
817: * which can be compared the the refcount table saved in the image.
818: *
819: * Returns the number of errors in the image that were found
820: */
821: static int inc_refcounts(BlockDriverState *bs,
822: uint16_t *refcount_table,
823: int refcount_table_size,
824: int64_t offset, int64_t size)
825: {
826: BDRVQcowState *s = bs->opaque;
827: int64_t start, last, cluster_offset;
828: int k;
829: int errors = 0;
830:
831: if (size <= 0)
832: return 0;
833:
834: start = offset & ~(s->cluster_size - 1);
835: last = (offset + size - 1) & ~(s->cluster_size - 1);
836: for(cluster_offset = start; cluster_offset <= last;
837: cluster_offset += s->cluster_size) {
838: k = cluster_offset >> s->cluster_bits;
839: if (k < 0 || k >= refcount_table_size) {
840: fprintf(stderr, "ERROR: invalid cluster offset=0x%" PRIx64 "\n",
841: cluster_offset);
842: errors++;
843: } else {
844: if (++refcount_table[k] == 0) {
845: fprintf(stderr, "ERROR: overflow cluster offset=0x%" PRIx64
846: "\n", cluster_offset);
847: errors++;
848: }
849: }
850: }
851:
852: return errors;
853: }
854:
855: /*
856: * Increases the refcount in the given refcount table for the all clusters
857: * referenced in the L2 table. While doing so, performs some checks on L2
858: * entries.
859: *
860: * Returns the number of errors found by the checks or -errno if an internal
861: * error occurred.
862: */
863: static int check_refcounts_l2(BlockDriverState *bs,
864: uint16_t *refcount_table, int refcount_table_size, int64_t l2_offset,
865: int check_copied)
866: {
867: BDRVQcowState *s = bs->opaque;
868: uint64_t *l2_table, offset;
869: int i, l2_size, nb_csectors, refcount;
870: int errors = 0;
871:
872: /* Read L2 table from disk */
873: l2_size = s->l2_size * sizeof(uint64_t);
874: l2_table = qemu_malloc(l2_size);
875:
876: if (bdrv_pread(s->hd, l2_offset, l2_table, l2_size) != l2_size)
877: goto fail;
878:
879: /* Do the actual checks */
880: for(i = 0; i < s->l2_size; i++) {
881: offset = be64_to_cpu(l2_table[i]);
882: if (offset != 0) {
883: if (offset & QCOW_OFLAG_COMPRESSED) {
884: /* Compressed clusters don't have QCOW_OFLAG_COPIED */
885: if (offset & QCOW_OFLAG_COPIED) {
886: fprintf(stderr, "ERROR: cluster %" PRId64 ": "
887: "copied flag must never be set for compressed "
888: "clusters\n", offset >> s->cluster_bits);
889: offset &= ~QCOW_OFLAG_COPIED;
890: errors++;
891: }
892:
893: /* Mark cluster as used */
894: nb_csectors = ((offset >> s->csize_shift) &
895: s->csize_mask) + 1;
896: offset &= s->cluster_offset_mask;
897: errors += inc_refcounts(bs, refcount_table,
898: refcount_table_size,
899: offset & ~511, nb_csectors * 512);
900: } else {
901: /* QCOW_OFLAG_COPIED must be set iff refcount == 1 */
902: if (check_copied) {
903: uint64_t entry = offset;
904: offset &= ~QCOW_OFLAG_COPIED;
905: refcount = get_refcount(bs, offset >> s->cluster_bits);
906: if ((refcount == 1) != ((entry & QCOW_OFLAG_COPIED) != 0)) {
907: fprintf(stderr, "ERROR OFLAG_COPIED: offset=%"
908: PRIx64 " refcount=%d\n", entry, refcount);
909: errors++;
910: }
911: }
912:
913: /* Mark cluster as used */
914: offset &= ~QCOW_OFLAG_COPIED;
915: errors += inc_refcounts(bs, refcount_table,
916: refcount_table_size,
917: offset, s->cluster_size);
918:
919: /* Correct offsets are cluster aligned */
920: if (offset & (s->cluster_size - 1)) {
921: fprintf(stderr, "ERROR offset=%" PRIx64 ": Cluster is not "
922: "properly aligned; L2 entry corrupted.\n", offset);
923: errors++;
924: }
925: }
926: }
927: }
928:
929: qemu_free(l2_table);
930: return errors;
931:
932: fail:
933: fprintf(stderr, "ERROR: I/O error in check_refcounts_l1\n");
934: qemu_free(l2_table);
935: return -EIO;
936: }
937:
938: /*
939: * Increases the refcount for the L1 table, its L2 tables and all referenced
940: * clusters in the given refcount table. While doing so, performs some checks
941: * on L1 and L2 entries.
942: *
943: * Returns the number of errors found by the checks or -errno if an internal
944: * error occurred.
945: */
946: static int check_refcounts_l1(BlockDriverState *bs,
947: uint16_t *refcount_table,
948: int refcount_table_size,
949: int64_t l1_table_offset, int l1_size,
950: int check_copied)
951: {
952: BDRVQcowState *s = bs->opaque;
953: uint64_t *l1_table, l2_offset, l1_size2;
954: int i, refcount, ret;
955: int errors = 0;
956:
957: l1_size2 = l1_size * sizeof(uint64_t);
958:
959: /* Mark L1 table as used */
960: errors += inc_refcounts(bs, refcount_table, refcount_table_size,
961: l1_table_offset, l1_size2);
962:
963: /* Read L1 table entries from disk */
1.1.1.2 root 964: if (l1_size2 == 0) {
965: l1_table = NULL;
966: } else {
967: l1_table = qemu_malloc(l1_size2);
968: if (bdrv_pread(s->hd, l1_table_offset,
969: l1_table, l1_size2) != l1_size2)
970: goto fail;
971: for(i = 0;i < l1_size; i++)
972: be64_to_cpus(&l1_table[i]);
973: }
1.1 root 974:
975: /* Do the actual checks */
976: for(i = 0; i < l1_size; i++) {
977: l2_offset = l1_table[i];
978: if (l2_offset) {
979: /* QCOW_OFLAG_COPIED must be set iff refcount == 1 */
980: if (check_copied) {
981: refcount = get_refcount(bs, (l2_offset & ~QCOW_OFLAG_COPIED)
982: >> s->cluster_bits);
983: if ((refcount == 1) != ((l2_offset & QCOW_OFLAG_COPIED) != 0)) {
984: fprintf(stderr, "ERROR OFLAG_COPIED: l2_offset=%" PRIx64
985: " refcount=%d\n", l2_offset, refcount);
986: errors++;
987: }
988: }
989:
990: /* Mark L2 table as used */
991: l2_offset &= ~QCOW_OFLAG_COPIED;
992: errors += inc_refcounts(bs, refcount_table,
993: refcount_table_size,
994: l2_offset,
995: s->cluster_size);
996:
997: /* L2 tables are cluster aligned */
998: if (l2_offset & (s->cluster_size - 1)) {
999: fprintf(stderr, "ERROR l2_offset=%" PRIx64 ": Table is not "
1000: "cluster aligned; L1 entry corrupted\n", l2_offset);
1001: errors++;
1002: }
1003:
1004: /* Process and check L2 entries */
1005: ret = check_refcounts_l2(bs, refcount_table, refcount_table_size,
1006: l2_offset, check_copied);
1007: if (ret < 0) {
1008: goto fail;
1009: }
1010: errors += ret;
1011: }
1012: }
1013: qemu_free(l1_table);
1014: return errors;
1015:
1016: fail:
1017: fprintf(stderr, "ERROR: I/O error in check_refcounts_l1\n");
1018: qemu_free(l1_table);
1019: return -EIO;
1020: }
1021:
1022: /*
1023: * Checks an image for refcount consistency.
1024: *
1025: * Returns 0 if no errors are found, the number of errors in case the image is
1026: * detected as corrupted, and -errno when an internal error occured.
1027: */
1028: int qcow2_check_refcounts(BlockDriverState *bs)
1029: {
1030: BDRVQcowState *s = bs->opaque;
1031: int64_t size;
1032: int nb_clusters, refcount1, refcount2, i;
1033: QCowSnapshot *sn;
1034: uint16_t *refcount_table;
1035: int ret, errors = 0;
1036:
1037: size = bdrv_getlength(s->hd);
1038: nb_clusters = size_to_clusters(s, size);
1039: refcount_table = qemu_mallocz(nb_clusters * sizeof(uint16_t));
1040:
1041: /* header */
1042: errors += inc_refcounts(bs, refcount_table, nb_clusters,
1043: 0, s->cluster_size);
1044:
1045: /* current L1 table */
1046: ret = check_refcounts_l1(bs, refcount_table, nb_clusters,
1047: s->l1_table_offset, s->l1_size, 1);
1048: if (ret < 0) {
1049: return ret;
1050: }
1051: errors += ret;
1052:
1053: /* snapshots */
1054: for(i = 0; i < s->nb_snapshots; i++) {
1055: sn = s->snapshots + i;
1056: check_refcounts_l1(bs, refcount_table, nb_clusters,
1057: sn->l1_table_offset, sn->l1_size, 0);
1058: }
1059: errors += inc_refcounts(bs, refcount_table, nb_clusters,
1060: s->snapshots_offset, s->snapshots_size);
1061:
1062: /* refcount data */
1063: errors += inc_refcounts(bs, refcount_table, nb_clusters,
1064: s->refcount_table_offset,
1065: s->refcount_table_size * sizeof(uint64_t));
1066: for(i = 0; i < s->refcount_table_size; i++) {
1067: int64_t offset;
1068: offset = s->refcount_table[i];
1069: if (offset != 0) {
1070: errors += inc_refcounts(bs, refcount_table, nb_clusters,
1071: offset, s->cluster_size);
1072: }
1073: }
1074:
1075: /* compare ref counts */
1076: for(i = 0; i < nb_clusters; i++) {
1077: refcount1 = get_refcount(bs, i);
1078: refcount2 = refcount_table[i];
1079: if (refcount1 != refcount2) {
1080: fprintf(stderr, "ERROR cluster %d refcount=%d reference=%d\n",
1081: i, refcount1, refcount2);
1082: errors++;
1083: }
1084: }
1085:
1086: qemu_free(refcount_table);
1087:
1088: return errors;
1089: }
1090:
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.