|
|
1.1 root 1: /*
2: * Block driver for the QCOW version 2 format
3: *
4: * Copyright (c) 2004-2006 Fabrice Bellard
5: *
6: * Permission is hereby granted, free of charge, to any person obtaining a copy
7: * of this software and associated documentation files (the "Software"), to deal
8: * in the Software without restriction, including without limitation the rights
9: * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10: * copies of the Software, and to permit persons to whom the Software is
11: * furnished to do so, subject to the following conditions:
12: *
13: * The above copyright notice and this permission notice shall be included in
14: * all copies or substantial portions of the Software.
15: *
16: * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17: * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18: * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19: * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20: * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21: * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22: * THE SOFTWARE.
23: */
24:
25: #include "qemu-common.h"
26: #include "block_int.h"
27: #include "block/qcow2.h"
28:
29: static int64_t alloc_clusters_noref(BlockDriverState *bs, int64_t size);
1.1.1.4 root 30: static int QEMU_WARN_UNUSED_RESULT update_refcount(BlockDriverState *bs,
1.1 root 31: int64_t offset, int64_t length,
32: int addend);
33:
34:
35: static int cache_refcount_updates = 0;
36:
37: static int write_refcount_block(BDRVQcowState *s)
38: {
39: size_t size = s->cluster_size;
40:
41: if (s->refcount_block_cache_offset == 0) {
42: return 0;
43: }
44:
1.1.1.5 ! root 45: if (bdrv_pwrite_sync(s->hd, s->refcount_block_cache_offset,
! 46: s->refcount_block_cache, size) < 0)
1.1 root 47: {
48: return -EIO;
49: }
50:
51: return 0;
52: }
53:
54: /*********************************************************/
55: /* refcount handling */
56:
57: int qcow2_refcount_init(BlockDriverState *bs)
58: {
59: BDRVQcowState *s = bs->opaque;
60: int ret, refcount_table_size2, i;
61:
62: s->refcount_block_cache = qemu_malloc(s->cluster_size);
63: refcount_table_size2 = s->refcount_table_size * sizeof(uint64_t);
64: s->refcount_table = qemu_malloc(refcount_table_size2);
65: if (s->refcount_table_size > 0) {
66: ret = bdrv_pread(s->hd, s->refcount_table_offset,
67: s->refcount_table, refcount_table_size2);
68: if (ret != refcount_table_size2)
69: goto fail;
70: for(i = 0; i < s->refcount_table_size; i++)
71: be64_to_cpus(&s->refcount_table[i]);
72: }
73: return 0;
74: fail:
75: return -ENOMEM;
76: }
77:
78: void qcow2_refcount_close(BlockDriverState *bs)
79: {
80: BDRVQcowState *s = bs->opaque;
81: qemu_free(s->refcount_block_cache);
82: qemu_free(s->refcount_table);
83: }
84:
85:
86: static int load_refcount_block(BlockDriverState *bs,
87: int64_t refcount_block_offset)
88: {
89: BDRVQcowState *s = bs->opaque;
90: int ret;
91:
92: if (cache_refcount_updates) {
93: write_refcount_block(s);
94: }
95:
96: ret = bdrv_pread(s->hd, refcount_block_offset, s->refcount_block_cache,
97: s->cluster_size);
98: if (ret != s->cluster_size)
99: return -EIO;
100: s->refcount_block_cache_offset = refcount_block_offset;
101: return 0;
102: }
103:
104: static int get_refcount(BlockDriverState *bs, int64_t cluster_index)
105: {
106: BDRVQcowState *s = bs->opaque;
107: int refcount_table_index, block_index;
108: int64_t refcount_block_offset;
109:
110: refcount_table_index = cluster_index >> (s->cluster_bits - REFCOUNT_SHIFT);
111: if (refcount_table_index >= s->refcount_table_size)
112: return 0;
113: refcount_block_offset = s->refcount_table[refcount_table_index];
114: if (!refcount_block_offset)
115: return 0;
116: if (refcount_block_offset != s->refcount_block_cache_offset) {
117: /* better than nothing: return allocated if read error */
118: if (load_refcount_block(bs, refcount_block_offset) < 0)
119: return 1;
120: }
121: block_index = cluster_index &
122: ((1 << (s->cluster_bits - REFCOUNT_SHIFT)) - 1);
123: return be16_to_cpu(s->refcount_block_cache[block_index]);
124: }
125:
1.1.1.4 root 126: /*
127: * Rounds the refcount table size up to avoid growing the table for each single
128: * refcount block that is allocated.
129: */
130: static unsigned int next_refcount_table_size(BDRVQcowState *s,
131: unsigned int min_size)
132: {
133: unsigned int min_clusters = (min_size >> (s->cluster_bits - 3)) + 1;
134: unsigned int refcount_table_clusters =
135: MAX(1, s->refcount_table_size >> (s->cluster_bits - 3));
136:
137: while (min_clusters > refcount_table_clusters) {
138: refcount_table_clusters = (refcount_table_clusters * 3 + 1) / 2;
139: }
140:
141: return refcount_table_clusters << (s->cluster_bits - 3);
142: }
143:
144:
145: /* Checks if two offsets are described by the same refcount block */
146: static int in_same_refcount_block(BDRVQcowState *s, uint64_t offset_a,
147: uint64_t offset_b)
148: {
149: uint64_t block_a = offset_a >> (2 * s->cluster_bits - REFCOUNT_SHIFT);
150: uint64_t block_b = offset_b >> (2 * s->cluster_bits - REFCOUNT_SHIFT);
151:
152: return (block_a == block_b);
153: }
154:
155: /*
156: * Loads a refcount block. If it doesn't exist yet, it is allocated first
157: * (including growing the refcount table if needed).
158: *
159: * Returns the offset of the refcount block on success or -errno in error case
160: */
161: static int64_t alloc_refcount_block(BlockDriverState *bs, int64_t cluster_index)
1.1 root 162: {
163: BDRVQcowState *s = bs->opaque;
1.1.1.4 root 164: unsigned int refcount_table_index;
165: int ret;
1.1 root 166:
1.1.1.4 root 167: /* Find the refcount block for the given cluster */
168: refcount_table_index = cluster_index >> (s->cluster_bits - REFCOUNT_SHIFT);
169:
170: if (refcount_table_index < s->refcount_table_size) {
171:
172: uint64_t refcount_block_offset =
173: s->refcount_table[refcount_table_index];
174:
175: /* If it's already there, we're done */
176: if (refcount_block_offset) {
177: if (refcount_block_offset != s->refcount_block_cache_offset) {
178: ret = load_refcount_block(bs, refcount_block_offset);
179: if (ret < 0) {
180: return ret;
181: }
182: }
183: return refcount_block_offset;
184: }
185: }
186:
187: /*
188: * If we came here, we need to allocate something. Something is at least
189: * a cluster for the new refcount block. It may also include a new refcount
190: * table if the old refcount table is too small.
191: *
192: * Note that allocating clusters here needs some special care:
193: *
194: * - We can't use the normal qcow2_alloc_clusters(), it would try to
195: * increase the refcount and very likely we would end up with an endless
196: * recursion. Instead we must place the refcount blocks in a way that
197: * they can describe them themselves.
198: *
199: * - We need to consider that at this point we are inside update_refcounts
200: * and doing the initial refcount increase. This means that some clusters
201: * have already been allocated by the caller, but their refcount isn't
202: * accurate yet. free_cluster_index tells us where this allocation ends
203: * as long as we don't overwrite it by freeing clusters.
204: *
205: * - alloc_clusters_noref and qcow2_free_clusters may load a different
206: * refcount block into the cache
207: */
208:
209: if (cache_refcount_updates) {
210: ret = write_refcount_block(s);
211: if (ret < 0) {
212: return ret;
213: }
214: }
215:
216: /* Allocate the refcount block itself and mark it as used */
217: uint64_t new_block = alloc_clusters_noref(bs, s->cluster_size);
218:
219: #ifdef DEBUG_ALLOC2
220: fprintf(stderr, "qcow2: Allocate refcount block %d for %" PRIx64
221: " at %" PRIx64 "\n",
222: refcount_table_index, cluster_index << s->cluster_bits, new_block);
223: #endif
224:
225: if (in_same_refcount_block(s, new_block, cluster_index << s->cluster_bits)) {
1.1.1.5 ! root 226: /* Zero the new refcount block before updating it */
! 227: memset(s->refcount_block_cache, 0, s->cluster_size);
! 228: s->refcount_block_cache_offset = new_block;
! 229:
1.1.1.4 root 230: /* The block describes itself, need to update the cache */
231: int block_index = (new_block >> s->cluster_bits) &
232: ((1 << (s->cluster_bits - REFCOUNT_SHIFT)) - 1);
233: s->refcount_block_cache[block_index] = cpu_to_be16(1);
234: } else {
235: /* Described somewhere else. This can recurse at most twice before we
236: * arrive at a block that describes itself. */
237: ret = update_refcount(bs, new_block, s->cluster_size, 1);
238: if (ret < 0) {
239: goto fail_block;
240: }
1.1.1.5 ! root 241:
! 242: /* Initialize the new refcount block only after updating its refcount,
! 243: * update_refcount uses the refcount cache itself */
! 244: memset(s->refcount_block_cache, 0, s->cluster_size);
! 245: s->refcount_block_cache_offset = new_block;
1.1.1.4 root 246: }
247:
248: /* Now the new refcount block needs to be written to disk */
1.1.1.5 ! root 249: ret = bdrv_pwrite_sync(s->hd, new_block, s->refcount_block_cache,
1.1.1.4 root 250: s->cluster_size);
251: if (ret < 0) {
252: goto fail_block;
253: }
254:
255: /* If the refcount table is big enough, just hook the block up there */
256: if (refcount_table_index < s->refcount_table_size) {
257: uint64_t data64 = cpu_to_be64(new_block);
1.1.1.5 ! root 258: ret = bdrv_pwrite_sync(s->hd,
1.1.1.4 root 259: s->refcount_table_offset + refcount_table_index * sizeof(uint64_t),
260: &data64, sizeof(data64));
261: if (ret < 0) {
262: goto fail_block;
1.1 root 263: }
1.1.1.4 root 264:
265: s->refcount_table[refcount_table_index] = new_block;
266: return new_block;
1.1 root 267: }
1.1.1.4 root 268:
269: /*
270: * If we come here, we need to grow the refcount table. Again, a new
271: * refcount table needs some space and we can't simply allocate to avoid
272: * endless recursion.
273: *
274: * Therefore let's grab new refcount blocks at the end of the image, which
275: * will describe themselves and the new refcount table. This way we can
276: * reference them only in the new table and do the switch to the new
277: * refcount table at once without producing an inconsistent state in
278: * between.
279: */
280: /* Calculate the number of refcount blocks needed so far */
281: uint64_t refcount_block_clusters = 1 << (s->cluster_bits - REFCOUNT_SHIFT);
282: uint64_t blocks_used = (s->free_cluster_index +
283: refcount_block_clusters - 1) / refcount_block_clusters;
284:
285: /* And now we need at least one block more for the new metadata */
286: uint64_t table_size = next_refcount_table_size(s, blocks_used + 1);
287: uint64_t last_table_size;
288: uint64_t blocks_clusters;
289: do {
290: uint64_t table_clusters = size_to_clusters(s, table_size);
291: blocks_clusters = 1 +
292: ((table_clusters + refcount_block_clusters - 1)
293: / refcount_block_clusters);
294: uint64_t meta_clusters = table_clusters + blocks_clusters;
295:
296: last_table_size = table_size;
297: table_size = next_refcount_table_size(s, blocks_used +
298: ((meta_clusters + refcount_block_clusters - 1)
299: / refcount_block_clusters));
300:
301: } while (last_table_size != table_size);
302:
1.1 root 303: #ifdef DEBUG_ALLOC2
1.1.1.4 root 304: fprintf(stderr, "qcow2: Grow refcount table %" PRId32 " => %" PRId64 "\n",
305: s->refcount_table_size, table_size);
1.1 root 306: #endif
1.1.1.4 root 307:
308: /* Create the new refcount table and blocks */
309: uint64_t meta_offset = (blocks_used * refcount_block_clusters) *
310: s->cluster_size;
311: uint64_t table_offset = meta_offset + blocks_clusters * s->cluster_size;
312: uint16_t *new_blocks = qemu_mallocz(blocks_clusters * s->cluster_size);
313: uint64_t *new_table = qemu_mallocz(table_size * sizeof(uint64_t));
314:
315: assert(meta_offset >= (s->free_cluster_index * s->cluster_size));
316:
317: /* Fill the new refcount table */
1.1 root 318: memcpy(new_table, s->refcount_table,
1.1.1.4 root 319: s->refcount_table_size * sizeof(uint64_t));
320: new_table[refcount_table_index] = new_block;
321:
322: int i;
323: for (i = 0; i < blocks_clusters; i++) {
324: new_table[blocks_used + i] = meta_offset + (i * s->cluster_size);
325: }
326:
327: /* Fill the refcount blocks */
328: uint64_t table_clusters = size_to_clusters(s, table_size * sizeof(uint64_t));
329: int block = 0;
330: for (i = 0; i < table_clusters + blocks_clusters; i++) {
331: new_blocks[block++] = cpu_to_be16(1);
332: }
333:
334: /* Write refcount blocks to disk */
1.1.1.5 ! root 335: ret = bdrv_pwrite_sync(s->hd, meta_offset, new_blocks,
1.1.1.4 root 336: blocks_clusters * s->cluster_size);
337: qemu_free(new_blocks);
338: if (ret < 0) {
339: goto fail_table;
340: }
341:
342: /* Write refcount table to disk */
343: for(i = 0; i < table_size; i++) {
1.1 root 344: cpu_to_be64s(&new_table[i]);
1.1.1.4 root 345: }
346:
1.1.1.5 ! root 347: ret = bdrv_pwrite_sync(s->hd, table_offset, new_table,
1.1.1.4 root 348: table_size * sizeof(uint64_t));
349: if (ret < 0) {
350: goto fail_table;
351: }
352:
353: for(i = 0; i < table_size; i++) {
354: cpu_to_be64s(&new_table[i]);
355: }
1.1 root 356:
1.1.1.4 root 357: /* Hook up the new refcount table in the qcow2 header */
358: uint8_t data[12];
1.1 root 359: cpu_to_be64w((uint64_t*)data, table_offset);
1.1.1.4 root 360: cpu_to_be32w((uint32_t*)(data + 8), table_clusters);
1.1.1.5 ! root 361: ret = bdrv_pwrite_sync(s->hd, offsetof(QCowHeader, refcount_table_offset),
1.1.1.4 root 362: data, sizeof(data));
363: if (ret < 0) {
364: goto fail_table;
1.1.1.3 root 365: }
366:
1.1.1.4 root 367: /* And switch it in memory */
368: uint64_t old_table_offset = s->refcount_table_offset;
369: uint64_t old_table_size = s->refcount_table_size;
370:
1.1 root 371: qemu_free(s->refcount_table);
372: s->refcount_table = new_table;
1.1.1.4 root 373: s->refcount_table_size = table_size;
1.1 root 374: s->refcount_table_offset = table_offset;
375:
1.1.1.4 root 376: /* Free old table. Remember, we must not change free_cluster_index */
377: uint64_t old_free_cluster_index = s->free_cluster_index;
1.1 root 378: qcow2_free_clusters(bs, old_table_offset, old_table_size * sizeof(uint64_t));
1.1.1.4 root 379: s->free_cluster_index = old_free_cluster_index;
1.1 root 380:
1.1.1.4 root 381: ret = load_refcount_block(bs, new_block);
382: if (ret < 0) {
383: goto fail_block;
1.1 root 384: }
385:
1.1.1.4 root 386: return new_block;
1.1 root 387:
1.1.1.4 root 388: fail_table:
389: qemu_free(new_table);
390: fail_block:
391: s->refcount_block_cache_offset = 0;
392: return ret;
1.1 root 393: }
394:
395: #define REFCOUNTS_PER_SECTOR (512 >> REFCOUNT_SHIFT)
396: static int write_refcount_block_entries(BDRVQcowState *s,
397: int64_t refcount_block_offset, int first_index, int last_index)
398: {
399: size_t size;
1.1.1.5 ! root 400: int ret;
1.1 root 401:
402: if (cache_refcount_updates) {
403: return 0;
404: }
405:
1.1.1.5 ! root 406: if (first_index < 0) {
! 407: return 0;
! 408: }
! 409:
1.1 root 410: first_index &= ~(REFCOUNTS_PER_SECTOR - 1);
411: last_index = (last_index + REFCOUNTS_PER_SECTOR)
412: & ~(REFCOUNTS_PER_SECTOR - 1);
413:
414: size = (last_index - first_index) << REFCOUNT_SHIFT;
1.1.1.5 ! root 415: ret = bdrv_pwrite_sync(s->hd,
1.1 root 416: refcount_block_offset + (first_index << REFCOUNT_SHIFT),
1.1.1.5 ! root 417: &s->refcount_block_cache[first_index], size);
! 418: if (ret < 0) {
! 419: return ret;
1.1 root 420: }
421:
422: return 0;
423: }
424:
425: /* XXX: cache several refcount block clusters ? */
1.1.1.3 root 426: static int QEMU_WARN_UNUSED_RESULT update_refcount(BlockDriverState *bs,
427: int64_t offset, int64_t length, int addend)
1.1 root 428: {
429: BDRVQcowState *s = bs->opaque;
430: int64_t start, last, cluster_offset;
431: int64_t refcount_block_offset = 0;
432: int64_t table_index = -1, old_table_index;
433: int first_index = -1, last_index = -1;
1.1.1.3 root 434: int ret;
1.1 root 435:
436: #ifdef DEBUG_ALLOC2
437: printf("update_refcount: offset=%" PRId64 " size=%" PRId64 " addend=%d\n",
438: offset, length, addend);
439: #endif
1.1.1.3 root 440: if (length < 0) {
1.1 root 441: return -EINVAL;
1.1.1.3 root 442: } else if (length == 0) {
443: return 0;
444: }
445:
1.1 root 446: start = offset & ~(s->cluster_size - 1);
447: last = (offset + length - 1) & ~(s->cluster_size - 1);
448: for(cluster_offset = start; cluster_offset <= last;
449: cluster_offset += s->cluster_size)
450: {
451: int block_index, refcount;
452: int64_t cluster_index = cluster_offset >> s->cluster_bits;
1.1.1.3 root 453: int64_t new_block;
1.1 root 454:
455: /* Only write refcount block to disk when we are done with it */
456: old_table_index = table_index;
457: table_index = cluster_index >> (s->cluster_bits - REFCOUNT_SHIFT);
458: if ((old_table_index >= 0) && (table_index != old_table_index)) {
459:
460: if (write_refcount_block_entries(s, refcount_block_offset,
461: first_index, last_index) < 0)
462: {
463: return -EIO;
464: }
465:
466: first_index = -1;
467: last_index = -1;
468: }
469:
470: /* Load the refcount block and allocate it if needed */
1.1.1.3 root 471: new_block = alloc_refcount_block(bs, cluster_index);
472: if (new_block < 0) {
473: ret = new_block;
474: goto fail;
1.1 root 475: }
1.1.1.3 root 476: refcount_block_offset = new_block;
1.1 root 477:
478: /* we can update the count and save it */
479: block_index = cluster_index &
480: ((1 << (s->cluster_bits - REFCOUNT_SHIFT)) - 1);
481: if (first_index == -1 || block_index < first_index) {
482: first_index = block_index;
483: }
484: if (block_index > last_index) {
485: last_index = block_index;
486: }
487:
488: refcount = be16_to_cpu(s->refcount_block_cache[block_index]);
489: refcount += addend;
1.1.1.3 root 490: if (refcount < 0 || refcount > 0xffff) {
491: ret = -EINVAL;
492: goto fail;
493: }
1.1 root 494: if (refcount == 0 && cluster_index < s->free_cluster_index) {
495: s->free_cluster_index = cluster_index;
496: }
497: s->refcount_block_cache[block_index] = cpu_to_be16(refcount);
498: }
499:
1.1.1.3 root 500: ret = 0;
501: fail:
502:
1.1 root 503: /* Write last changed block to disk */
504: if (refcount_block_offset != 0) {
505: if (write_refcount_block_entries(s, refcount_block_offset,
506: first_index, last_index) < 0)
507: {
1.1.1.3 root 508: return ret < 0 ? ret : -EIO;
1.1 root 509: }
510: }
511:
1.1.1.3 root 512: /*
513: * Try do undo any updates if an error is returned (This may succeed in
514: * some cases like ENOSPC for allocating a new refcount block)
515: */
516: if (ret < 0) {
517: int dummy;
518: dummy = update_refcount(bs, offset, cluster_offset - offset, -addend);
519: }
520:
521: return ret;
1.1 root 522: }
523:
524: /* addend must be 1 or -1 */
525: static int update_cluster_refcount(BlockDriverState *bs,
526: int64_t cluster_index,
527: int addend)
528: {
529: BDRVQcowState *s = bs->opaque;
530: int ret;
531:
532: ret = update_refcount(bs, cluster_index << s->cluster_bits, 1, addend);
533: if (ret < 0) {
534: return ret;
535: }
536:
537: return get_refcount(bs, cluster_index);
538: }
539:
540:
541:
542: /*********************************************************/
543: /* cluster allocation functions */
544:
545:
546:
547: /* return < 0 if error */
548: static int64_t alloc_clusters_noref(BlockDriverState *bs, int64_t size)
549: {
550: BDRVQcowState *s = bs->opaque;
551: int i, nb_clusters;
552:
553: nb_clusters = size_to_clusters(s, size);
554: retry:
555: for(i = 0; i < nb_clusters; i++) {
556: int64_t i = s->free_cluster_index++;
557: if (get_refcount(bs, i) != 0)
558: goto retry;
559: }
560: #ifdef DEBUG_ALLOC2
561: printf("alloc_clusters: size=%" PRId64 " -> %" PRId64 "\n",
562: size,
563: (s->free_cluster_index - nb_clusters) << s->cluster_bits);
564: #endif
565: return (s->free_cluster_index - nb_clusters) << s->cluster_bits;
566: }
567:
568: int64_t qcow2_alloc_clusters(BlockDriverState *bs, int64_t size)
569: {
570: int64_t offset;
1.1.1.3 root 571: int ret;
1.1 root 572:
573: offset = alloc_clusters_noref(bs, size);
1.1.1.3 root 574: ret = update_refcount(bs, offset, size, 1);
575: if (ret < 0) {
576: return ret;
577: }
1.1 root 578: return offset;
579: }
580:
581: /* only used to allocate compressed sectors. We try to allocate
582: contiguous sectors. size must be <= cluster_size */
583: int64_t qcow2_alloc_bytes(BlockDriverState *bs, int size)
584: {
585: BDRVQcowState *s = bs->opaque;
586: int64_t offset, cluster_offset;
587: int free_in_cluster;
588:
589: assert(size > 0 && size <= s->cluster_size);
590: if (s->free_byte_offset == 0) {
591: s->free_byte_offset = qcow2_alloc_clusters(bs, s->cluster_size);
1.1.1.3 root 592: if (s->free_byte_offset < 0) {
593: return s->free_byte_offset;
594: }
1.1 root 595: }
596: redo:
597: free_in_cluster = s->cluster_size -
598: (s->free_byte_offset & (s->cluster_size - 1));
599: if (size <= free_in_cluster) {
600: /* enough space in current cluster */
601: offset = s->free_byte_offset;
602: s->free_byte_offset += size;
603: free_in_cluster -= size;
604: if (free_in_cluster == 0)
605: s->free_byte_offset = 0;
606: if ((offset & (s->cluster_size - 1)) != 0)
607: update_cluster_refcount(bs, offset >> s->cluster_bits, 1);
608: } else {
609: offset = qcow2_alloc_clusters(bs, s->cluster_size);
1.1.1.3 root 610: if (offset < 0) {
611: return offset;
612: }
1.1 root 613: cluster_offset = s->free_byte_offset & ~(s->cluster_size - 1);
614: if ((cluster_offset + s->cluster_size) == offset) {
615: /* we are lucky: contiguous data */
616: offset = s->free_byte_offset;
617: update_cluster_refcount(bs, offset >> s->cluster_bits, 1);
618: s->free_byte_offset += size;
619: } else {
620: s->free_byte_offset = offset;
621: goto redo;
622: }
623: }
624: return offset;
625: }
626:
627: void qcow2_free_clusters(BlockDriverState *bs,
628: int64_t offset, int64_t size)
629: {
1.1.1.3 root 630: int ret;
631:
632: ret = update_refcount(bs, offset, size, -1);
633: if (ret < 0) {
634: fprintf(stderr, "qcow2_free_clusters failed: %s\n", strerror(-ret));
1.1.1.5 ! root 635: /* TODO Remember the clusters to free them later and avoid leaking */
1.1.1.3 root 636: }
1.1 root 637: }
638:
639: /*
640: * free_any_clusters
641: *
642: * free clusters according to its type: compressed or not
643: *
644: */
645:
646: void qcow2_free_any_clusters(BlockDriverState *bs,
647: uint64_t cluster_offset, int nb_clusters)
648: {
649: BDRVQcowState *s = bs->opaque;
650:
651: /* free the cluster */
652:
653: if (cluster_offset & QCOW_OFLAG_COMPRESSED) {
654: int nb_csectors;
655: nb_csectors = ((cluster_offset >> s->csize_shift) &
656: s->csize_mask) + 1;
657: qcow2_free_clusters(bs,
658: (cluster_offset & s->cluster_offset_mask) & ~511,
659: nb_csectors * 512);
660: return;
661: }
662:
663: qcow2_free_clusters(bs, cluster_offset, nb_clusters << s->cluster_bits);
664:
665: return;
666: }
667:
668:
669:
670: /*********************************************************/
671: /* snapshots and image creation */
672:
673:
674:
675: void qcow2_create_refcount_update(QCowCreateState *s, int64_t offset,
676: int64_t size)
677: {
678: int refcount;
679: int64_t start, last, cluster_offset;
680: uint16_t *p;
681:
682: start = offset & ~(s->cluster_size - 1);
683: last = (offset + size - 1) & ~(s->cluster_size - 1);
684: for(cluster_offset = start; cluster_offset <= last;
685: cluster_offset += s->cluster_size) {
686: p = &s->refcount_block[cluster_offset >> s->cluster_bits];
687: refcount = be16_to_cpu(*p);
688: refcount++;
689: *p = cpu_to_be16(refcount);
690: }
691: }
692:
693: /* update the refcounts of snapshots and the copied flag */
694: int qcow2_update_snapshot_refcount(BlockDriverState *bs,
695: int64_t l1_table_offset, int l1_size, int addend)
696: {
697: BDRVQcowState *s = bs->opaque;
698: uint64_t *l1_table, *l2_table, l2_offset, offset, l1_size2, l1_allocated;
699: int64_t old_offset, old_l2_offset;
700: int l2_size, i, j, l1_modified, l2_modified, nb_csectors, refcount;
701:
702: qcow2_l2_cache_reset(bs);
703: cache_refcount_updates = 1;
704:
705: l2_table = NULL;
706: l1_table = NULL;
707: l1_size2 = l1_size * sizeof(uint64_t);
708: l1_allocated = 0;
709: if (l1_table_offset != s->l1_table_offset) {
1.1.1.2 root 710: if (l1_size2 != 0) {
711: l1_table = qemu_mallocz(align_offset(l1_size2, 512));
712: } else {
713: l1_table = NULL;
714: }
1.1 root 715: l1_allocated = 1;
716: if (bdrv_pread(s->hd, l1_table_offset,
717: l1_table, l1_size2) != l1_size2)
718: goto fail;
719: for(i = 0;i < l1_size; i++)
720: be64_to_cpus(&l1_table[i]);
721: } else {
722: assert(l1_size == s->l1_size);
723: l1_table = s->l1_table;
724: l1_allocated = 0;
725: }
726:
727: l2_size = s->l2_size * sizeof(uint64_t);
728: l2_table = qemu_malloc(l2_size);
729: l1_modified = 0;
730: for(i = 0; i < l1_size; i++) {
731: l2_offset = l1_table[i];
732: if (l2_offset) {
733: old_l2_offset = l2_offset;
734: l2_offset &= ~QCOW_OFLAG_COPIED;
735: l2_modified = 0;
736: if (bdrv_pread(s->hd, l2_offset, l2_table, l2_size) != l2_size)
737: goto fail;
738: for(j = 0; j < s->l2_size; j++) {
739: offset = be64_to_cpu(l2_table[j]);
740: if (offset != 0) {
741: old_offset = offset;
742: offset &= ~QCOW_OFLAG_COPIED;
743: if (offset & QCOW_OFLAG_COMPRESSED) {
744: nb_csectors = ((offset >> s->csize_shift) &
745: s->csize_mask) + 1;
1.1.1.3 root 746: if (addend != 0) {
747: int ret;
748: ret = update_refcount(bs,
749: (offset & s->cluster_offset_mask) & ~511,
750: nb_csectors * 512, addend);
751: if (ret < 0) {
752: goto fail;
753: }
754: }
1.1 root 755: /* compressed clusters are never modified */
756: refcount = 2;
757: } else {
758: if (addend != 0) {
759: refcount = update_cluster_refcount(bs, offset >> s->cluster_bits, addend);
760: } else {
761: refcount = get_refcount(bs, offset >> s->cluster_bits);
762: }
763: }
764:
765: if (refcount == 1) {
766: offset |= QCOW_OFLAG_COPIED;
767: }
768: if (offset != old_offset) {
769: l2_table[j] = cpu_to_be64(offset);
770: l2_modified = 1;
771: }
772: }
773: }
774: if (l2_modified) {
1.1.1.5 ! root 775: if (bdrv_pwrite_sync(s->hd,
! 776: l2_offset, l2_table, l2_size) < 0)
1.1 root 777: goto fail;
778: }
779:
780: if (addend != 0) {
781: refcount = update_cluster_refcount(bs, l2_offset >> s->cluster_bits, addend);
782: } else {
783: refcount = get_refcount(bs, l2_offset >> s->cluster_bits);
784: }
785: if (refcount == 1) {
786: l2_offset |= QCOW_OFLAG_COPIED;
787: }
788: if (l2_offset != old_l2_offset) {
789: l1_table[i] = l2_offset;
790: l1_modified = 1;
791: }
792: }
793: }
794: if (l1_modified) {
795: for(i = 0; i < l1_size; i++)
796: cpu_to_be64s(&l1_table[i]);
1.1.1.5 ! root 797: if (bdrv_pwrite_sync(s->hd, l1_table_offset, l1_table,
! 798: l1_size2) < 0)
1.1 root 799: goto fail;
800: for(i = 0; i < l1_size; i++)
801: be64_to_cpus(&l1_table[i]);
802: }
803: if (l1_allocated)
804: qemu_free(l1_table);
805: qemu_free(l2_table);
806: cache_refcount_updates = 0;
807: write_refcount_block(s);
808: return 0;
809: fail:
810: if (l1_allocated)
811: qemu_free(l1_table);
812: qemu_free(l2_table);
813: cache_refcount_updates = 0;
814: write_refcount_block(s);
815: return -EIO;
816: }
817:
818:
819:
820:
821: /*********************************************************/
822: /* refcount checking functions */
823:
824:
825:
826: /*
827: * Increases the refcount for a range of clusters in a given refcount table.
828: * This is used to construct a temporary refcount table out of L1 and L2 tables
829: * which can be compared the the refcount table saved in the image.
830: *
831: * Returns the number of errors in the image that were found
832: */
833: static int inc_refcounts(BlockDriverState *bs,
834: uint16_t *refcount_table,
835: int refcount_table_size,
836: int64_t offset, int64_t size)
837: {
838: BDRVQcowState *s = bs->opaque;
839: int64_t start, last, cluster_offset;
840: int k;
841: int errors = 0;
842:
843: if (size <= 0)
844: return 0;
845:
846: start = offset & ~(s->cluster_size - 1);
847: last = (offset + size - 1) & ~(s->cluster_size - 1);
848: for(cluster_offset = start; cluster_offset <= last;
849: cluster_offset += s->cluster_size) {
850: k = cluster_offset >> s->cluster_bits;
851: if (k < 0 || k >= refcount_table_size) {
852: fprintf(stderr, "ERROR: invalid cluster offset=0x%" PRIx64 "\n",
853: cluster_offset);
854: errors++;
855: } else {
856: if (++refcount_table[k] == 0) {
857: fprintf(stderr, "ERROR: overflow cluster offset=0x%" PRIx64
858: "\n", cluster_offset);
859: errors++;
860: }
861: }
862: }
863:
864: return errors;
865: }
866:
867: /*
868: * Increases the refcount in the given refcount table for the all clusters
869: * referenced in the L2 table. While doing so, performs some checks on L2
870: * entries.
871: *
872: * Returns the number of errors found by the checks or -errno if an internal
873: * error occurred.
874: */
875: static int check_refcounts_l2(BlockDriverState *bs,
876: uint16_t *refcount_table, int refcount_table_size, int64_t l2_offset,
877: int check_copied)
878: {
879: BDRVQcowState *s = bs->opaque;
880: uint64_t *l2_table, offset;
881: int i, l2_size, nb_csectors, refcount;
882: int errors = 0;
883:
884: /* Read L2 table from disk */
885: l2_size = s->l2_size * sizeof(uint64_t);
886: l2_table = qemu_malloc(l2_size);
887:
888: if (bdrv_pread(s->hd, l2_offset, l2_table, l2_size) != l2_size)
889: goto fail;
890:
891: /* Do the actual checks */
892: for(i = 0; i < s->l2_size; i++) {
893: offset = be64_to_cpu(l2_table[i]);
894: if (offset != 0) {
895: if (offset & QCOW_OFLAG_COMPRESSED) {
896: /* Compressed clusters don't have QCOW_OFLAG_COPIED */
897: if (offset & QCOW_OFLAG_COPIED) {
898: fprintf(stderr, "ERROR: cluster %" PRId64 ": "
899: "copied flag must never be set for compressed "
900: "clusters\n", offset >> s->cluster_bits);
901: offset &= ~QCOW_OFLAG_COPIED;
902: errors++;
903: }
904:
905: /* Mark cluster as used */
906: nb_csectors = ((offset >> s->csize_shift) &
907: s->csize_mask) + 1;
908: offset &= s->cluster_offset_mask;
909: errors += inc_refcounts(bs, refcount_table,
910: refcount_table_size,
911: offset & ~511, nb_csectors * 512);
912: } else {
913: /* QCOW_OFLAG_COPIED must be set iff refcount == 1 */
914: if (check_copied) {
915: uint64_t entry = offset;
916: offset &= ~QCOW_OFLAG_COPIED;
917: refcount = get_refcount(bs, offset >> s->cluster_bits);
918: if ((refcount == 1) != ((entry & QCOW_OFLAG_COPIED) != 0)) {
919: fprintf(stderr, "ERROR OFLAG_COPIED: offset=%"
920: PRIx64 " refcount=%d\n", entry, refcount);
921: errors++;
922: }
923: }
924:
925: /* Mark cluster as used */
926: offset &= ~QCOW_OFLAG_COPIED;
927: errors += inc_refcounts(bs, refcount_table,
928: refcount_table_size,
929: offset, s->cluster_size);
930:
931: /* Correct offsets are cluster aligned */
932: if (offset & (s->cluster_size - 1)) {
933: fprintf(stderr, "ERROR offset=%" PRIx64 ": Cluster is not "
934: "properly aligned; L2 entry corrupted.\n", offset);
935: errors++;
936: }
937: }
938: }
939: }
940:
941: qemu_free(l2_table);
942: return errors;
943:
944: fail:
945: fprintf(stderr, "ERROR: I/O error in check_refcounts_l1\n");
946: qemu_free(l2_table);
947: return -EIO;
948: }
949:
950: /*
951: * Increases the refcount for the L1 table, its L2 tables and all referenced
952: * clusters in the given refcount table. While doing so, performs some checks
953: * on L1 and L2 entries.
954: *
955: * Returns the number of errors found by the checks or -errno if an internal
956: * error occurred.
957: */
958: static int check_refcounts_l1(BlockDriverState *bs,
959: uint16_t *refcount_table,
960: int refcount_table_size,
961: int64_t l1_table_offset, int l1_size,
962: int check_copied)
963: {
964: BDRVQcowState *s = bs->opaque;
965: uint64_t *l1_table, l2_offset, l1_size2;
966: int i, refcount, ret;
967: int errors = 0;
968:
969: l1_size2 = l1_size * sizeof(uint64_t);
970:
971: /* Mark L1 table as used */
972: errors += inc_refcounts(bs, refcount_table, refcount_table_size,
973: l1_table_offset, l1_size2);
974:
975: /* Read L1 table entries from disk */
1.1.1.2 root 976: if (l1_size2 == 0) {
977: l1_table = NULL;
978: } else {
979: l1_table = qemu_malloc(l1_size2);
980: if (bdrv_pread(s->hd, l1_table_offset,
981: l1_table, l1_size2) != l1_size2)
982: goto fail;
983: for(i = 0;i < l1_size; i++)
984: be64_to_cpus(&l1_table[i]);
985: }
1.1 root 986:
987: /* Do the actual checks */
988: for(i = 0; i < l1_size; i++) {
989: l2_offset = l1_table[i];
990: if (l2_offset) {
991: /* QCOW_OFLAG_COPIED must be set iff refcount == 1 */
992: if (check_copied) {
993: refcount = get_refcount(bs, (l2_offset & ~QCOW_OFLAG_COPIED)
994: >> s->cluster_bits);
995: if ((refcount == 1) != ((l2_offset & QCOW_OFLAG_COPIED) != 0)) {
996: fprintf(stderr, "ERROR OFLAG_COPIED: l2_offset=%" PRIx64
997: " refcount=%d\n", l2_offset, refcount);
998: errors++;
999: }
1000: }
1001:
1002: /* Mark L2 table as used */
1003: l2_offset &= ~QCOW_OFLAG_COPIED;
1004: errors += inc_refcounts(bs, refcount_table,
1005: refcount_table_size,
1006: l2_offset,
1007: s->cluster_size);
1008:
1009: /* L2 tables are cluster aligned */
1010: if (l2_offset & (s->cluster_size - 1)) {
1011: fprintf(stderr, "ERROR l2_offset=%" PRIx64 ": Table is not "
1012: "cluster aligned; L1 entry corrupted\n", l2_offset);
1013: errors++;
1014: }
1015:
1016: /* Process and check L2 entries */
1017: ret = check_refcounts_l2(bs, refcount_table, refcount_table_size,
1018: l2_offset, check_copied);
1019: if (ret < 0) {
1020: goto fail;
1021: }
1022: errors += ret;
1023: }
1024: }
1025: qemu_free(l1_table);
1026: return errors;
1027:
1028: fail:
1029: fprintf(stderr, "ERROR: I/O error in check_refcounts_l1\n");
1030: qemu_free(l1_table);
1031: return -EIO;
1032: }
1033:
1034: /*
1035: * Checks an image for refcount consistency.
1036: *
1037: * Returns 0 if no errors are found, the number of errors in case the image is
1038: * detected as corrupted, and -errno when an internal error occured.
1039: */
1040: int qcow2_check_refcounts(BlockDriverState *bs)
1041: {
1042: BDRVQcowState *s = bs->opaque;
1043: int64_t size;
1044: int nb_clusters, refcount1, refcount2, i;
1045: QCowSnapshot *sn;
1046: uint16_t *refcount_table;
1047: int ret, errors = 0;
1048:
1049: size = bdrv_getlength(s->hd);
1050: nb_clusters = size_to_clusters(s, size);
1051: refcount_table = qemu_mallocz(nb_clusters * sizeof(uint16_t));
1052:
1053: /* header */
1054: errors += inc_refcounts(bs, refcount_table, nb_clusters,
1055: 0, s->cluster_size);
1056:
1057: /* current L1 table */
1058: ret = check_refcounts_l1(bs, refcount_table, nb_clusters,
1059: s->l1_table_offset, s->l1_size, 1);
1060: if (ret < 0) {
1061: return ret;
1062: }
1063: errors += ret;
1064:
1065: /* snapshots */
1066: for(i = 0; i < s->nb_snapshots; i++) {
1067: sn = s->snapshots + i;
1068: check_refcounts_l1(bs, refcount_table, nb_clusters,
1069: sn->l1_table_offset, sn->l1_size, 0);
1070: }
1071: errors += inc_refcounts(bs, refcount_table, nb_clusters,
1072: s->snapshots_offset, s->snapshots_size);
1073:
1074: /* refcount data */
1075: errors += inc_refcounts(bs, refcount_table, nb_clusters,
1076: s->refcount_table_offset,
1077: s->refcount_table_size * sizeof(uint64_t));
1078: for(i = 0; i < s->refcount_table_size; i++) {
1079: int64_t offset;
1080: offset = s->refcount_table[i];
1081: if (offset != 0) {
1082: errors += inc_refcounts(bs, refcount_table, nb_clusters,
1083: offset, s->cluster_size);
1084: }
1085: }
1086:
1087: /* compare ref counts */
1088: for(i = 0; i < nb_clusters; i++) {
1089: refcount1 = get_refcount(bs, i);
1090: refcount2 = refcount_table[i];
1091: if (refcount1 != refcount2) {
1092: fprintf(stderr, "ERROR cluster %d refcount=%d reference=%d\n",
1093: i, refcount1, refcount2);
1094: errors++;
1095: }
1096: }
1097:
1098: qemu_free(refcount_table);
1099:
1100: return errors;
1101: }
1102:
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.