Annotation of qemu/block/qcow2-refcount.c, revision 1.1.1.4

1.1       root        1: /*
                      2:  * Block driver for the QCOW version 2 format
                      3:  *
                      4:  * Copyright (c) 2004-2006 Fabrice Bellard
                      5:  *
                      6:  * Permission is hereby granted, free of charge, to any person obtaining a copy
                      7:  * of this software and associated documentation files (the "Software"), to deal
                      8:  * in the Software without restriction, including without limitation the rights
                      9:  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
                     10:  * copies of the Software, and to permit persons to whom the Software is
                     11:  * furnished to do so, subject to the following conditions:
                     12:  *
                     13:  * The above copyright notice and this permission notice shall be included in
                     14:  * all copies or substantial portions of the Software.
                     15:  *
                     16:  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
                     17:  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
                     18:  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
                     19:  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
                     20:  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
                     21:  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
                     22:  * THE SOFTWARE.
                     23:  */
                     24: 
                     25: #include "qemu-common.h"
                     26: #include "block_int.h"
                     27: #include "block/qcow2.h"
                     28: 
                     29: static int64_t alloc_clusters_noref(BlockDriverState *bs, int64_t size);
1.1.1.4 ! root       30: static int QEMU_WARN_UNUSED_RESULT update_refcount(BlockDriverState *bs,
1.1       root       31:                             int64_t offset, int64_t length,
                     32:                             int addend);
                     33: 
                     34: 
                     35: static int cache_refcount_updates = 0;
                     36: 
                     37: static int write_refcount_block(BDRVQcowState *s)
                     38: {
                     39:     size_t size = s->cluster_size;
                     40: 
                     41:     if (s->refcount_block_cache_offset == 0) {
                     42:         return 0;
                     43:     }
                     44: 
                     45:     if (bdrv_pwrite(s->hd, s->refcount_block_cache_offset,
                     46:             s->refcount_block_cache, size) != size)
                     47:     {
                     48:         return -EIO;
                     49:     }
                     50: 
                     51:     return 0;
                     52: }
                     53: 
                     54: /*********************************************************/
                     55: /* refcount handling */
                     56: 
                     57: int qcow2_refcount_init(BlockDriverState *bs)
                     58: {
                     59:     BDRVQcowState *s = bs->opaque;
                     60:     int ret, refcount_table_size2, i;
                     61: 
                     62:     s->refcount_block_cache = qemu_malloc(s->cluster_size);
                     63:     refcount_table_size2 = s->refcount_table_size * sizeof(uint64_t);
                     64:     s->refcount_table = qemu_malloc(refcount_table_size2);
                     65:     if (s->refcount_table_size > 0) {
                     66:         ret = bdrv_pread(s->hd, s->refcount_table_offset,
                     67:                          s->refcount_table, refcount_table_size2);
                     68:         if (ret != refcount_table_size2)
                     69:             goto fail;
                     70:         for(i = 0; i < s->refcount_table_size; i++)
                     71:             be64_to_cpus(&s->refcount_table[i]);
                     72:     }
                     73:     return 0;
                     74:  fail:
                     75:     return -ENOMEM;
                     76: }
                     77: 
                     78: void qcow2_refcount_close(BlockDriverState *bs)
                     79: {
                     80:     BDRVQcowState *s = bs->opaque;
                     81:     qemu_free(s->refcount_block_cache);
                     82:     qemu_free(s->refcount_table);
                     83: }
                     84: 
                     85: 
                     86: static int load_refcount_block(BlockDriverState *bs,
                     87:                                int64_t refcount_block_offset)
                     88: {
                     89:     BDRVQcowState *s = bs->opaque;
                     90:     int ret;
                     91: 
                     92:     if (cache_refcount_updates) {
                     93:         write_refcount_block(s);
                     94:     }
                     95: 
                     96:     ret = bdrv_pread(s->hd, refcount_block_offset, s->refcount_block_cache,
                     97:                      s->cluster_size);
                     98:     if (ret != s->cluster_size)
                     99:         return -EIO;
                    100:     s->refcount_block_cache_offset = refcount_block_offset;
                    101:     return 0;
                    102: }
                    103: 
                    104: static int get_refcount(BlockDriverState *bs, int64_t cluster_index)
                    105: {
                    106:     BDRVQcowState *s = bs->opaque;
                    107:     int refcount_table_index, block_index;
                    108:     int64_t refcount_block_offset;
                    109: 
                    110:     refcount_table_index = cluster_index >> (s->cluster_bits - REFCOUNT_SHIFT);
                    111:     if (refcount_table_index >= s->refcount_table_size)
                    112:         return 0;
                    113:     refcount_block_offset = s->refcount_table[refcount_table_index];
                    114:     if (!refcount_block_offset)
                    115:         return 0;
                    116:     if (refcount_block_offset != s->refcount_block_cache_offset) {
                    117:         /* better than nothing: return allocated if read error */
                    118:         if (load_refcount_block(bs, refcount_block_offset) < 0)
                    119:             return 1;
                    120:     }
                    121:     block_index = cluster_index &
                    122:         ((1 << (s->cluster_bits - REFCOUNT_SHIFT)) - 1);
                    123:     return be16_to_cpu(s->refcount_block_cache[block_index]);
                    124: }
                    125: 
1.1.1.4 ! root      126: /*
        !           127:  * Rounds the refcount table size up to avoid growing the table for each single
        !           128:  * refcount block that is allocated.
        !           129:  */
        !           130: static unsigned int next_refcount_table_size(BDRVQcowState *s,
        !           131:     unsigned int min_size)
        !           132: {
        !           133:     unsigned int min_clusters = (min_size >> (s->cluster_bits - 3)) + 1;
        !           134:     unsigned int refcount_table_clusters =
        !           135:         MAX(1, s->refcount_table_size >> (s->cluster_bits - 3));
        !           136: 
        !           137:     while (min_clusters > refcount_table_clusters) {
        !           138:         refcount_table_clusters = (refcount_table_clusters * 3 + 1) / 2;
        !           139:     }
        !           140: 
        !           141:     return refcount_table_clusters << (s->cluster_bits - 3);
        !           142: }
        !           143: 
        !           144: 
        !           145: /* Checks if two offsets are described by the same refcount block */
        !           146: static int in_same_refcount_block(BDRVQcowState *s, uint64_t offset_a,
        !           147:     uint64_t offset_b)
        !           148: {
        !           149:     uint64_t block_a = offset_a >> (2 * s->cluster_bits - REFCOUNT_SHIFT);
        !           150:     uint64_t block_b = offset_b >> (2 * s->cluster_bits - REFCOUNT_SHIFT);
        !           151: 
        !           152:     return (block_a == block_b);
        !           153: }
        !           154: 
        !           155: /*
        !           156:  * Loads a refcount block. If it doesn't exist yet, it is allocated first
        !           157:  * (including growing the refcount table if needed).
        !           158:  *
        !           159:  * Returns the offset of the refcount block on success or -errno in error case
        !           160:  */
        !           161: static int64_t alloc_refcount_block(BlockDriverState *bs, int64_t cluster_index)
1.1       root      162: {
                    163:     BDRVQcowState *s = bs->opaque;
1.1.1.4 ! root      164:     unsigned int refcount_table_index;
        !           165:     int ret;
1.1       root      166: 
1.1.1.4 ! root      167:     /* Find the refcount block for the given cluster */
        !           168:     refcount_table_index = cluster_index >> (s->cluster_bits - REFCOUNT_SHIFT);
        !           169: 
        !           170:     if (refcount_table_index < s->refcount_table_size) {
        !           171: 
        !           172:         uint64_t refcount_block_offset =
        !           173:             s->refcount_table[refcount_table_index];
        !           174: 
        !           175:         /* If it's already there, we're done */
        !           176:         if (refcount_block_offset) {
        !           177:             if (refcount_block_offset != s->refcount_block_cache_offset) {
        !           178:                 ret = load_refcount_block(bs, refcount_block_offset);
        !           179:                 if (ret < 0) {
        !           180:                     return ret;
        !           181:                 }
        !           182:             }
        !           183:             return refcount_block_offset;
        !           184:         }
        !           185:     }
        !           186: 
        !           187:     /*
        !           188:      * If we came here, we need to allocate something. Something is at least
        !           189:      * a cluster for the new refcount block. It may also include a new refcount
        !           190:      * table if the old refcount table is too small.
        !           191:      *
        !           192:      * Note that allocating clusters here needs some special care:
        !           193:      *
        !           194:      * - We can't use the normal qcow2_alloc_clusters(), it would try to
        !           195:      *   increase the refcount and very likely we would end up with an endless
        !           196:      *   recursion. Instead we must place the refcount blocks in a way that
        !           197:      *   they can describe them themselves.
        !           198:      *
        !           199:      * - We need to consider that at this point we are inside update_refcounts
        !           200:      *   and doing the initial refcount increase. This means that some clusters
        !           201:      *   have already been allocated by the caller, but their refcount isn't
        !           202:      *   accurate yet. free_cluster_index tells us where this allocation ends
        !           203:      *   as long as we don't overwrite it by freeing clusters.
        !           204:      *
        !           205:      * - alloc_clusters_noref and qcow2_free_clusters may load a different
        !           206:      *   refcount block into the cache
        !           207:      */
        !           208: 
        !           209:     if (cache_refcount_updates) {
        !           210:         ret = write_refcount_block(s);
        !           211:         if (ret < 0) {
        !           212:             return ret;
        !           213:         }
        !           214:     }
        !           215: 
        !           216:     /* Allocate the refcount block itself and mark it as used */
        !           217:     uint64_t new_block = alloc_clusters_noref(bs, s->cluster_size);
        !           218:     memset(s->refcount_block_cache, 0, s->cluster_size);
        !           219:     s->refcount_block_cache_offset = new_block;
        !           220: 
        !           221: #ifdef DEBUG_ALLOC2
        !           222:     fprintf(stderr, "qcow2: Allocate refcount block %d for %" PRIx64
        !           223:         " at %" PRIx64 "\n",
        !           224:         refcount_table_index, cluster_index << s->cluster_bits, new_block);
        !           225: #endif
        !           226: 
        !           227:     if (in_same_refcount_block(s, new_block, cluster_index << s->cluster_bits)) {
        !           228:         /* The block describes itself, need to update the cache */
        !           229:         int block_index = (new_block >> s->cluster_bits) &
        !           230:             ((1 << (s->cluster_bits - REFCOUNT_SHIFT)) - 1);
        !           231:         s->refcount_block_cache[block_index] = cpu_to_be16(1);
        !           232:     } else {
        !           233:         /* Described somewhere else. This can recurse at most twice before we
        !           234:          * arrive at a block that describes itself. */
        !           235:         ret = update_refcount(bs, new_block, s->cluster_size, 1);
        !           236:         if (ret < 0) {
        !           237:             goto fail_block;
        !           238:         }
        !           239:     }
        !           240: 
        !           241:     /* Now the new refcount block needs to be written to disk */
        !           242:     ret = bdrv_pwrite(s->hd, new_block, s->refcount_block_cache,
        !           243:         s->cluster_size);
        !           244:     if (ret < 0) {
        !           245:         goto fail_block;
        !           246:     }
        !           247: 
        !           248:     /* If the refcount table is big enough, just hook the block up there */
        !           249:     if (refcount_table_index < s->refcount_table_size) {
        !           250:         uint64_t data64 = cpu_to_be64(new_block);
        !           251:         ret = bdrv_pwrite(s->hd,
        !           252:             s->refcount_table_offset + refcount_table_index * sizeof(uint64_t),
        !           253:             &data64, sizeof(data64));
        !           254:         if (ret < 0) {
        !           255:             goto fail_block;
1.1       root      256:         }
1.1.1.4 ! root      257: 
        !           258:         s->refcount_table[refcount_table_index] = new_block;
        !           259:         return new_block;
1.1       root      260:     }
1.1.1.4 ! root      261: 
        !           262:     /*
        !           263:      * If we come here, we need to grow the refcount table. Again, a new
        !           264:      * refcount table needs some space and we can't simply allocate to avoid
        !           265:      * endless recursion.
        !           266:      *
        !           267:      * Therefore let's grab new refcount blocks at the end of the image, which
        !           268:      * will describe themselves and the new refcount table. This way we can
        !           269:      * reference them only in the new table and do the switch to the new
        !           270:      * refcount table at once without producing an inconsistent state in
        !           271:      * between.
        !           272:      */
        !           273:     /* Calculate the number of refcount blocks needed so far */
        !           274:     uint64_t refcount_block_clusters = 1 << (s->cluster_bits - REFCOUNT_SHIFT);
        !           275:     uint64_t blocks_used = (s->free_cluster_index +
        !           276:         refcount_block_clusters - 1) / refcount_block_clusters;
        !           277: 
        !           278:     /* And now we need at least one block more for the new metadata */
        !           279:     uint64_t table_size = next_refcount_table_size(s, blocks_used + 1);
        !           280:     uint64_t last_table_size;
        !           281:     uint64_t blocks_clusters;
        !           282:     do {
        !           283:         uint64_t table_clusters = size_to_clusters(s, table_size);
        !           284:         blocks_clusters = 1 +
        !           285:             ((table_clusters + refcount_block_clusters - 1)
        !           286:             / refcount_block_clusters);
        !           287:         uint64_t meta_clusters = table_clusters + blocks_clusters;
        !           288: 
        !           289:         last_table_size = table_size;
        !           290:         table_size = next_refcount_table_size(s, blocks_used +
        !           291:             ((meta_clusters + refcount_block_clusters - 1)
        !           292:             / refcount_block_clusters));
        !           293: 
        !           294:     } while (last_table_size != table_size);
        !           295: 
1.1       root      296: #ifdef DEBUG_ALLOC2
1.1.1.4 ! root      297:     fprintf(stderr, "qcow2: Grow refcount table %" PRId32 " => %" PRId64 "\n",
        !           298:         s->refcount_table_size, table_size);
1.1       root      299: #endif
1.1.1.4 ! root      300: 
        !           301:     /* Create the new refcount table and blocks */
        !           302:     uint64_t meta_offset = (blocks_used * refcount_block_clusters) *
        !           303:         s->cluster_size;
        !           304:     uint64_t table_offset = meta_offset + blocks_clusters * s->cluster_size;
        !           305:     uint16_t *new_blocks = qemu_mallocz(blocks_clusters * s->cluster_size);
        !           306:     uint64_t *new_table = qemu_mallocz(table_size * sizeof(uint64_t));
        !           307: 
        !           308:     assert(meta_offset >= (s->free_cluster_index * s->cluster_size));
        !           309: 
        !           310:     /* Fill the new refcount table */
1.1       root      311:     memcpy(new_table, s->refcount_table,
1.1.1.4 ! root      312:         s->refcount_table_size * sizeof(uint64_t));
        !           313:     new_table[refcount_table_index] = new_block;
        !           314: 
        !           315:     int i;
        !           316:     for (i = 0; i < blocks_clusters; i++) {
        !           317:         new_table[blocks_used + i] = meta_offset + (i * s->cluster_size);
        !           318:     }
        !           319: 
        !           320:     /* Fill the refcount blocks */
        !           321:     uint64_t table_clusters = size_to_clusters(s, table_size * sizeof(uint64_t));
        !           322:     int block = 0;
        !           323:     for (i = 0; i < table_clusters + blocks_clusters; i++) {
        !           324:         new_blocks[block++] = cpu_to_be16(1);
        !           325:     }
        !           326: 
        !           327:     /* Write refcount blocks to disk */
        !           328:     ret = bdrv_pwrite(s->hd, meta_offset, new_blocks,
        !           329:         blocks_clusters * s->cluster_size);
        !           330:     qemu_free(new_blocks);
        !           331:     if (ret < 0) {
        !           332:         goto fail_table;
        !           333:     }
        !           334: 
        !           335:     /* Write refcount table to disk */
        !           336:     for(i = 0; i < table_size; i++) {
1.1       root      337:         cpu_to_be64s(&new_table[i]);
1.1.1.4 ! root      338:     }
        !           339: 
        !           340:     ret = bdrv_pwrite(s->hd, table_offset, new_table,
        !           341:         table_size * sizeof(uint64_t));
        !           342:     if (ret < 0) {
        !           343:         goto fail_table;
        !           344:     }
        !           345: 
        !           346:     for(i = 0; i < table_size; i++) {
        !           347:         cpu_to_be64s(&new_table[i]);
        !           348:     }
1.1       root      349: 
1.1.1.4 ! root      350:     /* Hook up the new refcount table in the qcow2 header */
        !           351:     uint8_t data[12];
1.1       root      352:     cpu_to_be64w((uint64_t*)data, table_offset);
1.1.1.4 ! root      353:     cpu_to_be32w((uint32_t*)(data + 8), table_clusters);
1.1.1.3   root      354:     ret = bdrv_pwrite(s->hd, offsetof(QCowHeader, refcount_table_offset),
1.1.1.4 ! root      355:         data, sizeof(data));
        !           356:     if (ret < 0) {
        !           357:         goto fail_table;
1.1.1.3   root      358:     }
                    359: 
1.1.1.4 ! root      360:     /* And switch it in memory */
        !           361:     uint64_t old_table_offset = s->refcount_table_offset;
        !           362:     uint64_t old_table_size = s->refcount_table_size;
        !           363: 
1.1       root      364:     qemu_free(s->refcount_table);
                    365:     s->refcount_table = new_table;
1.1.1.4 ! root      366:     s->refcount_table_size = table_size;
1.1       root      367:     s->refcount_table_offset = table_offset;
                    368: 
1.1.1.4 ! root      369:     /* Free old table. Remember, we must not change free_cluster_index */
        !           370:     uint64_t old_free_cluster_index = s->free_cluster_index;
1.1       root      371:     qcow2_free_clusters(bs, old_table_offset, old_table_size * sizeof(uint64_t));
1.1.1.4 ! root      372:     s->free_cluster_index = old_free_cluster_index;
1.1       root      373: 
1.1.1.4 ! root      374:     ret = load_refcount_block(bs, new_block);
        !           375:     if (ret < 0) {
        !           376:         goto fail_block;
1.1       root      377:     }
                    378: 
1.1.1.4 ! root      379:     return new_block;
1.1       root      380: 
1.1.1.4 ! root      381: fail_table:
        !           382:     qemu_free(new_table);
        !           383: fail_block:
        !           384:     s->refcount_block_cache_offset = 0;
        !           385:     return ret;
1.1       root      386: }
                    387: 
                    388: #define REFCOUNTS_PER_SECTOR (512 >> REFCOUNT_SHIFT)
                    389: static int write_refcount_block_entries(BDRVQcowState *s,
                    390:     int64_t refcount_block_offset, int first_index, int last_index)
                    391: {
                    392:     size_t size;
                    393: 
                    394:     if (cache_refcount_updates) {
                    395:         return 0;
                    396:     }
                    397: 
                    398:     first_index &= ~(REFCOUNTS_PER_SECTOR - 1);
                    399:     last_index = (last_index + REFCOUNTS_PER_SECTOR)
                    400:         & ~(REFCOUNTS_PER_SECTOR - 1);
                    401: 
                    402:     size = (last_index - first_index) << REFCOUNT_SHIFT;
                    403:     if (bdrv_pwrite(s->hd,
                    404:         refcount_block_offset + (first_index << REFCOUNT_SHIFT),
                    405:         &s->refcount_block_cache[first_index], size) != size)
                    406:     {
                    407:         return -EIO;
                    408:     }
                    409: 
                    410:     return 0;
                    411: }
                    412: 
                    413: /* XXX: cache several refcount block clusters ? */
1.1.1.3   root      414: static int QEMU_WARN_UNUSED_RESULT update_refcount(BlockDriverState *bs,
                    415:     int64_t offset, int64_t length, int addend)
1.1       root      416: {
                    417:     BDRVQcowState *s = bs->opaque;
                    418:     int64_t start, last, cluster_offset;
                    419:     int64_t refcount_block_offset = 0;
                    420:     int64_t table_index = -1, old_table_index;
                    421:     int first_index = -1, last_index = -1;
1.1.1.3   root      422:     int ret;
1.1       root      423: 
                    424: #ifdef DEBUG_ALLOC2
                    425:     printf("update_refcount: offset=%" PRId64 " size=%" PRId64 " addend=%d\n",
                    426:            offset, length, addend);
                    427: #endif
1.1.1.3   root      428:     if (length < 0) {
1.1       root      429:         return -EINVAL;
1.1.1.3   root      430:     } else if (length == 0) {
                    431:         return 0;
                    432:     }
                    433: 
1.1       root      434:     start = offset & ~(s->cluster_size - 1);
                    435:     last = (offset + length - 1) & ~(s->cluster_size - 1);
                    436:     for(cluster_offset = start; cluster_offset <= last;
                    437:         cluster_offset += s->cluster_size)
                    438:     {
                    439:         int block_index, refcount;
                    440:         int64_t cluster_index = cluster_offset >> s->cluster_bits;
1.1.1.3   root      441:         int64_t new_block;
1.1       root      442: 
                    443:         /* Only write refcount block to disk when we are done with it */
                    444:         old_table_index = table_index;
                    445:         table_index = cluster_index >> (s->cluster_bits - REFCOUNT_SHIFT);
                    446:         if ((old_table_index >= 0) && (table_index != old_table_index)) {
                    447: 
                    448:             if (write_refcount_block_entries(s, refcount_block_offset,
                    449:                 first_index, last_index) < 0)
                    450:             {
                    451:                 return -EIO;
                    452:             }
                    453: 
                    454:             first_index = -1;
                    455:             last_index = -1;
                    456:         }
                    457: 
                    458:         /* Load the refcount block and allocate it if needed */
1.1.1.3   root      459:         new_block = alloc_refcount_block(bs, cluster_index);
                    460:         if (new_block < 0) {
                    461:             ret = new_block;
                    462:             goto fail;
1.1       root      463:         }
1.1.1.3   root      464:         refcount_block_offset = new_block;
1.1       root      465: 
                    466:         /* we can update the count and save it */
                    467:         block_index = cluster_index &
                    468:             ((1 << (s->cluster_bits - REFCOUNT_SHIFT)) - 1);
                    469:         if (first_index == -1 || block_index < first_index) {
                    470:             first_index = block_index;
                    471:         }
                    472:         if (block_index > last_index) {
                    473:             last_index = block_index;
                    474:         }
                    475: 
                    476:         refcount = be16_to_cpu(s->refcount_block_cache[block_index]);
                    477:         refcount += addend;
1.1.1.3   root      478:         if (refcount < 0 || refcount > 0xffff) {
                    479:             ret = -EINVAL;
                    480:             goto fail;
                    481:         }
1.1       root      482:         if (refcount == 0 && cluster_index < s->free_cluster_index) {
                    483:             s->free_cluster_index = cluster_index;
                    484:         }
                    485:         s->refcount_block_cache[block_index] = cpu_to_be16(refcount);
                    486:     }
                    487: 
1.1.1.3   root      488:     ret = 0;
                    489: fail:
                    490: 
1.1       root      491:     /* Write last changed block to disk */
                    492:     if (refcount_block_offset != 0) {
                    493:         if (write_refcount_block_entries(s, refcount_block_offset,
                    494:             first_index, last_index) < 0)
                    495:         {
1.1.1.3   root      496:             return ret < 0 ? ret : -EIO;
1.1       root      497:         }
                    498:     }
                    499: 
1.1.1.3   root      500:     /*
                    501:      * Try do undo any updates if an error is returned (This may succeed in
                    502:      * some cases like ENOSPC for allocating a new refcount block)
                    503:      */
                    504:     if (ret < 0) {
                    505:         int dummy;
                    506:         dummy = update_refcount(bs, offset, cluster_offset - offset, -addend);
                    507:     }
                    508: 
                    509:     return ret;
1.1       root      510: }
                    511: 
                    512: /* addend must be 1 or -1 */
                    513: static int update_cluster_refcount(BlockDriverState *bs,
                    514:                                    int64_t cluster_index,
                    515:                                    int addend)
                    516: {
                    517:     BDRVQcowState *s = bs->opaque;
                    518:     int ret;
                    519: 
                    520:     ret = update_refcount(bs, cluster_index << s->cluster_bits, 1, addend);
                    521:     if (ret < 0) {
                    522:         return ret;
                    523:     }
                    524: 
                    525:     return get_refcount(bs, cluster_index);
                    526: }
                    527: 
                    528: 
                    529: 
                    530: /*********************************************************/
                    531: /* cluster allocation functions */
                    532: 
                    533: 
                    534: 
                    535: /* return < 0 if error */
                    536: static int64_t alloc_clusters_noref(BlockDriverState *bs, int64_t size)
                    537: {
                    538:     BDRVQcowState *s = bs->opaque;
                    539:     int i, nb_clusters;
                    540: 
                    541:     nb_clusters = size_to_clusters(s, size);
                    542: retry:
                    543:     for(i = 0; i < nb_clusters; i++) {
                    544:         int64_t i = s->free_cluster_index++;
                    545:         if (get_refcount(bs, i) != 0)
                    546:             goto retry;
                    547:     }
                    548: #ifdef DEBUG_ALLOC2
                    549:     printf("alloc_clusters: size=%" PRId64 " -> %" PRId64 "\n",
                    550:             size,
                    551:             (s->free_cluster_index - nb_clusters) << s->cluster_bits);
                    552: #endif
                    553:     return (s->free_cluster_index - nb_clusters) << s->cluster_bits;
                    554: }
                    555: 
                    556: int64_t qcow2_alloc_clusters(BlockDriverState *bs, int64_t size)
                    557: {
                    558:     int64_t offset;
1.1.1.3   root      559:     int ret;
1.1       root      560: 
                    561:     offset = alloc_clusters_noref(bs, size);
1.1.1.3   root      562:     ret = update_refcount(bs, offset, size, 1);
                    563:     if (ret < 0) {
                    564:         return ret;
                    565:     }
1.1       root      566:     return offset;
                    567: }
                    568: 
                    569: /* only used to allocate compressed sectors. We try to allocate
                    570:    contiguous sectors. size must be <= cluster_size */
                    571: int64_t qcow2_alloc_bytes(BlockDriverState *bs, int size)
                    572: {
                    573:     BDRVQcowState *s = bs->opaque;
                    574:     int64_t offset, cluster_offset;
                    575:     int free_in_cluster;
                    576: 
                    577:     assert(size > 0 && size <= s->cluster_size);
                    578:     if (s->free_byte_offset == 0) {
                    579:         s->free_byte_offset = qcow2_alloc_clusters(bs, s->cluster_size);
1.1.1.3   root      580:         if (s->free_byte_offset < 0) {
                    581:             return s->free_byte_offset;
                    582:         }
1.1       root      583:     }
                    584:  redo:
                    585:     free_in_cluster = s->cluster_size -
                    586:         (s->free_byte_offset & (s->cluster_size - 1));
                    587:     if (size <= free_in_cluster) {
                    588:         /* enough space in current cluster */
                    589:         offset = s->free_byte_offset;
                    590:         s->free_byte_offset += size;
                    591:         free_in_cluster -= size;
                    592:         if (free_in_cluster == 0)
                    593:             s->free_byte_offset = 0;
                    594:         if ((offset & (s->cluster_size - 1)) != 0)
                    595:             update_cluster_refcount(bs, offset >> s->cluster_bits, 1);
                    596:     } else {
                    597:         offset = qcow2_alloc_clusters(bs, s->cluster_size);
1.1.1.3   root      598:         if (offset < 0) {
                    599:             return offset;
                    600:         }
1.1       root      601:         cluster_offset = s->free_byte_offset & ~(s->cluster_size - 1);
                    602:         if ((cluster_offset + s->cluster_size) == offset) {
                    603:             /* we are lucky: contiguous data */
                    604:             offset = s->free_byte_offset;
                    605:             update_cluster_refcount(bs, offset >> s->cluster_bits, 1);
                    606:             s->free_byte_offset += size;
                    607:         } else {
                    608:             s->free_byte_offset = offset;
                    609:             goto redo;
                    610:         }
                    611:     }
                    612:     return offset;
                    613: }
                    614: 
                    615: void qcow2_free_clusters(BlockDriverState *bs,
                    616:                           int64_t offset, int64_t size)
                    617: {
1.1.1.3   root      618:     int ret;
                    619: 
                    620:     ret = update_refcount(bs, offset, size, -1);
                    621:     if (ret < 0) {
                    622:         fprintf(stderr, "qcow2_free_clusters failed: %s\n", strerror(-ret));
                    623:         abort();
                    624:     }
1.1       root      625: }
                    626: 
                    627: /*
                    628:  * free_any_clusters
                    629:  *
                    630:  * free clusters according to its type: compressed or not
                    631:  *
                    632:  */
                    633: 
                    634: void qcow2_free_any_clusters(BlockDriverState *bs,
                    635:     uint64_t cluster_offset, int nb_clusters)
                    636: {
                    637:     BDRVQcowState *s = bs->opaque;
                    638: 
                    639:     /* free the cluster */
                    640: 
                    641:     if (cluster_offset & QCOW_OFLAG_COMPRESSED) {
                    642:         int nb_csectors;
                    643:         nb_csectors = ((cluster_offset >> s->csize_shift) &
                    644:                        s->csize_mask) + 1;
                    645:         qcow2_free_clusters(bs,
                    646:             (cluster_offset & s->cluster_offset_mask) & ~511,
                    647:             nb_csectors * 512);
                    648:         return;
                    649:     }
                    650: 
                    651:     qcow2_free_clusters(bs, cluster_offset, nb_clusters << s->cluster_bits);
                    652: 
                    653:     return;
                    654: }
                    655: 
                    656: 
                    657: 
                    658: /*********************************************************/
                    659: /* snapshots and image creation */
                    660: 
                    661: 
                    662: 
                    663: void qcow2_create_refcount_update(QCowCreateState *s, int64_t offset,
                    664:     int64_t size)
                    665: {
                    666:     int refcount;
                    667:     int64_t start, last, cluster_offset;
                    668:     uint16_t *p;
                    669: 
                    670:     start = offset & ~(s->cluster_size - 1);
                    671:     last = (offset + size - 1)  & ~(s->cluster_size - 1);
                    672:     for(cluster_offset = start; cluster_offset <= last;
                    673:         cluster_offset += s->cluster_size) {
                    674:         p = &s->refcount_block[cluster_offset >> s->cluster_bits];
                    675:         refcount = be16_to_cpu(*p);
                    676:         refcount++;
                    677:         *p = cpu_to_be16(refcount);
                    678:     }
                    679: }
                    680: 
                    681: /* update the refcounts of snapshots and the copied flag */
                    682: int qcow2_update_snapshot_refcount(BlockDriverState *bs,
                    683:     int64_t l1_table_offset, int l1_size, int addend)
                    684: {
                    685:     BDRVQcowState *s = bs->opaque;
                    686:     uint64_t *l1_table, *l2_table, l2_offset, offset, l1_size2, l1_allocated;
                    687:     int64_t old_offset, old_l2_offset;
                    688:     int l2_size, i, j, l1_modified, l2_modified, nb_csectors, refcount;
                    689: 
                    690:     qcow2_l2_cache_reset(bs);
                    691:     cache_refcount_updates = 1;
                    692: 
                    693:     l2_table = NULL;
                    694:     l1_table = NULL;
                    695:     l1_size2 = l1_size * sizeof(uint64_t);
                    696:     l1_allocated = 0;
                    697:     if (l1_table_offset != s->l1_table_offset) {
1.1.1.2   root      698:         if (l1_size2 != 0) {
                    699:             l1_table = qemu_mallocz(align_offset(l1_size2, 512));
                    700:         } else {
                    701:             l1_table = NULL;
                    702:         }
1.1       root      703:         l1_allocated = 1;
                    704:         if (bdrv_pread(s->hd, l1_table_offset,
                    705:                        l1_table, l1_size2) != l1_size2)
                    706:             goto fail;
                    707:         for(i = 0;i < l1_size; i++)
                    708:             be64_to_cpus(&l1_table[i]);
                    709:     } else {
                    710:         assert(l1_size == s->l1_size);
                    711:         l1_table = s->l1_table;
                    712:         l1_allocated = 0;
                    713:     }
                    714: 
                    715:     l2_size = s->l2_size * sizeof(uint64_t);
                    716:     l2_table = qemu_malloc(l2_size);
                    717:     l1_modified = 0;
                    718:     for(i = 0; i < l1_size; i++) {
                    719:         l2_offset = l1_table[i];
                    720:         if (l2_offset) {
                    721:             old_l2_offset = l2_offset;
                    722:             l2_offset &= ~QCOW_OFLAG_COPIED;
                    723:             l2_modified = 0;
                    724:             if (bdrv_pread(s->hd, l2_offset, l2_table, l2_size) != l2_size)
                    725:                 goto fail;
                    726:             for(j = 0; j < s->l2_size; j++) {
                    727:                 offset = be64_to_cpu(l2_table[j]);
                    728:                 if (offset != 0) {
                    729:                     old_offset = offset;
                    730:                     offset &= ~QCOW_OFLAG_COPIED;
                    731:                     if (offset & QCOW_OFLAG_COMPRESSED) {
                    732:                         nb_csectors = ((offset >> s->csize_shift) &
                    733:                                        s->csize_mask) + 1;
1.1.1.3   root      734:                         if (addend != 0) {
                    735:                             int ret;
                    736:                             ret = update_refcount(bs,
                    737:                                 (offset & s->cluster_offset_mask) & ~511,
                    738:                                 nb_csectors * 512, addend);
                    739:                             if (ret < 0) {
                    740:                                 goto fail;
                    741:                             }
                    742:                         }
1.1       root      743:                         /* compressed clusters are never modified */
                    744:                         refcount = 2;
                    745:                     } else {
                    746:                         if (addend != 0) {
                    747:                             refcount = update_cluster_refcount(bs, offset >> s->cluster_bits, addend);
                    748:                         } else {
                    749:                             refcount = get_refcount(bs, offset >> s->cluster_bits);
                    750:                         }
                    751:                     }
                    752: 
                    753:                     if (refcount == 1) {
                    754:                         offset |= QCOW_OFLAG_COPIED;
                    755:                     }
                    756:                     if (offset != old_offset) {
                    757:                         l2_table[j] = cpu_to_be64(offset);
                    758:                         l2_modified = 1;
                    759:                     }
                    760:                 }
                    761:             }
                    762:             if (l2_modified) {
                    763:                 if (bdrv_pwrite(s->hd,
                    764:                                 l2_offset, l2_table, l2_size) != l2_size)
                    765:                     goto fail;
                    766:             }
                    767: 
                    768:             if (addend != 0) {
                    769:                 refcount = update_cluster_refcount(bs, l2_offset >> s->cluster_bits, addend);
                    770:             } else {
                    771:                 refcount = get_refcount(bs, l2_offset >> s->cluster_bits);
                    772:             }
                    773:             if (refcount == 1) {
                    774:                 l2_offset |= QCOW_OFLAG_COPIED;
                    775:             }
                    776:             if (l2_offset != old_l2_offset) {
                    777:                 l1_table[i] = l2_offset;
                    778:                 l1_modified = 1;
                    779:             }
                    780:         }
                    781:     }
                    782:     if (l1_modified) {
                    783:         for(i = 0; i < l1_size; i++)
                    784:             cpu_to_be64s(&l1_table[i]);
                    785:         if (bdrv_pwrite(s->hd, l1_table_offset, l1_table,
                    786:                         l1_size2) != l1_size2)
                    787:             goto fail;
                    788:         for(i = 0; i < l1_size; i++)
                    789:             be64_to_cpus(&l1_table[i]);
                    790:     }
                    791:     if (l1_allocated)
                    792:         qemu_free(l1_table);
                    793:     qemu_free(l2_table);
                    794:     cache_refcount_updates = 0;
                    795:     write_refcount_block(s);
                    796:     return 0;
                    797:  fail:
                    798:     if (l1_allocated)
                    799:         qemu_free(l1_table);
                    800:     qemu_free(l2_table);
                    801:     cache_refcount_updates = 0;
                    802:     write_refcount_block(s);
                    803:     return -EIO;
                    804: }
                    805: 
                    806: 
                    807: 
                    808: 
                    809: /*********************************************************/
                    810: /* refcount checking functions */
                    811: 
                    812: 
                    813: 
                    814: /*
                    815:  * Increases the refcount for a range of clusters in a given refcount table.
                    816:  * This is used to construct a temporary refcount table out of L1 and L2 tables
                    817:  * which can be compared the the refcount table saved in the image.
                    818:  *
                    819:  * Returns the number of errors in the image that were found
                    820:  */
                    821: static int inc_refcounts(BlockDriverState *bs,
                    822:                           uint16_t *refcount_table,
                    823:                           int refcount_table_size,
                    824:                           int64_t offset, int64_t size)
                    825: {
                    826:     BDRVQcowState *s = bs->opaque;
                    827:     int64_t start, last, cluster_offset;
                    828:     int k;
                    829:     int errors = 0;
                    830: 
                    831:     if (size <= 0)
                    832:         return 0;
                    833: 
                    834:     start = offset & ~(s->cluster_size - 1);
                    835:     last = (offset + size - 1) & ~(s->cluster_size - 1);
                    836:     for(cluster_offset = start; cluster_offset <= last;
                    837:         cluster_offset += s->cluster_size) {
                    838:         k = cluster_offset >> s->cluster_bits;
                    839:         if (k < 0 || k >= refcount_table_size) {
                    840:             fprintf(stderr, "ERROR: invalid cluster offset=0x%" PRIx64 "\n",
                    841:                 cluster_offset);
                    842:             errors++;
                    843:         } else {
                    844:             if (++refcount_table[k] == 0) {
                    845:                 fprintf(stderr, "ERROR: overflow cluster offset=0x%" PRIx64
                    846:                     "\n", cluster_offset);
                    847:                 errors++;
                    848:             }
                    849:         }
                    850:     }
                    851: 
                    852:     return errors;
                    853: }
                    854: 
                    855: /*
                    856:  * Increases the refcount in the given refcount table for the all clusters
                    857:  * referenced in the L2 table. While doing so, performs some checks on L2
                    858:  * entries.
                    859:  *
                    860:  * Returns the number of errors found by the checks or -errno if an internal
                    861:  * error occurred.
                    862:  */
                    863: static int check_refcounts_l2(BlockDriverState *bs,
                    864:     uint16_t *refcount_table, int refcount_table_size, int64_t l2_offset,
                    865:     int check_copied)
                    866: {
                    867:     BDRVQcowState *s = bs->opaque;
                    868:     uint64_t *l2_table, offset;
                    869:     int i, l2_size, nb_csectors, refcount;
                    870:     int errors = 0;
                    871: 
                    872:     /* Read L2 table from disk */
                    873:     l2_size = s->l2_size * sizeof(uint64_t);
                    874:     l2_table = qemu_malloc(l2_size);
                    875: 
                    876:     if (bdrv_pread(s->hd, l2_offset, l2_table, l2_size) != l2_size)
                    877:         goto fail;
                    878: 
                    879:     /* Do the actual checks */
                    880:     for(i = 0; i < s->l2_size; i++) {
                    881:         offset = be64_to_cpu(l2_table[i]);
                    882:         if (offset != 0) {
                    883:             if (offset & QCOW_OFLAG_COMPRESSED) {
                    884:                 /* Compressed clusters don't have QCOW_OFLAG_COPIED */
                    885:                 if (offset & QCOW_OFLAG_COPIED) {
                    886:                     fprintf(stderr, "ERROR: cluster %" PRId64 ": "
                    887:                         "copied flag must never be set for compressed "
                    888:                         "clusters\n", offset >> s->cluster_bits);
                    889:                     offset &= ~QCOW_OFLAG_COPIED;
                    890:                     errors++;
                    891:                 }
                    892: 
                    893:                 /* Mark cluster as used */
                    894:                 nb_csectors = ((offset >> s->csize_shift) &
                    895:                                s->csize_mask) + 1;
                    896:                 offset &= s->cluster_offset_mask;
                    897:                 errors += inc_refcounts(bs, refcount_table,
                    898:                               refcount_table_size,
                    899:                               offset & ~511, nb_csectors * 512);
                    900:             } else {
                    901:                 /* QCOW_OFLAG_COPIED must be set iff refcount == 1 */
                    902:                 if (check_copied) {
                    903:                     uint64_t entry = offset;
                    904:                     offset &= ~QCOW_OFLAG_COPIED;
                    905:                     refcount = get_refcount(bs, offset >> s->cluster_bits);
                    906:                     if ((refcount == 1) != ((entry & QCOW_OFLAG_COPIED) != 0)) {
                    907:                         fprintf(stderr, "ERROR OFLAG_COPIED: offset=%"
                    908:                             PRIx64 " refcount=%d\n", entry, refcount);
                    909:                         errors++;
                    910:                     }
                    911:                 }
                    912: 
                    913:                 /* Mark cluster as used */
                    914:                 offset &= ~QCOW_OFLAG_COPIED;
                    915:                 errors += inc_refcounts(bs, refcount_table,
                    916:                               refcount_table_size,
                    917:                               offset, s->cluster_size);
                    918: 
                    919:                 /* Correct offsets are cluster aligned */
                    920:                 if (offset & (s->cluster_size - 1)) {
                    921:                     fprintf(stderr, "ERROR offset=%" PRIx64 ": Cluster is not "
                    922:                         "properly aligned; L2 entry corrupted.\n", offset);
                    923:                     errors++;
                    924:                 }
                    925:             }
                    926:         }
                    927:     }
                    928: 
                    929:     qemu_free(l2_table);
                    930:     return errors;
                    931: 
                    932: fail:
                    933:     fprintf(stderr, "ERROR: I/O error in check_refcounts_l1\n");
                    934:     qemu_free(l2_table);
                    935:     return -EIO;
                    936: }
                    937: 
                    938: /*
                    939:  * Increases the refcount for the L1 table, its L2 tables and all referenced
                    940:  * clusters in the given refcount table. While doing so, performs some checks
                    941:  * on L1 and L2 entries.
                    942:  *
                    943:  * Returns the number of errors found by the checks or -errno if an internal
                    944:  * error occurred.
                    945:  */
                    946: static int check_refcounts_l1(BlockDriverState *bs,
                    947:                               uint16_t *refcount_table,
                    948:                               int refcount_table_size,
                    949:                               int64_t l1_table_offset, int l1_size,
                    950:                               int check_copied)
                    951: {
                    952:     BDRVQcowState *s = bs->opaque;
                    953:     uint64_t *l1_table, l2_offset, l1_size2;
                    954:     int i, refcount, ret;
                    955:     int errors = 0;
                    956: 
                    957:     l1_size2 = l1_size * sizeof(uint64_t);
                    958: 
                    959:     /* Mark L1 table as used */
                    960:     errors += inc_refcounts(bs, refcount_table, refcount_table_size,
                    961:                   l1_table_offset, l1_size2);
                    962: 
                    963:     /* Read L1 table entries from disk */
1.1.1.2   root      964:     if (l1_size2 == 0) {
                    965:         l1_table = NULL;
                    966:     } else {
                    967:         l1_table = qemu_malloc(l1_size2);
                    968:         if (bdrv_pread(s->hd, l1_table_offset,
                    969:                        l1_table, l1_size2) != l1_size2)
                    970:             goto fail;
                    971:         for(i = 0;i < l1_size; i++)
                    972:             be64_to_cpus(&l1_table[i]);
                    973:     }
1.1       root      974: 
                    975:     /* Do the actual checks */
                    976:     for(i = 0; i < l1_size; i++) {
                    977:         l2_offset = l1_table[i];
                    978:         if (l2_offset) {
                    979:             /* QCOW_OFLAG_COPIED must be set iff refcount == 1 */
                    980:             if (check_copied) {
                    981:                 refcount = get_refcount(bs, (l2_offset & ~QCOW_OFLAG_COPIED)
                    982:                     >> s->cluster_bits);
                    983:                 if ((refcount == 1) != ((l2_offset & QCOW_OFLAG_COPIED) != 0)) {
                    984:                     fprintf(stderr, "ERROR OFLAG_COPIED: l2_offset=%" PRIx64
                    985:                         " refcount=%d\n", l2_offset, refcount);
                    986:                     errors++;
                    987:                 }
                    988:             }
                    989: 
                    990:             /* Mark L2 table as used */
                    991:             l2_offset &= ~QCOW_OFLAG_COPIED;
                    992:             errors += inc_refcounts(bs, refcount_table,
                    993:                           refcount_table_size,
                    994:                           l2_offset,
                    995:                           s->cluster_size);
                    996: 
                    997:             /* L2 tables are cluster aligned */
                    998:             if (l2_offset & (s->cluster_size - 1)) {
                    999:                 fprintf(stderr, "ERROR l2_offset=%" PRIx64 ": Table is not "
                   1000:                     "cluster aligned; L1 entry corrupted\n", l2_offset);
                   1001:                 errors++;
                   1002:             }
                   1003: 
                   1004:             /* Process and check L2 entries */
                   1005:             ret = check_refcounts_l2(bs, refcount_table, refcount_table_size,
                   1006:                 l2_offset, check_copied);
                   1007:             if (ret < 0) {
                   1008:                 goto fail;
                   1009:             }
                   1010:             errors += ret;
                   1011:         }
                   1012:     }
                   1013:     qemu_free(l1_table);
                   1014:     return errors;
                   1015: 
                   1016: fail:
                   1017:     fprintf(stderr, "ERROR: I/O error in check_refcounts_l1\n");
                   1018:     qemu_free(l1_table);
                   1019:     return -EIO;
                   1020: }
                   1021: 
                   1022: /*
                   1023:  * Checks an image for refcount consistency.
                   1024:  *
                   1025:  * Returns 0 if no errors are found, the number of errors in case the image is
                   1026:  * detected as corrupted, and -errno when an internal error occured.
                   1027:  */
                   1028: int qcow2_check_refcounts(BlockDriverState *bs)
                   1029: {
                   1030:     BDRVQcowState *s = bs->opaque;
                   1031:     int64_t size;
                   1032:     int nb_clusters, refcount1, refcount2, i;
                   1033:     QCowSnapshot *sn;
                   1034:     uint16_t *refcount_table;
                   1035:     int ret, errors = 0;
                   1036: 
                   1037:     size = bdrv_getlength(s->hd);
                   1038:     nb_clusters = size_to_clusters(s, size);
                   1039:     refcount_table = qemu_mallocz(nb_clusters * sizeof(uint16_t));
                   1040: 
                   1041:     /* header */
                   1042:     errors += inc_refcounts(bs, refcount_table, nb_clusters,
                   1043:                   0, s->cluster_size);
                   1044: 
                   1045:     /* current L1 table */
                   1046:     ret = check_refcounts_l1(bs, refcount_table, nb_clusters,
                   1047:                        s->l1_table_offset, s->l1_size, 1);
                   1048:     if (ret < 0) {
                   1049:         return ret;
                   1050:     }
                   1051:     errors += ret;
                   1052: 
                   1053:     /* snapshots */
                   1054:     for(i = 0; i < s->nb_snapshots; i++) {
                   1055:         sn = s->snapshots + i;
                   1056:         check_refcounts_l1(bs, refcount_table, nb_clusters,
                   1057:                            sn->l1_table_offset, sn->l1_size, 0);
                   1058:     }
                   1059:     errors += inc_refcounts(bs, refcount_table, nb_clusters,
                   1060:                   s->snapshots_offset, s->snapshots_size);
                   1061: 
                   1062:     /* refcount data */
                   1063:     errors += inc_refcounts(bs, refcount_table, nb_clusters,
                   1064:                   s->refcount_table_offset,
                   1065:                   s->refcount_table_size * sizeof(uint64_t));
                   1066:     for(i = 0; i < s->refcount_table_size; i++) {
                   1067:         int64_t offset;
                   1068:         offset = s->refcount_table[i];
                   1069:         if (offset != 0) {
                   1070:             errors += inc_refcounts(bs, refcount_table, nb_clusters,
                   1071:                           offset, s->cluster_size);
                   1072:         }
                   1073:     }
                   1074: 
                   1075:     /* compare ref counts */
                   1076:     for(i = 0; i < nb_clusters; i++) {
                   1077:         refcount1 = get_refcount(bs, i);
                   1078:         refcount2 = refcount_table[i];
                   1079:         if (refcount1 != refcount2) {
                   1080:             fprintf(stderr, "ERROR cluster %d refcount=%d reference=%d\n",
                   1081:                    i, refcount1, refcount2);
                   1082:             errors++;
                   1083:         }
                   1084:     }
                   1085: 
                   1086:     qemu_free(refcount_table);
                   1087: 
                   1088:     return errors;
                   1089: }
                   1090: 

unix.superglobalmegacorp.com

This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.