Annotation of qemu/block/qcow2.c, revision 1.1.1.1

1.1       root        1: /*
                      2:  * Block driver for the QCOW version 2 format
                      3:  *
                      4:  * Copyright (c) 2004-2006 Fabrice Bellard
                      5:  *
                      6:  * Permission is hereby granted, free of charge, to any person obtaining a copy
                      7:  * of this software and associated documentation files (the "Software"), to deal
                      8:  * in the Software without restriction, including without limitation the rights
                      9:  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
                     10:  * copies of the Software, and to permit persons to whom the Software is
                     11:  * furnished to do so, subject to the following conditions:
                     12:  *
                     13:  * The above copyright notice and this permission notice shall be included in
                     14:  * all copies or substantial portions of the Software.
                     15:  *
                     16:  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
                     17:  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
                     18:  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
                     19:  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
                     20:  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
                     21:  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
                     22:  * THE SOFTWARE.
                     23:  */
                     24: #include "qemu-common.h"
                     25: #include "block_int.h"
                     26: #include "module.h"
                     27: #include <zlib.h>
                     28: #include "aes.h"
                     29: #include "block/qcow2.h"
                     30: 
                     31: /*
                     32:   Differences with QCOW:
                     33: 
                     34:   - Support for multiple incremental snapshots.
                     35:   - Memory management by reference counts.
                     36:   - Clusters which have a reference count of one have the bit
                     37:     QCOW_OFLAG_COPIED to optimize write performance.
                     38:   - Size of compressed clusters is stored in sectors to reduce bit usage
                     39:     in the cluster offsets.
                     40:   - Support for storing additional data (such as the VM state) in the
                     41:     snapshots.
                     42:   - If a backing store is used, the cluster size is not constrained
                     43:     (could be backported to QCOW).
                     44:   - L2 tables always have a size of one cluster.
                     45: */
                     46: 
                     47: 
                     /*
                      * Header that precedes each qcow2 header extension in the image file.
                      * Both fields are converted from big-endian after being read.
                      */
                     typedef struct {
                         uint32_t magic;     /* extension type, one of QCOW_EXT_MAGIC_* */
                         uint32_t len;       /* payload length in bytes, padding excluded */
                     } QCowExtension;

                     /* Marks the end of the header extension list. */
                     #define QCOW_EXT_MAGIC_END              0
                     /* The payload is the name of the backing file format. */
                     #define QCOW_EXT_MAGIC_BACKING_FORMAT   0xE2792ACA
                     54: 
                     55: 
                     56: 
                     57: static int qcow_probe(const uint8_t *buf, int buf_size, const char *filename)
                     58: {
                     59:     const QCowHeader *cow_header = (const void *)buf;
                     60: 
                     61:     if (buf_size >= sizeof(QCowHeader) &&
                     62:         be32_to_cpu(cow_header->magic) == QCOW_MAGIC &&
                     63:         be32_to_cpu(cow_header->version) == QCOW_VERSION)
                     64:         return 100;
                     65:     else
                     66:         return 0;
                     67: }
                     68: 
                     69: 
                     70: /* 
                     71:  * read qcow2 extension and fill bs
                     72:  * start reading from start_offset
                     73:  * finish reading upon magic of value 0 or when end_offset reached
                     74:  * unknown magic is skipped (future extension this version knows nothing about)
                     75:  * return 0 upon success, non-0 otherwise
                     76:  */
                     77: static int qcow_read_extensions(BlockDriverState *bs, uint64_t start_offset,
                     78:                                 uint64_t end_offset)
                     79: {
                     80:     BDRVQcowState *s = bs->opaque;
                     81:     QCowExtension ext;
                     82:     uint64_t offset;
                     83: 
                     84: #ifdef DEBUG_EXT
                     85:     printf("qcow_read_extensions: start=%ld end=%ld\n", start_offset, end_offset);
                     86: #endif
                     87:     offset = start_offset;
                     88:     while (offset < end_offset) {
                     89: 
                     90: #ifdef DEBUG_EXT
                     91:         /* Sanity check */
                     92:         if (offset > s->cluster_size)
                     93:             printf("qcow_handle_extension: suspicious offset %lu\n", offset);
                     94: 
                     95:         printf("attemting to read extended header in offset %lu\n", offset);
                     96: #endif
                     97: 
                     98:         if (bdrv_pread(s->hd, offset, &ext, sizeof(ext)) != sizeof(ext)) {
                     99:             fprintf(stderr, "qcow_handle_extension: ERROR: pread fail from offset %llu\n",
                    100:                     (unsigned long long)offset);
                    101:             return 1;
                    102:         }
                    103:         be32_to_cpus(&ext.magic);
                    104:         be32_to_cpus(&ext.len);
                    105:         offset += sizeof(ext);
                    106: #ifdef DEBUG_EXT
                    107:         printf("ext.magic = 0x%x\n", ext.magic);
                    108: #endif
                    109:         switch (ext.magic) {
                    110:         case QCOW_EXT_MAGIC_END:
                    111:             return 0;
                    112: 
                    113:         case QCOW_EXT_MAGIC_BACKING_FORMAT:
                    114:             if (ext.len >= sizeof(bs->backing_format)) {
                    115:                 fprintf(stderr, "ERROR: ext_backing_format: len=%u too large"
                    116:                         " (>=%zu)\n",
                    117:                         ext.len, sizeof(bs->backing_format));
                    118:                 return 2;
                    119:             }
                    120:             if (bdrv_pread(s->hd, offset , bs->backing_format,
                    121:                            ext.len) != ext.len)
                    122:                 return 3;
                    123:             bs->backing_format[ext.len] = '\0';
                    124: #ifdef DEBUG_EXT
                    125:             printf("Qcow2: Got format extension %s\n", bs->backing_format);
                    126: #endif
                    127:             offset += ((ext.len + 7) & ~7);
                    128:             break;
                    129: 
                    130:         default:
                    131:             /* unknown magic -- just skip it */
                    132:             offset += ((ext.len + 7) & ~7);
                    133:             break;
                    134:         }
                    135:     }
                    136: 
                    137:     return 0;
                    138: }
                    139: 
                    140: 
                    /*
                     * Open a qcow2 image: open the underlying file, read and validate the
                     * header, derive the cluster geometry, load the L1 table, allocate the
                     * caches and read the header extensions, the backing file name and the
                     * snapshot table.
                     *
                     * Returns 0 on success, the error from bdrv_file_open() if the file cannot
                     * be opened, and -1 for every other failure.
                     */
                    static int qcow_open(BlockDriverState *bs, const char *filename, int flags)
                    {
                        BDRVQcowState *s = bs->opaque;
                        int len, i, shift, ret;
                        QCowHeader header;
                        uint64_t ext_end;

                        ret = bdrv_file_open(&s->hd, filename, flags);
                        if (ret < 0)
                            return ret;
                        if (bdrv_pread(s->hd, 0, &header, sizeof(header)) != sizeof(header))
                            goto fail;
                        /* the header fields are stored big-endian; convert them in place */
                        be32_to_cpus(&header.magic);
                        be32_to_cpus(&header.version);
                        be64_to_cpus(&header.backing_file_offset);
                        be32_to_cpus(&header.backing_file_size);
                        be64_to_cpus(&header.size);
                        be32_to_cpus(&header.cluster_bits);
                        be32_to_cpus(&header.crypt_method);
                        be64_to_cpus(&header.l1_table_offset);
                        be32_to_cpus(&header.l1_size);
                        be64_to_cpus(&header.refcount_table_offset);
                        be32_to_cpus(&header.refcount_table_clusters);
                        be64_to_cpus(&header.snapshots_offset);
                        be32_to_cpus(&header.nb_snapshots);

                        /* reject anything that is not a supported qcow2 image */
                        if (header.magic != QCOW_MAGIC || header.version != QCOW_VERSION)
                            goto fail;
                        if (header.size <= 1 ||
                            header.cluster_bits < MIN_CLUSTER_BITS ||
                            header.cluster_bits > MAX_CLUSTER_BITS)
                            goto fail;
                        if (header.crypt_method > QCOW_CRYPT_AES)
                            goto fail;
                        /* only the method from the header is recorded here; s->crypt_method
                           itself is set by qcow_set_key() */
                        s->crypt_method_header = header.crypt_method;
                        if (s->crypt_method_header)
                            bs->encrypted = 1;
                        /* cluster geometry derived from cluster_bits (sectors are 512 bytes) */
                        s->cluster_bits = header.cluster_bits;
                        s->cluster_size = 1 << s->cluster_bits;
                        s->cluster_sectors = 1 << (s->cluster_bits - 9);
                        s->l2_bits = s->cluster_bits - 3; /* L2 is always one cluster */
                        s->l2_size = 1 << s->l2_bits;
                        bs->total_sectors = header.size / 512;
                        /* NOTE(review): shift and masks presumably split the offset and the
                           sector count of a compressed cluster entry -- confirm against the
                           users of csize_shift/csize_mask */
                        s->csize_shift = (62 - (s->cluster_bits - 8));
                        s->csize_mask = (1 << (s->cluster_bits - 8)) - 1;
                        s->cluster_offset_mask = (1LL << s->csize_shift) - 1;
                        s->refcount_table_offset = header.refcount_table_offset;
                        /* clusters * (cluster_size / 8) -- presumably a count of 8-byte
                           entries; confirm */
                        s->refcount_table_size =
                            header.refcount_table_clusters << (s->cluster_bits - 3);

                        s->snapshots_offset = header.snapshots_offset;
                        s->nb_snapshots = header.nb_snapshots;

                        /* read the level 1 table */
                        s->l1_size = header.l1_size;
                        /* one L1 entry maps 1 << shift bytes (one full L2 table of clusters) */
                        shift = s->cluster_bits + s->l2_bits;
                        s->l1_vm_state_index = (header.size + (1LL << shift) - 1) >> shift;
                        /* the L1 table must contain at least enough entries to put
                           header.size bytes */
                        if (s->l1_size < s->l1_vm_state_index)
                            goto fail;
                        s->l1_table_offset = header.l1_table_offset;
                        /* the allocation is rounded up to a multiple of 512 bytes */
                        s->l1_table = qemu_mallocz(
                            align_offset(s->l1_size * sizeof(uint64_t), 512));
                        if (bdrv_pread(s->hd, s->l1_table_offset, s->l1_table, s->l1_size * sizeof(uint64_t)) !=
                            s->l1_size * sizeof(uint64_t))
                            goto fail;
                        /* L1 entries are stored big-endian as well */
                        for(i = 0;i < s->l1_size; i++) {
                            be64_to_cpus(&s->l1_table[i]);
                        }
                        /* alloc L2 cache */
                        s->l2_cache = qemu_malloc(s->l2_size * L2_CACHE_SIZE * sizeof(uint64_t));
                        s->cluster_cache = qemu_malloc(s->cluster_size);
                        /* one more sector for decompressed data alignment */
                        s->cluster_data = qemu_malloc(QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size
                                                      + 512);
                        /* -1: the cluster cache holds no cluster yet */
                        s->cluster_cache_offset = -1;

                        if (qcow2_refcount_init(bs) < 0)
                            goto fail;

                        LIST_INIT(&s->cluster_allocs);

                        /* read qcow2 extensions */
                        /* they start right after the header and end at the backing file name
                           if there is one, otherwise at the end of the first cluster */
                        if (header.backing_file_offset)
                            ext_end = header.backing_file_offset;
                        else
                            ext_end = s->cluster_size;
                        if (qcow_read_extensions(bs, sizeof(header), ext_end))
                            goto fail;

                        /* read the backing file name */
                        if (header.backing_file_offset != 0) {
                            len = header.backing_file_size;
                            /* NOTE(review): 1023 presumably matches the size of
                               bs->backing_file minus the terminating NUL -- confirm against
                               its declaration; longer names are silently truncated */
                            if (len > 1023)
                                len = 1023;
                            if (bdrv_pread(s->hd, header.backing_file_offset, bs->backing_file, len) != len)
                                goto fail;
                            bs->backing_file[len] = '\0';
                        }
                        if (qcow2_read_snapshots(bs) < 0)
                            goto fail;

                    #ifdef DEBUG_ALLOC
                        qcow2_check_refcounts(bs);
                    #endif
                        return 0;

                     fail:
                        /* Common error exit: everything is released regardless of how far the
                           setup got.  NOTE(review): this relies on the cleanup helpers and
                           qemu_free() coping with state that was never set up (presumably *s
                           starts out zeroed) -- confirm. */
                        qcow2_free_snapshots(bs);
                        qcow2_refcount_close(bs);
                        qemu_free(s->l1_table);
                        qemu_free(s->l2_cache);
                        qemu_free(s->cluster_cache);
                        qemu_free(s->cluster_data);
                        bdrv_delete(s->hd);
                        return -1;
                    }
                    259: 
                    260: static int qcow_set_key(BlockDriverState *bs, const char *key)
                    261: {
                    262:     BDRVQcowState *s = bs->opaque;
                    263:     uint8_t keybuf[16];
                    264:     int len, i;
                    265: 
                    266:     memset(keybuf, 0, 16);
                    267:     len = strlen(key);
                    268:     if (len > 16)
                    269:         len = 16;
                    270:     /* XXX: we could compress the chars to 7 bits to increase
                    271:        entropy */
                    272:     for(i = 0;i < len;i++) {
                    273:         keybuf[i] = key[i];
                    274:     }
                    275:     s->crypt_method = s->crypt_method_header;
                    276: 
                    277:     if (AES_set_encrypt_key(keybuf, 128, &s->aes_encrypt_key) != 0)
                    278:         return -1;
                    279:     if (AES_set_decrypt_key(keybuf, 128, &s->aes_decrypt_key) != 0)
                    280:         return -1;
                    281: #if 0
                    282:     /* test */
                    283:     {
                    284:         uint8_t in[16];
                    285:         uint8_t out[16];
                    286:         uint8_t tmp[16];
                    287:         for(i=0;i<16;i++)
                    288:             in[i] = i;
                    289:         AES_encrypt(in, tmp, &s->aes_encrypt_key);
                    290:         AES_decrypt(tmp, out, &s->aes_decrypt_key);
                    291:         for(i = 0; i < 16; i++)
                    292:             printf(" %02x", tmp[i]);
                    293:         printf("\n");
                    294:         for(i = 0; i < 16; i++)
                    295:             printf(" %02x", out[i]);
                    296:         printf("\n");
                    297:     }
                    298: #endif
                    299:     return 0;
                    300: }
                    301: 
                    302: static int qcow_is_allocated(BlockDriverState *bs, int64_t sector_num,
                    303:                              int nb_sectors, int *pnum)
                    304: {
                    305:     uint64_t cluster_offset;
                    306: 
                    307:     *pnum = nb_sectors;
                    308:     cluster_offset = qcow2_get_cluster_offset(bs, sector_num << 9, pnum);
                    309: 
                    310:     return (cluster_offset != 0);
                    311: }
                    312: 
                    313: /* handle reading after the end of the backing file */
                    314: int qcow2_backing_read1(BlockDriverState *bs,
                    315:                   int64_t sector_num, uint8_t *buf, int nb_sectors)
                    316: {
                    317:     int n1;
                    318:     if ((sector_num + nb_sectors) <= bs->total_sectors)
                    319:         return nb_sectors;
                    320:     if (sector_num >= bs->total_sectors)
                    321:         n1 = 0;
                    322:     else
                    323:         n1 = bs->total_sectors - sector_num;
                    324:     memset(buf + n1 * 512, 0, 512 * (nb_sectors - n1));
                    325:     return n1;
                    326: }
                    327: 
                    /*
                     * State of one asynchronous qcow2 request.  A request is processed in
                     * steps; the read/write callbacks advance sector_num, buf and nb_sectors
                     * after each step.
                     */
                    typedef struct QCowAIOCB {
                        BlockDriverAIOCB common;    /* generic part; pointers to it are cast to
                                                       and from QCowAIOCB */
                        int64_t sector_num;         /* first sector still to be processed */
                        QEMUIOVector *qiov;         /* the caller's I/O vector */
                        uint8_t *buf;               /* current position in the linear data buffer */
                        void *orig_buf;             /* bounce buffer, allocated only when qiov has
                                                       more than one element */
                        int nb_sectors;             /* sectors still to be processed */
                        int n;                      /* sectors handled by the current step */
                        uint64_t cluster_offset;    /* cluster offset found for the current step */
                        uint8_t *cluster_data;      /* buffer the write path encrypts into */
                        BlockDriverAIOCB *hd_aiocb; /* request in flight on the underlying or
                                                       backing file, NULL if none */
                        struct iovec hd_iov;        /* single element describing the current step */
                        QEMUIOVector hd_qiov;       /* I/O vector wrapping hd_iov */
                        QEMUBH *bh;                 /* bottom half used to continue a step that
                                                       needed no I/O, NULL if none is pending */
                        QCowL2Meta l2meta;          /* allocation state passed to the cluster
                                                       allocation helpers on writes */
                        LIST_ENTRY(QCowAIOCB) next_depend; /* link in the dependent_requests list of
                                                              the request this one waits for */
                    } QCowAIOCB;
                    345: 
                    346: static void qcow_aio_cancel(BlockDriverAIOCB *blockacb)
                    347: {
                    348:     QCowAIOCB *acb = (QCowAIOCB *)blockacb;
                    349:     if (acb->hd_aiocb)
                    350:         bdrv_aio_cancel(acb->hd_aiocb);
                    351:     qemu_aio_release(acb);
                    352: }
                    353: 
                    354: static AIOPool qcow_aio_pool = {
                    355:     .aiocb_size         = sizeof(QCowAIOCB),
                    356:     .cancel             = qcow_aio_cancel,
                    357: };
                    358: 
                    359: static void qcow_aio_read_cb(void *opaque, int ret);
                    360: static void qcow_aio_read_bh(void *opaque)
                    361: {
                    362:     QCowAIOCB *acb = opaque;
                    363:     qemu_bh_delete(acb->bh);
                    364:     acb->bh = NULL;
                    365:     qcow_aio_read_cb(opaque, 0);
                    366: }
                    367: 
                    368: static int qcow_schedule_bh(QEMUBHFunc *cb, QCowAIOCB *acb)
                    369: {
                    370:     if (acb->bh)
                    371:         return -EIO;
                    372: 
                    373:     acb->bh = qemu_bh_new(cb, acb);
                    374:     if (!acb->bh)
                    375:         return -EIO;
                    376: 
                    377:     qemu_bh_schedule(acb->bh);
                    378: 
                    379:     return 0;
                    380: }
                    381: 
                    382: static void qcow_aio_read_cb(void *opaque, int ret)
                    383: {
                    384:     QCowAIOCB *acb = opaque;
                    385:     BlockDriverState *bs = acb->common.bs;
                    386:     BDRVQcowState *s = bs->opaque;
                    387:     int index_in_cluster, n1;
                    388: 
                    389:     acb->hd_aiocb = NULL;
                    390:     if (ret < 0)
                    391:         goto done;
                    392: 
                    393:     /* post process the read buffer */
                    394:     if (!acb->cluster_offset) {
                    395:         /* nothing to do */
                    396:     } else if (acb->cluster_offset & QCOW_OFLAG_COMPRESSED) {
                    397:         /* nothing to do */
                    398:     } else {
                    399:         if (s->crypt_method) {
                    400:             qcow2_encrypt_sectors(s, acb->sector_num, acb->buf, acb->buf,
                    401:                             acb->n, 0,
                    402:                             &s->aes_decrypt_key);
                    403:         }
                    404:     }
                    405: 
                    406:     acb->nb_sectors -= acb->n;
                    407:     acb->sector_num += acb->n;
                    408:     acb->buf += acb->n * 512;
                    409: 
                    410:     if (acb->nb_sectors == 0) {
                    411:         /* request completed */
                    412:         ret = 0;
                    413:         goto done;
                    414:     }
                    415: 
                    416:     /* prepare next AIO request */
                    417:     acb->n = acb->nb_sectors;
                    418:     acb->cluster_offset =
                    419:         qcow2_get_cluster_offset(bs, acb->sector_num << 9, &acb->n);
                    420:     index_in_cluster = acb->sector_num & (s->cluster_sectors - 1);
                    421: 
                    422:     if (!acb->cluster_offset) {
                    423:         if (bs->backing_hd) {
                    424:             /* read from the base image */
                    425:             n1 = qcow2_backing_read1(bs->backing_hd, acb->sector_num,
                    426:                                acb->buf, acb->n);
                    427:             if (n1 > 0) {
                    428:                 acb->hd_iov.iov_base = (void *)acb->buf;
                    429:                 acb->hd_iov.iov_len = acb->n * 512;
                    430:                 qemu_iovec_init_external(&acb->hd_qiov, &acb->hd_iov, 1);
                    431:                 acb->hd_aiocb = bdrv_aio_readv(bs->backing_hd, acb->sector_num,
                    432:                                     &acb->hd_qiov, acb->n,
                    433:                                    qcow_aio_read_cb, acb);
                    434:                 if (acb->hd_aiocb == NULL)
                    435:                     goto done;
                    436:             } else {
                    437:                 ret = qcow_schedule_bh(qcow_aio_read_bh, acb);
                    438:                 if (ret < 0)
                    439:                     goto done;
                    440:             }
                    441:         } else {
                    442:             /* Note: in this case, no need to wait */
                    443:             memset(acb->buf, 0, 512 * acb->n);
                    444:             ret = qcow_schedule_bh(qcow_aio_read_bh, acb);
                    445:             if (ret < 0)
                    446:                 goto done;
                    447:         }
                    448:     } else if (acb->cluster_offset & QCOW_OFLAG_COMPRESSED) {
                    449:         /* add AIO support for compressed blocks ? */
                    450:         if (qcow2_decompress_cluster(s, acb->cluster_offset) < 0)
                    451:             goto done;
                    452:         memcpy(acb->buf,
                    453:                s->cluster_cache + index_in_cluster * 512, 512 * acb->n);
                    454:         ret = qcow_schedule_bh(qcow_aio_read_bh, acb);
                    455:         if (ret < 0)
                    456:             goto done;
                    457:     } else {
                    458:         if ((acb->cluster_offset & 511) != 0) {
                    459:             ret = -EIO;
                    460:             goto done;
                    461:         }
                    462: 
                    463:         acb->hd_iov.iov_base = (void *)acb->buf;
                    464:         acb->hd_iov.iov_len = acb->n * 512;
                    465:         qemu_iovec_init_external(&acb->hd_qiov, &acb->hd_iov, 1);
                    466:         acb->hd_aiocb = bdrv_aio_readv(s->hd,
                    467:                             (acb->cluster_offset >> 9) + index_in_cluster,
                    468:                             &acb->hd_qiov, acb->n, qcow_aio_read_cb, acb);
                    469:         if (acb->hd_aiocb == NULL)
                    470:             goto done;
                    471:     }
                    472: 
                    473:     return;
                    474: done:
                    475:     if (acb->qiov->niov > 1) {
                    476:         qemu_iovec_from_buffer(acb->qiov, acb->orig_buf, acb->qiov->size);
                    477:         qemu_vfree(acb->orig_buf);
                    478:     }
                    479:     acb->common.cb(acb->common.opaque, ret);
                    480:     qemu_aio_release(acb);
                    481: }
                    482: 
                    483: static QCowAIOCB *qcow_aio_setup(BlockDriverState *bs,
                    484:         int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
                    485:         BlockDriverCompletionFunc *cb, void *opaque, int is_write)
                    486: {
                    487:     QCowAIOCB *acb;
                    488: 
                    489:     acb = qemu_aio_get(&qcow_aio_pool, bs, cb, opaque);
                    490:     if (!acb)
                    491:         return NULL;
                    492:     acb->hd_aiocb = NULL;
                    493:     acb->sector_num = sector_num;
                    494:     acb->qiov = qiov;
                    495:     if (qiov->niov > 1) {
                    496:         acb->buf = acb->orig_buf = qemu_blockalign(bs, qiov->size);
                    497:         if (is_write)
                    498:             qemu_iovec_to_buffer(qiov, acb->buf);
                    499:     } else {
                    500:         acb->buf = (uint8_t *)qiov->iov->iov_base;
                    501:     }
                    502:     acb->nb_sectors = nb_sectors;
                    503:     acb->n = 0;
                    504:     acb->cluster_offset = 0;
                    505:     acb->l2meta.nb_clusters = 0;
                    506:     LIST_INIT(&acb->l2meta.dependent_requests);
                    507:     return acb;
                    508: }
                    509: 
                    510: static BlockDriverAIOCB *qcow_aio_readv(BlockDriverState *bs,
                    511:         int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
                    512:         BlockDriverCompletionFunc *cb, void *opaque)
                    513: {
                    514:     QCowAIOCB *acb;
                    515: 
                    516:     acb = qcow_aio_setup(bs, sector_num, qiov, nb_sectors, cb, opaque, 0);
                    517:     if (!acb)
                    518:         return NULL;
                    519: 
                    520:     qcow_aio_read_cb(acb, 0);
                    521:     return &acb->common;
                    522: }
                    523: 
                    524: static void qcow_aio_write_cb(void *opaque, int ret);
                    525: 
                    526: static void run_dependent_requests(QCowL2Meta *m)
                    527: {
                    528:     QCowAIOCB *req;
                    529:     QCowAIOCB *next;
                    530: 
                    531:     /* Take the request off the list of running requests */
                    532:     if (m->nb_clusters != 0) {
                    533:         LIST_REMOVE(m, next_in_flight);
                    534:     }
                    535: 
                    536:     /*
                    537:      * Restart all dependent requests.
                    538:      * Can't use LIST_FOREACH here - the next link might not be the same
                    539:      * any more after the callback  (request could depend on a different
                    540:      * request now)
                    541:      */
                    542:     for (req = m->dependent_requests.lh_first; req != NULL; req = next) {
                    543:         next = req->next_depend.le_next;
                    544:         qcow_aio_write_cb(req, 0);
                    545:     }
                    546: 
                    547:     /* Empty the list for the next part of the request */
                    548:     LIST_INIT(&m->dependent_requests);
                    549: }
                    550: 
                    551: static void qcow_aio_write_cb(void *opaque, int ret)
                    552: {
                    553:     QCowAIOCB *acb = opaque;
                    554:     BlockDriverState *bs = acb->common.bs;
                    555:     BDRVQcowState *s = bs->opaque;
                    556:     int index_in_cluster;
                    557:     const uint8_t *src_buf;
                    558:     int n_end;
                    559: 
                    560:     acb->hd_aiocb = NULL;
                    561: 
                    562:     if (ret >= 0) {
                    563:         ret = qcow2_alloc_cluster_link_l2(bs, acb->cluster_offset, &acb->l2meta);
                    564:     }
                    565: 
                    566:     run_dependent_requests(&acb->l2meta);
                    567: 
                    568:     if (ret < 0)
                    569:         goto done;
                    570: 
                    571:     acb->nb_sectors -= acb->n;
                    572:     acb->sector_num += acb->n;
                    573:     acb->buf += acb->n * 512;
                    574: 
                    575:     if (acb->nb_sectors == 0) {
                    576:         /* request completed */
                    577:         ret = 0;
                    578:         goto done;
                    579:     }
                    580: 
                    581:     index_in_cluster = acb->sector_num & (s->cluster_sectors - 1);
                    582:     n_end = index_in_cluster + acb->nb_sectors;
                    583:     if (s->crypt_method &&
                    584:         n_end > QCOW_MAX_CRYPT_CLUSTERS * s->cluster_sectors)
                    585:         n_end = QCOW_MAX_CRYPT_CLUSTERS * s->cluster_sectors;
                    586: 
                    587:     acb->cluster_offset = qcow2_alloc_cluster_offset(bs, acb->sector_num << 9,
                    588:                                           index_in_cluster,
                    589:                                           n_end, &acb->n, &acb->l2meta);
                    590: 
                    591:     /* Need to wait for another request? If so, we are done for now. */
                    592:     if (!acb->cluster_offset && acb->l2meta.depends_on != NULL) {
                    593:         LIST_INSERT_HEAD(&acb->l2meta.depends_on->dependent_requests,
                    594:             acb, next_depend);
                    595:         return;
                    596:     }
                    597: 
                    598:     if (!acb->cluster_offset || (acb->cluster_offset & 511) != 0) {
                    599:         ret = -EIO;
                    600:         goto done;
                    601:     }
                    602:     if (s->crypt_method) {
                    603:         if (!acb->cluster_data) {
                    604:             acb->cluster_data = qemu_mallocz(QCOW_MAX_CRYPT_CLUSTERS *
                    605:                                              s->cluster_size);
                    606:         }
                    607:         qcow2_encrypt_sectors(s, acb->sector_num, acb->cluster_data, acb->buf,
                    608:                         acb->n, 1, &s->aes_encrypt_key);
                    609:         src_buf = acb->cluster_data;
                    610:     } else {
                    611:         src_buf = acb->buf;
                    612:     }
                    613:     acb->hd_iov.iov_base = (void *)src_buf;
                    614:     acb->hd_iov.iov_len = acb->n * 512;
                    615:     qemu_iovec_init_external(&acb->hd_qiov, &acb->hd_iov, 1);
                    616:     acb->hd_aiocb = bdrv_aio_writev(s->hd,
                    617:                                     (acb->cluster_offset >> 9) + index_in_cluster,
                    618:                                     &acb->hd_qiov, acb->n,
                    619:                                     qcow_aio_write_cb, acb);
                    620:     if (acb->hd_aiocb == NULL)
                    621:         goto done;
                    622: 
                    623:     return;
                    624: 
                    625: done:
                    626:     if (acb->qiov->niov > 1)
                    627:         qemu_vfree(acb->orig_buf);
                    628:     acb->common.cb(acb->common.opaque, ret);
                    629:     qemu_aio_release(acb);
                    630: }
                    631: 
                    632: static BlockDriverAIOCB *qcow_aio_writev(BlockDriverState *bs,
                    633:         int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
                    634:         BlockDriverCompletionFunc *cb, void *opaque)
                    635: {
                    636:     BDRVQcowState *s = bs->opaque;
                    637:     QCowAIOCB *acb;
                    638: 
                    639:     s->cluster_cache_offset = -1; /* disable compressed cache */
                    640: 
                    641:     acb = qcow_aio_setup(bs, sector_num, qiov, nb_sectors, cb, opaque, 1);
                    642:     if (!acb)
                    643:         return NULL;
                    644: 
                    645:     qcow_aio_write_cb(acb, 0);
                    646:     return &acb->common;
                    647: }
                    648: 
                    649: static void qcow_close(BlockDriverState *bs)
                    650: {
                    651:     BDRVQcowState *s = bs->opaque;
                    652:     qemu_free(s->l1_table);
                    653:     qemu_free(s->l2_cache);
                    654:     qemu_free(s->cluster_cache);
                    655:     qemu_free(s->cluster_data);
                    656:     qcow2_refcount_close(bs);
                    657:     bdrv_delete(s->hd);
                    658: }
                    659: 
                    660: static int get_bits_from_size(size_t size)
                    661: {
                    662:     int res = 0;
                    663: 
                    664:     if (size == 0) {
                    665:         return -1;
                    666:     }
                    667: 
                    668:     while (size != 1) {
                    669:         /* Not a power of two */
                    670:         if (size & 1) {
                    671:             return -1;
                    672:         }
                    673: 
                    674:         size >>= 1;
                    675:         res++;
                    676:     }
                    677: 
                    678:     return res;
                    679: }
                    680: 
                    681: static int qcow_create2(const char *filename, int64_t total_size,
                    682:                         const char *backing_file, const char *backing_format,
                    683:                         int flags, size_t cluster_size)
                    684: {
                    685: 
                    686:     int fd, header_size, backing_filename_len, l1_size, i, shift, l2_bits;
                    687:     int ref_clusters, backing_format_len = 0;
                    688:     QCowHeader header;
                    689:     uint64_t tmp, offset;
                    690:     QCowCreateState s1, *s = &s1;
                    691:     QCowExtension ext_bf = {0, 0};
                    692: 
                    693: 
                    694:     memset(s, 0, sizeof(*s));
                    695: 
                    696:     fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, 0644);
                    697:     if (fd < 0)
                    698:         return -1;
                    699:     memset(&header, 0, sizeof(header));
                    700:     header.magic = cpu_to_be32(QCOW_MAGIC);
                    701:     header.version = cpu_to_be32(QCOW_VERSION);
                    702:     header.size = cpu_to_be64(total_size * 512);
                    703:     header_size = sizeof(header);
                    704:     backing_filename_len = 0;
                    705:     if (backing_file) {
                    706:         if (backing_format) {
                    707:             ext_bf.magic = QCOW_EXT_MAGIC_BACKING_FORMAT;
                    708:             backing_format_len = strlen(backing_format);
                    709:             ext_bf.len = (backing_format_len + 7) & ~7;
                    710:             header_size += ((sizeof(ext_bf) + ext_bf.len + 7) & ~7);
                    711:         }
                    712:         header.backing_file_offset = cpu_to_be64(header_size);
                    713:         backing_filename_len = strlen(backing_file);
                    714:         header.backing_file_size = cpu_to_be32(backing_filename_len);
                    715:         header_size += backing_filename_len;
                    716:     }
                    717: 
                    718:     /* Cluster size */
                    719:     s->cluster_bits = get_bits_from_size(cluster_size);
                    720:     if (s->cluster_bits < MIN_CLUSTER_BITS ||
                    721:         s->cluster_bits > MAX_CLUSTER_BITS)
                    722:     {
                    723:         fprintf(stderr, "Cluster size must be a power of two between "
                    724:             "%d and %dk\n",
                    725:             1 << MIN_CLUSTER_BITS,
                    726:             1 << (MAX_CLUSTER_BITS - 10));
                    727:         return -EINVAL;
                    728:     }
                    729:     s->cluster_size = 1 << s->cluster_bits;
                    730: 
                    731:     header.cluster_bits = cpu_to_be32(s->cluster_bits);
                    732:     header_size = (header_size + 7) & ~7;
                    733:     if (flags & BLOCK_FLAG_ENCRYPT) {
                    734:         header.crypt_method = cpu_to_be32(QCOW_CRYPT_AES);
                    735:     } else {
                    736:         header.crypt_method = cpu_to_be32(QCOW_CRYPT_NONE);
                    737:     }
                    738:     l2_bits = s->cluster_bits - 3;
                    739:     shift = s->cluster_bits + l2_bits;
                    740:     l1_size = (((total_size * 512) + (1LL << shift) - 1) >> shift);
                    741:     offset = align_offset(header_size, s->cluster_size);
                    742:     s->l1_table_offset = offset;
                    743:     header.l1_table_offset = cpu_to_be64(s->l1_table_offset);
                    744:     header.l1_size = cpu_to_be32(l1_size);
                    745:     offset += align_offset(l1_size * sizeof(uint64_t), s->cluster_size);
                    746: 
                    747:     s->refcount_table = qemu_mallocz(s->cluster_size);
                    748: 
                    749:     s->refcount_table_offset = offset;
                    750:     header.refcount_table_offset = cpu_to_be64(offset);
                    751:     header.refcount_table_clusters = cpu_to_be32(1);
                    752:     offset += s->cluster_size;
                    753:     s->refcount_block_offset = offset;
                    754: 
                    755:     /* count how many refcount blocks needed */
                    756:     tmp = offset >> s->cluster_bits;
                    757:     ref_clusters = (tmp >> (s->cluster_bits - REFCOUNT_SHIFT)) + 1;
                    758:     for (i=0; i < ref_clusters; i++) {
                    759:         s->refcount_table[i] = cpu_to_be64(offset);
                    760:         offset += s->cluster_size;
                    761:     }
                    762: 
                    763:     s->refcount_block = qemu_mallocz(ref_clusters * s->cluster_size);
                    764: 
                    765:     /* update refcounts */
                    766:     qcow2_create_refcount_update(s, 0, header_size);
                    767:     qcow2_create_refcount_update(s, s->l1_table_offset,
                    768:         l1_size * sizeof(uint64_t));
                    769:     qcow2_create_refcount_update(s, s->refcount_table_offset, s->cluster_size);
                    770:     qcow2_create_refcount_update(s, s->refcount_block_offset,
                    771:         ref_clusters * s->cluster_size);
                    772: 
                    773:     /* write all the data */
                    774:     write(fd, &header, sizeof(header));
                    775:     if (backing_file) {
                    776:         if (backing_format_len) {
                    777:             char zero[16];
                    778:             int d = ext_bf.len - backing_format_len;
                    779: 
                    780:             memset(zero, 0, sizeof(zero));
                    781:             cpu_to_be32s(&ext_bf.magic);
                    782:             cpu_to_be32s(&ext_bf.len);
                    783:             write(fd, &ext_bf, sizeof(ext_bf));
                    784:             write(fd, backing_format, backing_format_len);
                    785:             if (d>0) {
                    786:                 write(fd, zero, d);
                    787:             }
                    788:         }
                    789:         write(fd, backing_file, backing_filename_len);
                    790:     }
                    791:     lseek(fd, s->l1_table_offset, SEEK_SET);
                    792:     tmp = 0;
                    793:     for(i = 0;i < l1_size; i++) {
                    794:         write(fd, &tmp, sizeof(tmp));
                    795:     }
                    796:     lseek(fd, s->refcount_table_offset, SEEK_SET);
                    797:     write(fd, s->refcount_table, s->cluster_size);
                    798: 
                    799:     lseek(fd, s->refcount_block_offset, SEEK_SET);
                    800:     write(fd, s->refcount_block, ref_clusters * s->cluster_size);
                    801: 
                    802:     qemu_free(s->refcount_table);
                    803:     qemu_free(s->refcount_block);
                    804:     close(fd);
                    805:     return 0;
                    806: }
                    807: 
                    808: static int qcow_create(const char *filename, QEMUOptionParameter *options)
                    809: {
                    810:     const char *backing_file = NULL;
                    811:     const char *backing_fmt = NULL;
                    812:     uint64_t sectors = 0;
                    813:     int flags = 0;
                    814:     size_t cluster_size = 65536;
                    815: 
                    816:     /* Read out options */
                    817:     while (options && options->name) {
                    818:         if (!strcmp(options->name, BLOCK_OPT_SIZE)) {
                    819:             sectors = options->value.n / 512;
                    820:         } else if (!strcmp(options->name, BLOCK_OPT_BACKING_FILE)) {
                    821:             backing_file = options->value.s;
                    822:         } else if (!strcmp(options->name, BLOCK_OPT_BACKING_FMT)) {
                    823:             backing_fmt = options->value.s;
                    824:         } else if (!strcmp(options->name, BLOCK_OPT_ENCRYPT)) {
                    825:             flags |= options->value.n ? BLOCK_FLAG_ENCRYPT : 0;
                    826:         } else if (!strcmp(options->name, BLOCK_OPT_CLUSTER_SIZE)) {
                    827:             if (options->value.n) {
                    828:                 cluster_size = options->value.n;
                    829:             }
                    830:         }
                    831:         options++;
                    832:     }
                    833: 
                    834:     return qcow_create2(filename, sectors, backing_file, backing_fmt, flags,
                    835:         cluster_size);
                    836: }
                    837: 
/*
 * bdrv_make_empty callback.
 *
 * Currently a no-op that always reports success: the implementation below
 * is compiled out because it is marked as not correct.
 */
static int qcow_make_empty(BlockDriverState *bs)
{
#if 0
    /* XXX: not correct */
    /* Disabled sketch: clear the L1 table on disk and cut the image file
       off right after it */
    BDRVQcowState *s = bs->opaque;
    uint32_t l1_length = s->l1_size * sizeof(uint64_t);
    int ret;

    memset(s->l1_table, 0, l1_length);
    if (bdrv_pwrite(s->hd, s->l1_table_offset, s->l1_table, l1_length) < 0)
        return -1;
    ret = bdrv_truncate(s->hd, s->l1_table_offset + l1_length);
    if (ret < 0)
        return ret;

    l2_cache_reset(bs);
#endif
    return 0;
}
                    857: 
                    858: /* XXX: put compressed sectors first, then all the cluster aligned
                    859:    tables to avoid losing bytes in alignment */
                    860: static int qcow_write_compressed(BlockDriverState *bs, int64_t sector_num,
                    861:                                  const uint8_t *buf, int nb_sectors)
                    862: {
                    863:     BDRVQcowState *s = bs->opaque;
                    864:     z_stream strm;
                    865:     int ret, out_len;
                    866:     uint8_t *out_buf;
                    867:     uint64_t cluster_offset;
                    868: 
                    869:     if (nb_sectors == 0) {
                    870:         /* align end of file to a sector boundary to ease reading with
                    871:            sector based I/Os */
                    872:         cluster_offset = bdrv_getlength(s->hd);
                    873:         cluster_offset = (cluster_offset + 511) & ~511;
                    874:         bdrv_truncate(s->hd, cluster_offset);
                    875:         return 0;
                    876:     }
                    877: 
                    878:     if (nb_sectors != s->cluster_sectors)
                    879:         return -EINVAL;
                    880: 
                    881:     out_buf = qemu_malloc(s->cluster_size + (s->cluster_size / 1000) + 128);
                    882: 
                    883:     /* best compression, small window, no zlib header */
                    884:     memset(&strm, 0, sizeof(strm));
                    885:     ret = deflateInit2(&strm, Z_DEFAULT_COMPRESSION,
                    886:                        Z_DEFLATED, -12,
                    887:                        9, Z_DEFAULT_STRATEGY);
                    888:     if (ret != 0) {
                    889:         qemu_free(out_buf);
                    890:         return -1;
                    891:     }
                    892: 
                    893:     strm.avail_in = s->cluster_size;
                    894:     strm.next_in = (uint8_t *)buf;
                    895:     strm.avail_out = s->cluster_size;
                    896:     strm.next_out = out_buf;
                    897: 
                    898:     ret = deflate(&strm, Z_FINISH);
                    899:     if (ret != Z_STREAM_END && ret != Z_OK) {
                    900:         qemu_free(out_buf);
                    901:         deflateEnd(&strm);
                    902:         return -1;
                    903:     }
                    904:     out_len = strm.next_out - out_buf;
                    905: 
                    906:     deflateEnd(&strm);
                    907: 
                    908:     if (ret != Z_STREAM_END || out_len >= s->cluster_size) {
                    909:         /* could not compress: write normal cluster */
                    910:         bdrv_write(bs, sector_num, buf, s->cluster_sectors);
                    911:     } else {
                    912:         cluster_offset = qcow2_alloc_compressed_cluster_offset(bs,
                    913:             sector_num << 9, out_len);
                    914:         if (!cluster_offset)
                    915:             return -1;
                    916:         cluster_offset &= s->cluster_offset_mask;
                    917:         if (bdrv_pwrite(s->hd, cluster_offset, out_buf, out_len) != out_len) {
                    918:             qemu_free(out_buf);
                    919:             return -1;
                    920:         }
                    921:     }
                    922: 
                    923:     qemu_free(out_buf);
                    924:     return 0;
                    925: }
                    926: 
                    927: static void qcow_flush(BlockDriverState *bs)
                    928: {
                    929:     BDRVQcowState *s = bs->opaque;
                    930:     bdrv_flush(s->hd);
                    931: }
                    932: 
                    933: static int64_t qcow_vm_state_offset(BDRVQcowState *s)
                    934: {
                    935:        return (int64_t)s->l1_vm_state_index << (s->cluster_bits + s->l2_bits);
                    936: }
                    937: 
                    938: static int qcow_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
                    939: {
                    940:     BDRVQcowState *s = bs->opaque;
                    941:     bdi->cluster_size = s->cluster_size;
                    942:     bdi->vm_state_offset = qcow_vm_state_offset(s);
                    943:     return 0;
                    944: }
                    945: 
                    946: 
/*
 * bdrv_check callback: delegates to the refcount consistency check and
 * returns its result unchanged.
 */
static int qcow_check(BlockDriverState *bs)
{
    return qcow2_check_refcounts(bs);
}
                    951: 
                    952: #if 0
                    953: static void dump_refcounts(BlockDriverState *bs)
                    954: {
                    955:     BDRVQcowState *s = bs->opaque;
                    956:     int64_t nb_clusters, k, k1, size;
                    957:     int refcount;
                    958: 
                    959:     size = bdrv_getlength(s->hd);
                    960:     nb_clusters = size_to_clusters(s, size);
                    961:     for(k = 0; k < nb_clusters;) {
                    962:         k1 = k;
                    963:         refcount = get_refcount(bs, k);
                    964:         k++;
                    965:         while (k < nb_clusters && get_refcount(bs, k) == refcount)
                    966:             k++;
                    967:         printf("%lld: refcount=%d nb=%lld\n", k, refcount, k - k1);
                    968:     }
                    969: }
                    970: #endif
                    971: 
                    972: static int qcow_save_vmstate(BlockDriverState *bs, const uint8_t *buf,
                    973:                            int64_t pos, int size)
                    974: {
                    975:     BDRVQcowState *s = bs->opaque;
                    976:     int growable = bs->growable;
                    977: 
                    978:     bs->growable = 1;
                    979:     bdrv_pwrite(bs, qcow_vm_state_offset(s) + pos, buf, size);
                    980:     bs->growable = growable;
                    981: 
                    982:     return size;
                    983: }
                    984: 
                    985: static int qcow_load_vmstate(BlockDriverState *bs, uint8_t *buf,
                    986:                            int64_t pos, int size)
                    987: {
                    988:     BDRVQcowState *s = bs->opaque;
                    989:     int growable = bs->growable;
                    990:     int ret;
                    991: 
                    992:     bs->growable = 1;
                    993:     ret = bdrv_pread(bs, qcow_vm_state_offset(s) + pos, buf, size);
                    994:     bs->growable = growable;
                    995: 
                    996:     return ret;
                    997: }
                    998: 
/*
 * Creation options accepted for qcow2 images; these are the option names
 * that qcow_create() looks for.  The list ends with a NULL entry.
 */
static QEMUOptionParameter qcow_create_options[] = {
    {
        .name = BLOCK_OPT_SIZE,
        .type = OPT_SIZE,
        .help = "Virtual disk size"
    },
    {
        .name = BLOCK_OPT_BACKING_FILE,
        .type = OPT_STRING,
        .help = "File name of a base image"
    },
    {
        .name = BLOCK_OPT_BACKING_FMT,
        .type = OPT_STRING,
        .help = "Image format of the base image"
    },
    {
        .name = BLOCK_OPT_ENCRYPT,
        .type = OPT_FLAG,
        .help = "Encrypt the image"
    },
    {
        .name = BLOCK_OPT_CLUSTER_SIZE,
        .type = OPT_SIZE,
        .help = "qcow2 cluster size"
    },
    /* End-of-list marker */
    { NULL }
};
                   1027: 
/*
 * Driver description for the "qcow2" format: maps the block layer's
 * callbacks to the implementations in the qcow2 driver.
 */
static BlockDriver bdrv_qcow2 = {
    .format_name       = "qcow2",
    .instance_size     = sizeof(BDRVQcowState),
    .bdrv_probe                = qcow_probe,
    .bdrv_open         = qcow_open,
    .bdrv_close                = qcow_close,
    .bdrv_create       = qcow_create,
    .bdrv_flush                = qcow_flush,
    .bdrv_is_allocated = qcow_is_allocated,
    .bdrv_set_key      = qcow_set_key,
    .bdrv_make_empty   = qcow_make_empty,

    /* Asynchronous guest I/O and compressed cluster writes */
    .bdrv_aio_readv    = qcow_aio_readv,
    .bdrv_aio_writev   = qcow_aio_writev,
    .bdrv_write_compressed = qcow_write_compressed,

    /* Internal snapshots */
    .bdrv_snapshot_create   = qcow2_snapshot_create,
    .bdrv_snapshot_goto     = qcow2_snapshot_goto,
    .bdrv_snapshot_delete   = qcow2_snapshot_delete,
    .bdrv_snapshot_list     = qcow2_snapshot_list,
    .bdrv_get_info     = qcow_get_info,

    /* VM state stored inside the image */
    .bdrv_save_vmstate    = qcow_save_vmstate,
    .bdrv_load_vmstate    = qcow_load_vmstate,

    .create_options = qcow_create_options,
    .bdrv_check = qcow_check,
};
                   1056: 
/* Register the qcow2 driver with the block layer. */
static void bdrv_qcow2_init(void)
{
    bdrv_register(&bdrv_qcow2);
}

/* Hook the registration function into the block module initialisation */
block_init(bdrv_qcow2_init);

unix.superglobalmegacorp.com

This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.