Annotation of qemu/block/vdi.c, revision 1.1.1.6

1.1       root        1: /*
                      2:  * Block driver for the Virtual Disk Image (VDI) format
                      3:  *
                      4:  * Copyright (c) 2009 Stefan Weil
                      5:  *
                      6:  * This program is free software: you can redistribute it and/or modify
                      7:  * it under the terms of the GNU General Public License as published by
                      8:  * the Free Software Foundation, either version 2 of the License, or
                      9:  * (at your option) version 3 or any later version.
                     10:  *
                     11:  * This program is distributed in the hope that it will be useful,
                     12:  * but WITHOUT ANY WARRANTY; without even the implied warranty of
                     13:  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
                     14:  * GNU General Public License for more details.
                     15:  *
                     16:  * You should have received a copy of the GNU General Public License
                     17:  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
                     18:  *
                     19:  * Reference:
                     20:  * http://forums.virtualbox.org/viewtopic.php?t=8046
                     21:  *
                     22:  * This driver supports create / read / write operations on VDI images.
                     23:  *
                     24:  * Todo (see also TODO in code):
                     25:  *
                     26:  * Some features like snapshots are still missing.
                     27:  *
                     28:  * Deallocation of zero-filled blocks and shrinking images are missing, too
                     29:  * (might be added to common block layer).
                     30:  *
                     31:  * Allocation of blocks could be optimized (less writes to block map and
                     32:  * header).
                     33:  *
                     34:  * Read and write of adjacent blocks could be done in one operation
                     35:  * (current code uses one operation per block (1 MiB)).
                     36:  *
                     37:  * The code is not thread safe (missing locks for changes in header and
                     38:  * block table, no problem with current QEMU).
                     39:  *
                     40:  * Hints:
                     41:  *
                     42:  * Blocks (VDI documentation) correspond to clusters (QEMU).
                     43:  * QEMU's backing files could be implemented using VDI snapshot files (TODO).
                     44:  * VDI snapshot files may also contain the complete machine state.
                     45:  * Maybe this machine state can be converted to QEMU PC machine snapshot data.
                     46:  *
                     47:  * The driver keeps a block cache (little endian entries) in memory.
                     48:  * For the standard block size (1 MiB), a 1 TiB disk will use 4 MiB RAM,
                     49:  * so this seems to be reasonable.
                     50:  */
                     51: 
                     52: #include "qemu-common.h"
                     53: #include "block_int.h"
                     54: #include "module.h"
1.1.1.6 ! root       55: #include "migration.h"
1.1       root       56: 
                     57: #if defined(CONFIG_UUID)
                     58: #include <uuid/uuid.h>
                     59: #else
                     60: /* TODO: move uuid emulation to some central place in QEMU. */
                     61: #include "sysemu.h"     /* UUID_FMT */
                     62: typedef unsigned char uuid_t[16];
                     63: void uuid_generate(uuid_t out);
                     64: int uuid_is_null(const uuid_t uu);
                     65: void uuid_unparse(const uuid_t uu, char *out);
                     66: #endif
                     67: 
                     68: /* Code configuration options. */
                     69: 
                     70: /* Enable debug messages. */
                     71: //~ #define CONFIG_VDI_DEBUG
                     72: 
                     73: /* Support write operations on VDI images. */
                     74: #define CONFIG_VDI_WRITE
                     75: 
                     76: /* Support non-standard block (cluster) size. This is untested.
                     77:  * Maybe it will be needed for very large images.
                     78:  */
                     79: //~ #define CONFIG_VDI_BLOCK_SIZE
                     80: 
                     81: /* Support static (fixed, pre-allocated) images. */
                     82: #define CONFIG_VDI_STATIC_IMAGE
                     83: 
                     84: /* Command line option for static images. */
                     85: #define BLOCK_OPT_STATIC "static"
                     86: 
                     87: #define KiB     1024
                     88: #define MiB     (KiB * KiB)
                     89: 
                     90: #define SECTOR_SIZE 512
1.1.1.5   root       91: #define DEFAULT_CLUSTER_SIZE (1 * MiB)
1.1       root       92: 
                     93: #if defined(CONFIG_VDI_DEBUG)
                     94: #define logout(fmt, ...) \
                     95:                 fprintf(stderr, "vdi\t%-24s" fmt, __func__, ##__VA_ARGS__)
                     96: #else
                     97: #define logout(fmt, ...) ((void)0)
                     98: #endif
                     99: 
                    100: /* Image signature. */
                    101: #define VDI_SIGNATURE 0xbeda107f
                    102: 
                    103: /* Image version. */
                    104: #define VDI_VERSION_1_1 0x00010001
                    105: 
                    106: /* Image type. */
                    107: #define VDI_TYPE_DYNAMIC 1
                    108: #define VDI_TYPE_STATIC  2
                    109: 
                    110: /* Innotek / SUN images use these strings in header.text:
                    111:  * "<<< innotek VirtualBox Disk Image >>>\n"
                    112:  * "<<< Sun xVM VirtualBox Disk Image >>>\n"
                    113:  * "<<< Sun VirtualBox Disk Image >>>\n"
                    114:  * The value does not matter, so QEMU created images use a different text.
                    115:  */
                    116: #define VDI_TEXT "<<< QEMU VM Virtual Disk Image >>>\n"
                    117: 
1.1.1.6 ! root      118: /* A never-allocated block; semantically arbitrary content. */
        !           119: #define VDI_UNALLOCATED 0xffffffffU
        !           120: 
        !           121: /* A discarded (no longer allocated) block; semantically zero-filled. */
        !           122: #define VDI_DISCARDED   0xfffffffeU
        !           123: 
        !           124: #define VDI_IS_ALLOCATED(X) ((X) < VDI_DISCARDED)
1.1       root      125: 
                    126: #if !defined(CONFIG_UUID)
                    127: void uuid_generate(uuid_t out)
                    128: {
1.1.1.4   root      129:     memset(out, 0, sizeof(uuid_t));
1.1       root      130: }
                    131: 
                    132: int uuid_is_null(const uuid_t uu)
                    133: {
                    134:     uuid_t null_uuid = { 0 };
1.1.1.4   root      135:     return memcmp(uu, null_uuid, sizeof(uuid_t)) == 0;
1.1       root      136: }
                    137: 
                    138: void uuid_unparse(const uuid_t uu, char *out)
                    139: {
                    140:     snprintf(out, 37, UUID_FMT,
                    141:             uu[0], uu[1], uu[2], uu[3], uu[4], uu[5], uu[6], uu[7],
                    142:             uu[8], uu[9], uu[10], uu[11], uu[12], uu[13], uu[14], uu[15]);
                    143: }
                    144: #endif
                    145: 
                    146: typedef struct {
                    147:     BlockDriverAIOCB common;
                    148:     int64_t sector_num;
                    149:     QEMUIOVector *qiov;
                    150:     uint8_t *buf;
                    151:     /* Total number of sectors. */
                    152:     int nb_sectors;
                    153:     /* Number of sectors for current AIO. */
                    154:     int n_sectors;
                    155:     /* New allocated block map entry. */
                    156:     uint32_t bmap_first;
                    157:     uint32_t bmap_last;
                    158:     /* Buffer for new allocated block. */
                    159:     void *block_buffer;
                    160:     void *orig_buf;
1.1.1.5   root      161:     bool is_write;
1.1       root      162:     int header_modified;
                    163:     BlockDriverAIOCB *hd_aiocb;
                    164:     struct iovec hd_iov;
                    165:     QEMUIOVector hd_qiov;
                    166:     QEMUBH *bh;
                    167: } VdiAIOCB;
                    168: 
                    169: typedef struct {
                    170:     char text[0x40];
                    171:     uint32_t signature;
                    172:     uint32_t version;
                    173:     uint32_t header_size;
                    174:     uint32_t image_type;
                    175:     uint32_t image_flags;
                    176:     char description[256];
                    177:     uint32_t offset_bmap;
                    178:     uint32_t offset_data;
                    179:     uint32_t cylinders;         /* disk geometry, unused here */
                    180:     uint32_t heads;             /* disk geometry, unused here */
                    181:     uint32_t sectors;           /* disk geometry, unused here */
                    182:     uint32_t sector_size;
                    183:     uint32_t unused1;
                    184:     uint64_t disk_size;
                    185:     uint32_t block_size;
                    186:     uint32_t block_extra;       /* unused here */
                    187:     uint32_t blocks_in_image;
                    188:     uint32_t blocks_allocated;
                    189:     uuid_t uuid_image;
                    190:     uuid_t uuid_last_snap;
                    191:     uuid_t uuid_link;
                    192:     uuid_t uuid_parent;
                    193:     uint64_t unused2[7];
                    194: } VdiHeader;
                    195: 
                    196: typedef struct {
                    197:     /* The block map entries are little endian (even in memory). */
                    198:     uint32_t *bmap;
                    199:     /* Size of block (bytes). */
                    200:     uint32_t block_size;
                    201:     /* Size of block (sectors). */
                    202:     uint32_t block_sectors;
                    203:     /* First sector of block map. */
                    204:     uint32_t bmap_sector;
1.1.1.5   root      205:     /* VDI header (converted to host endianness). */
1.1       root      206:     VdiHeader header;
1.1.1.6 ! root      207: 
        !           208:     Error *migration_blocker;
1.1       root      209: } BDRVVdiState;
                    210: 
                    211: /* Change UUID from little endian (IPRT = VirtualBox format) to big endian
                    212:  * format (network byte order, standard, see RFC 4122) and vice versa.
                    213:  */
                    214: static void uuid_convert(uuid_t uuid)
                    215: {
                    216:     bswap32s((uint32_t *)&uuid[0]);
                    217:     bswap16s((uint16_t *)&uuid[4]);
                    218:     bswap16s((uint16_t *)&uuid[6]);
                    219: }
                    220: 
                    221: static void vdi_header_to_cpu(VdiHeader *header)
                    222: {
                    223:     le32_to_cpus(&header->signature);
                    224:     le32_to_cpus(&header->version);
                    225:     le32_to_cpus(&header->header_size);
                    226:     le32_to_cpus(&header->image_type);
                    227:     le32_to_cpus(&header->image_flags);
                    228:     le32_to_cpus(&header->offset_bmap);
                    229:     le32_to_cpus(&header->offset_data);
                    230:     le32_to_cpus(&header->cylinders);
                    231:     le32_to_cpus(&header->heads);
                    232:     le32_to_cpus(&header->sectors);
                    233:     le32_to_cpus(&header->sector_size);
                    234:     le64_to_cpus(&header->disk_size);
                    235:     le32_to_cpus(&header->block_size);
                    236:     le32_to_cpus(&header->block_extra);
                    237:     le32_to_cpus(&header->blocks_in_image);
                    238:     le32_to_cpus(&header->blocks_allocated);
                    239:     uuid_convert(header->uuid_image);
                    240:     uuid_convert(header->uuid_last_snap);
                    241:     uuid_convert(header->uuid_link);
                    242:     uuid_convert(header->uuid_parent);
                    243: }
                    244: 
                    245: static void vdi_header_to_le(VdiHeader *header)
                    246: {
                    247:     cpu_to_le32s(&header->signature);
                    248:     cpu_to_le32s(&header->version);
                    249:     cpu_to_le32s(&header->header_size);
                    250:     cpu_to_le32s(&header->image_type);
                    251:     cpu_to_le32s(&header->image_flags);
                    252:     cpu_to_le32s(&header->offset_bmap);
                    253:     cpu_to_le32s(&header->offset_data);
                    254:     cpu_to_le32s(&header->cylinders);
                    255:     cpu_to_le32s(&header->heads);
                    256:     cpu_to_le32s(&header->sectors);
                    257:     cpu_to_le32s(&header->sector_size);
                    258:     cpu_to_le64s(&header->disk_size);
                    259:     cpu_to_le32s(&header->block_size);
                    260:     cpu_to_le32s(&header->block_extra);
                    261:     cpu_to_le32s(&header->blocks_in_image);
                    262:     cpu_to_le32s(&header->blocks_allocated);
                    263:     cpu_to_le32s(&header->blocks_allocated);
                    264:     uuid_convert(header->uuid_image);
                    265:     uuid_convert(header->uuid_last_snap);
                    266:     uuid_convert(header->uuid_link);
                    267:     uuid_convert(header->uuid_parent);
                    268: }
                    269: 
                    270: #if defined(CONFIG_VDI_DEBUG)
                    271: static void vdi_header_print(VdiHeader *header)
                    272: {
                    273:     char uuid[37];
                    274:     logout("text        %s", header->text);
                    275:     logout("signature   0x%04x\n", header->signature);
                    276:     logout("header size 0x%04x\n", header->header_size);
                    277:     logout("image type  0x%04x\n", header->image_type);
                    278:     logout("image flags 0x%04x\n", header->image_flags);
                    279:     logout("description %s\n", header->description);
                    280:     logout("offset bmap 0x%04x\n", header->offset_bmap);
                    281:     logout("offset data 0x%04x\n", header->offset_data);
                    282:     logout("cylinders   0x%04x\n", header->cylinders);
                    283:     logout("heads       0x%04x\n", header->heads);
                    284:     logout("sectors     0x%04x\n", header->sectors);
                    285:     logout("sector size 0x%04x\n", header->sector_size);
                    286:     logout("image size  0x%" PRIx64 " B (%" PRIu64 " MiB)\n",
                    287:            header->disk_size, header->disk_size / MiB);
                    288:     logout("block size  0x%04x\n", header->block_size);
                    289:     logout("block extra 0x%04x\n", header->block_extra);
                    290:     logout("blocks tot. 0x%04x\n", header->blocks_in_image);
                    291:     logout("blocks all. 0x%04x\n", header->blocks_allocated);
                    292:     uuid_unparse(header->uuid_image, uuid);
                    293:     logout("uuid image  %s\n", uuid);
                    294:     uuid_unparse(header->uuid_last_snap, uuid);
                    295:     logout("uuid snap   %s\n", uuid);
                    296:     uuid_unparse(header->uuid_link, uuid);
                    297:     logout("uuid link   %s\n", uuid);
                    298:     uuid_unparse(header->uuid_parent, uuid);
                    299:     logout("uuid parent %s\n", uuid);
                    300: }
                    301: #endif
                    302: 
1.1.1.3   root      303: static int vdi_check(BlockDriverState *bs, BdrvCheckResult *res)
1.1       root      304: {
                    305:     /* TODO: additional checks possible. */
                    306:     BDRVVdiState *s = (BDRVVdiState *)bs->opaque;
                    307:     uint32_t blocks_allocated = 0;
                    308:     uint32_t block;
                    309:     uint32_t *bmap;
                    310:     logout("\n");
                    311: 
1.1.1.6 ! root      312:     bmap = g_malloc(s->header.blocks_in_image * sizeof(uint32_t));
1.1       root      313:     memset(bmap, 0xff, s->header.blocks_in_image * sizeof(uint32_t));
                    314: 
                    315:     /* Check block map and value of blocks_allocated. */
                    316:     for (block = 0; block < s->header.blocks_in_image; block++) {
                    317:         uint32_t bmap_entry = le32_to_cpu(s->bmap[block]);
1.1.1.6 ! root      318:         if (VDI_IS_ALLOCATED(bmap_entry)) {
1.1       root      319:             if (bmap_entry < s->header.blocks_in_image) {
                    320:                 blocks_allocated++;
1.1.1.6 ! root      321:                 if (!VDI_IS_ALLOCATED(bmap[bmap_entry])) {
1.1       root      322:                     bmap[bmap_entry] = bmap_entry;
                    323:                 } else {
                    324:                     fprintf(stderr, "ERROR: block index %" PRIu32
                    325:                             " also used by %" PRIu32 "\n", bmap[bmap_entry], bmap_entry);
1.1.1.3   root      326:                     res->corruptions++;
1.1       root      327:                 }
                    328:             } else {
                    329:                 fprintf(stderr, "ERROR: block index %" PRIu32
                    330:                         " too large, is %" PRIu32 "\n", block, bmap_entry);
1.1.1.3   root      331:                 res->corruptions++;
1.1       root      332:             }
                    333:         }
                    334:     }
                    335:     if (blocks_allocated != s->header.blocks_allocated) {
                    336:         fprintf(stderr, "ERROR: allocated blocks mismatch, is %" PRIu32
                    337:                ", should be %" PRIu32 "\n",
                    338:                blocks_allocated, s->header.blocks_allocated);
1.1.1.3   root      339:         res->corruptions++;
1.1       root      340:     }
                    341: 
1.1.1.6 ! root      342:     g_free(bmap);
1.1       root      343: 
1.1.1.3   root      344:     return 0;
1.1       root      345: }
                    346: 
                    347: static int vdi_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
                    348: {
                    349:     /* TODO: vdi_get_info would be needed for machine snapshots.
                    350:        vm_state_offset is still missing. */
                    351:     BDRVVdiState *s = (BDRVVdiState *)bs->opaque;
                    352:     logout("\n");
                    353:     bdi->cluster_size = s->block_size;
                    354:     bdi->vm_state_offset = 0;
                    355:     return 0;
                    356: }
                    357: 
                    358: static int vdi_make_empty(BlockDriverState *bs)
                    359: {
                    360:     /* TODO: missing code. */
                    361:     logout("\n");
                    362:     /* The return value for missing code must be 0, see block.c. */
                    363:     return 0;
                    364: }
                    365: 
                    366: static int vdi_probe(const uint8_t *buf, int buf_size, const char *filename)
                    367: {
                    368:     const VdiHeader *header = (const VdiHeader *)buf;
                    369:     int result = 0;
                    370: 
                    371:     logout("\n");
                    372: 
                    373:     if (buf_size < sizeof(*header)) {
                    374:         /* Header too small, no VDI. */
                    375:     } else if (le32_to_cpu(header->signature) == VDI_SIGNATURE) {
                    376:         result = 100;
                    377:     }
                    378: 
                    379:     if (result == 0) {
                    380:         logout("no vdi image\n");
                    381:     } else {
                    382:         logout("%s", header->text);
                    383:     }
                    384: 
                    385:     return result;
                    386: }
                    387: 
1.1.1.3   root      388: static int vdi_open(BlockDriverState *bs, int flags)
1.1       root      389: {
                    390:     BDRVVdiState *s = bs->opaque;
                    391:     VdiHeader header;
                    392:     size_t bmap_size;
                    393: 
                    394:     logout("\n");
                    395: 
1.1.1.3   root      396:     if (bdrv_read(bs->file, 0, (uint8_t *)&header, 1) < 0) {
1.1       root      397:         goto fail;
                    398:     }
                    399: 
                    400:     vdi_header_to_cpu(&header);
                    401: #if defined(CONFIG_VDI_DEBUG)
                    402:     vdi_header_print(&header);
                    403: #endif
                    404: 
1.1.1.2   root      405:     if (header.disk_size % SECTOR_SIZE != 0) {
                    406:         /* 'VBoxManage convertfromraw' can create images with odd disk sizes.
                    407:            We accept them but round the disk size to the next multiple of
                    408:            SECTOR_SIZE. */
                    409:         logout("odd disk size %" PRIu64 " B, round up\n", header.disk_size);
                    410:         header.disk_size += SECTOR_SIZE - 1;
                    411:         header.disk_size &= ~(SECTOR_SIZE - 1);
                    412:     }
                    413: 
1.1       root      414:     if (header.version != VDI_VERSION_1_1) {
                    415:         logout("unsupported version %u.%u\n",
                    416:                header.version >> 16, header.version & 0xffff);
                    417:         goto fail;
                    418:     } else if (header.offset_bmap % SECTOR_SIZE != 0) {
                    419:         /* We only support block maps which start on a sector boundary. */
                    420:         logout("unsupported block map offset 0x%x B\n", header.offset_bmap);
                    421:         goto fail;
                    422:     } else if (header.offset_data % SECTOR_SIZE != 0) {
                    423:         /* We only support data blocks which start on a sector boundary. */
                    424:         logout("unsupported data offset 0x%x B\n", header.offset_data);
                    425:         goto fail;
                    426:     } else if (header.sector_size != SECTOR_SIZE) {
                    427:         logout("unsupported sector size %u B\n", header.sector_size);
                    428:         goto fail;
                    429:     } else if (header.block_size != 1 * MiB) {
                    430:         logout("unsupported block size %u B\n", header.block_size);
                    431:         goto fail;
1.1.1.2   root      432:     } else if (header.disk_size >
1.1       root      433:                (uint64_t)header.blocks_in_image * header.block_size) {
1.1.1.2   root      434:         logout("unsupported disk size %" PRIu64 " B\n", header.disk_size);
1.1       root      435:         goto fail;
                    436:     } else if (!uuid_is_null(header.uuid_link)) {
                    437:         logout("link uuid != 0, unsupported\n");
                    438:         goto fail;
                    439:     } else if (!uuid_is_null(header.uuid_parent)) {
                    440:         logout("parent uuid != 0, unsupported\n");
                    441:         goto fail;
                    442:     }
                    443: 
                    444:     bs->total_sectors = header.disk_size / SECTOR_SIZE;
                    445: 
                    446:     s->block_size = header.block_size;
                    447:     s->block_sectors = header.block_size / SECTOR_SIZE;
                    448:     s->bmap_sector = header.offset_bmap / SECTOR_SIZE;
                    449:     s->header = header;
                    450: 
                    451:     bmap_size = header.blocks_in_image * sizeof(uint32_t);
                    452:     bmap_size = (bmap_size + SECTOR_SIZE - 1) / SECTOR_SIZE;
1.1.1.3   root      453:     if (bmap_size > 0) {
1.1.1.6 ! root      454:         s->bmap = g_malloc(bmap_size * SECTOR_SIZE);
1.1.1.3   root      455:     }
                    456:     if (bdrv_read(bs->file, s->bmap_sector, (uint8_t *)s->bmap, bmap_size) < 0) {
1.1       root      457:         goto fail_free_bmap;
                    458:     }
                    459: 
1.1.1.6 ! root      460:     /* Disable migration when vdi images are used */
        !           461:     error_set(&s->migration_blocker,
        !           462:               QERR_BLOCK_FORMAT_FEATURE_NOT_SUPPORTED,
        !           463:               "vdi", bs->device_name, "live migration");
        !           464:     migrate_add_blocker(s->migration_blocker);
        !           465: 
1.1       root      466:     return 0;
                    467: 
                    468:  fail_free_bmap:
1.1.1.6 ! root      469:     g_free(s->bmap);
1.1       root      470: 
                    471:  fail:
                    472:     return -1;
                    473: }
                    474: 
                    475: static int vdi_is_allocated(BlockDriverState *bs, int64_t sector_num,
                    476:                              int nb_sectors, int *pnum)
                    477: {
                    478:     /* TODO: Check for too large sector_num (in bdrv_is_allocated or here). */
                    479:     BDRVVdiState *s = (BDRVVdiState *)bs->opaque;
                    480:     size_t bmap_index = sector_num / s->block_sectors;
                    481:     size_t sector_in_block = sector_num % s->block_sectors;
                    482:     int n_sectors = s->block_sectors - sector_in_block;
                    483:     uint32_t bmap_entry = le32_to_cpu(s->bmap[bmap_index]);
                    484:     logout("%p, %" PRId64 ", %d, %p\n", bs, sector_num, nb_sectors, pnum);
                    485:     if (n_sectors > nb_sectors) {
                    486:         n_sectors = nb_sectors;
                    487:     }
                    488:     *pnum = n_sectors;
1.1.1.6 ! root      489:     return VDI_IS_ALLOCATED(bmap_entry);
1.1       root      490: }
                    491: 
                    492: static void vdi_aio_cancel(BlockDriverAIOCB *blockacb)
                    493: {
                    494:     /* TODO: This code is untested. How can I get it executed? */
1.1.1.3   root      495:     VdiAIOCB *acb = container_of(blockacb, VdiAIOCB, common);
1.1       root      496:     logout("\n");
                    497:     if (acb->hd_aiocb) {
                    498:         bdrv_aio_cancel(acb->hd_aiocb);
                    499:     }
                    500:     qemu_aio_release(acb);
                    501: }
                    502: 
                    503: static AIOPool vdi_aio_pool = {
                    504:     .aiocb_size = sizeof(VdiAIOCB),
                    505:     .cancel = vdi_aio_cancel,
                    506: };
                    507: 
                    508: static VdiAIOCB *vdi_aio_setup(BlockDriverState *bs, int64_t sector_num,
                    509:         QEMUIOVector *qiov, int nb_sectors,
                    510:         BlockDriverCompletionFunc *cb, void *opaque, int is_write)
                    511: {
                    512:     VdiAIOCB *acb;
                    513: 
                    514:     logout("%p, %" PRId64 ", %p, %d, %p, %p, %d\n",
                    515:            bs, sector_num, qiov, nb_sectors, cb, opaque, is_write);
                    516: 
                    517:     acb = qemu_aio_get(&vdi_aio_pool, bs, cb, opaque);
                    518:     if (acb) {
                    519:         acb->hd_aiocb = NULL;
                    520:         acb->sector_num = sector_num;
                    521:         acb->qiov = qiov;
1.1.1.5   root      522:         acb->is_write = is_write;
                    523: 
1.1       root      524:         if (qiov->niov > 1) {
                    525:             acb->buf = qemu_blockalign(bs, qiov->size);
                    526:             acb->orig_buf = acb->buf;
                    527:             if (is_write) {
                    528:                 qemu_iovec_to_buffer(qiov, acb->buf);
                    529:             }
                    530:         } else {
                    531:             acb->buf = (uint8_t *)qiov->iov->iov_base;
                    532:         }
                    533:         acb->nb_sectors = nb_sectors;
                    534:         acb->n_sectors = 0;
                    535:         acb->bmap_first = VDI_UNALLOCATED;
                    536:         acb->bmap_last = VDI_UNALLOCATED;
                    537:         acb->block_buffer = NULL;
                    538:         acb->header_modified = 0;
                    539:     }
                    540:     return acb;
                    541: }
                    542: 
                    543: static int vdi_schedule_bh(QEMUBHFunc *cb, VdiAIOCB *acb)
                    544: {
                    545:     logout("\n");
                    546: 
                    547:     if (acb->bh) {
                    548:         return -EIO;
                    549:     }
                    550: 
                    551:     acb->bh = qemu_bh_new(cb, acb);
                    552:     if (!acb->bh) {
                    553:         return -EIO;
                    554:     }
                    555: 
                    556:     qemu_bh_schedule(acb->bh);
                    557: 
                    558:     return 0;
                    559: }
                    560: 
                    561: static void vdi_aio_read_cb(void *opaque, int ret);
1.1.1.5   root      562: static void vdi_aio_write_cb(void *opaque, int ret);
1.1       root      563: 
1.1.1.5   root      564: static void vdi_aio_rw_bh(void *opaque)
1.1       root      565: {
                    566:     VdiAIOCB *acb = opaque;
                    567:     logout("\n");
                    568:     qemu_bh_delete(acb->bh);
                    569:     acb->bh = NULL;
1.1.1.5   root      570: 
                    571:     if (acb->is_write) {
                    572:         vdi_aio_write_cb(opaque, 0);
                    573:     } else {
                    574:         vdi_aio_read_cb(opaque, 0);
                    575:     }
1.1       root      576: }
                    577: 
                    578: static void vdi_aio_read_cb(void *opaque, int ret)
                    579: {
                    580:     VdiAIOCB *acb = opaque;
                    581:     BlockDriverState *bs = acb->common.bs;
                    582:     BDRVVdiState *s = bs->opaque;
                    583:     uint32_t bmap_entry;
                    584:     uint32_t block_index;
                    585:     uint32_t sector_in_block;
                    586:     uint32_t n_sectors;
                    587: 
                    588:     logout("%u sectors read\n", acb->n_sectors);
                    589: 
                    590:     acb->hd_aiocb = NULL;
                    591: 
                    592:     if (ret < 0) {
                    593:         goto done;
                    594:     }
                    595: 
                    596:     acb->nb_sectors -= acb->n_sectors;
                    597: 
                    598:     if (acb->nb_sectors == 0) {
                    599:         /* request completed */
                    600:         ret = 0;
                    601:         goto done;
                    602:     }
                    603: 
                    604:     acb->sector_num += acb->n_sectors;
                    605:     acb->buf += acb->n_sectors * SECTOR_SIZE;
                    606: 
                    607:     block_index = acb->sector_num / s->block_sectors;
                    608:     sector_in_block = acb->sector_num % s->block_sectors;
                    609:     n_sectors = s->block_sectors - sector_in_block;
                    610:     if (n_sectors > acb->nb_sectors) {
                    611:         n_sectors = acb->nb_sectors;
                    612:     }
                    613: 
                    614:     logout("will read %u sectors starting at sector %" PRIu64 "\n",
                    615:            n_sectors, acb->sector_num);
                    616: 
                    617:     /* prepare next AIO request */
                    618:     acb->n_sectors = n_sectors;
                    619:     bmap_entry = le32_to_cpu(s->bmap[block_index]);
1.1.1.6 ! root      620:     if (!VDI_IS_ALLOCATED(bmap_entry)) {
1.1       root      621:         /* Block not allocated, return zeros, no need to wait. */
                    622:         memset(acb->buf, 0, n_sectors * SECTOR_SIZE);
1.1.1.5   root      623:         ret = vdi_schedule_bh(vdi_aio_rw_bh, acb);
1.1       root      624:         if (ret < 0) {
                    625:             goto done;
                    626:         }
                    627:     } else {
                    628:         uint64_t offset = s->header.offset_data / SECTOR_SIZE +
                    629:                           (uint64_t)bmap_entry * s->block_sectors +
                    630:                           sector_in_block;
                    631:         acb->hd_iov.iov_base = (void *)acb->buf;
                    632:         acb->hd_iov.iov_len = n_sectors * SECTOR_SIZE;
                    633:         qemu_iovec_init_external(&acb->hd_qiov, &acb->hd_iov, 1);
1.1.1.3   root      634:         acb->hd_aiocb = bdrv_aio_readv(bs->file, offset, &acb->hd_qiov,
1.1       root      635:                                        n_sectors, vdi_aio_read_cb, acb);
                    636:         if (acb->hd_aiocb == NULL) {
1.1.1.5   root      637:             ret = -EIO;
1.1       root      638:             goto done;
                    639:         }
                    640:     }
                    641:     return;
                    642: done:
                    643:     if (acb->qiov->niov > 1) {
                    644:         qemu_iovec_from_buffer(acb->qiov, acb->orig_buf, acb->qiov->size);
                    645:         qemu_vfree(acb->orig_buf);
                    646:     }
                    647:     acb->common.cb(acb->common.opaque, ret);
                    648:     qemu_aio_release(acb);
                    649: }
                    650: 
                    651: static BlockDriverAIOCB *vdi_aio_readv(BlockDriverState *bs,
                    652:         int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
                    653:         BlockDriverCompletionFunc *cb, void *opaque)
                    654: {
                    655:     VdiAIOCB *acb;
1.1.1.5   root      656:     int ret;
                    657: 
1.1       root      658:     logout("\n");
                    659:     acb = vdi_aio_setup(bs, sector_num, qiov, nb_sectors, cb, opaque, 0);
                    660:     if (!acb) {
                    661:         return NULL;
                    662:     }
1.1.1.5   root      663: 
                    664:     ret = vdi_schedule_bh(vdi_aio_rw_bh, acb);
                    665:     if (ret < 0) {
                    666:         if (acb->qiov->niov > 1) {
                    667:             qemu_vfree(acb->orig_buf);
                    668:         }
                    669:         qemu_aio_release(acb);
                    670:         return NULL;
                    671:     }
                    672: 
1.1       root      673:     return &acb->common;
                    674: }
                    675: 
                    676: static void vdi_aio_write_cb(void *opaque, int ret)
                    677: {
                    678:     VdiAIOCB *acb = opaque;
                    679:     BlockDriverState *bs = acb->common.bs;
                    680:     BDRVVdiState *s = bs->opaque;
                    681:     uint32_t bmap_entry;
                    682:     uint32_t block_index;
                    683:     uint32_t sector_in_block;
                    684:     uint32_t n_sectors;
                    685: 
                    686:     acb->hd_aiocb = NULL;
                    687: 
                    688:     if (ret < 0) {
                    689:         goto done;
                    690:     }
                    691: 
                    692:     acb->nb_sectors -= acb->n_sectors;
                    693:     acb->sector_num += acb->n_sectors;
                    694:     acb->buf += acb->n_sectors * SECTOR_SIZE;
                    695: 
                    696:     if (acb->nb_sectors == 0) {
                    697:         logout("finished data write\n");
                    698:         acb->n_sectors = 0;
                    699:         if (acb->header_modified) {
                    700:             VdiHeader *header = acb->block_buffer;
                    701:             logout("now writing modified header\n");
1.1.1.6 ! root      702:             assert(VDI_IS_ALLOCATED(acb->bmap_first));
1.1       root      703:             *header = s->header;
                    704:             vdi_header_to_le(header);
                    705:             acb->header_modified = 0;
                    706:             acb->hd_iov.iov_base = acb->block_buffer;
                    707:             acb->hd_iov.iov_len = SECTOR_SIZE;
                    708:             qemu_iovec_init_external(&acb->hd_qiov, &acb->hd_iov, 1);
1.1.1.3   root      709:             acb->hd_aiocb = bdrv_aio_writev(bs->file, 0, &acb->hd_qiov, 1,
1.1       root      710:                                             vdi_aio_write_cb, acb);
                    711:             if (acb->hd_aiocb == NULL) {
1.1.1.5   root      712:                 ret = -EIO;
1.1       root      713:                 goto done;
                    714:             }
                    715:             return;
1.1.1.6 ! root      716:         } else if (VDI_IS_ALLOCATED(acb->bmap_first)) {
1.1       root      717:             /* One or more new blocks were allocated. */
                    718:             uint64_t offset;
                    719:             uint32_t bmap_first;
                    720:             uint32_t bmap_last;
1.1.1.6 ! root      721:             g_free(acb->block_buffer);
1.1       root      722:             acb->block_buffer = NULL;
                    723:             bmap_first = acb->bmap_first;
                    724:             bmap_last = acb->bmap_last;
                    725:             logout("now writing modified block map entry %u...%u\n",
                    726:                    bmap_first, bmap_last);
                    727:             /* Write modified sectors from block map. */
                    728:             bmap_first /= (SECTOR_SIZE / sizeof(uint32_t));
                    729:             bmap_last /= (SECTOR_SIZE / sizeof(uint32_t));
                    730:             n_sectors = bmap_last - bmap_first + 1;
                    731:             offset = s->bmap_sector + bmap_first;
                    732:             acb->bmap_first = VDI_UNALLOCATED;
                    733:             acb->hd_iov.iov_base = (void *)((uint8_t *)&s->bmap[0] +
                    734:                                             bmap_first * SECTOR_SIZE);
                    735:             acb->hd_iov.iov_len = n_sectors * SECTOR_SIZE;
                    736:             qemu_iovec_init_external(&acb->hd_qiov, &acb->hd_iov, 1);
                    737:             logout("will write %u block map sectors starting from entry %u\n",
                    738:                    n_sectors, bmap_first);
1.1.1.3   root      739:             acb->hd_aiocb = bdrv_aio_writev(bs->file, offset, &acb->hd_qiov,
1.1       root      740:                                             n_sectors, vdi_aio_write_cb, acb);
                    741:             if (acb->hd_aiocb == NULL) {
1.1.1.5   root      742:                 ret = -EIO;
1.1       root      743:                 goto done;
                    744:             }
                    745:             return;
                    746:         }
                    747:         ret = 0;
                    748:         goto done;
                    749:     }
                    750: 
                    751:     logout("%u sectors written\n", acb->n_sectors);
                    752: 
                    753:     block_index = acb->sector_num / s->block_sectors;
                    754:     sector_in_block = acb->sector_num % s->block_sectors;
                    755:     n_sectors = s->block_sectors - sector_in_block;
                    756:     if (n_sectors > acb->nb_sectors) {
                    757:         n_sectors = acb->nb_sectors;
                    758:     }
                    759: 
                    760:     logout("will write %u sectors starting at sector %" PRIu64 "\n",
                    761:            n_sectors, acb->sector_num);
                    762: 
                    763:     /* prepare next AIO request */
                    764:     acb->n_sectors = n_sectors;
                    765:     bmap_entry = le32_to_cpu(s->bmap[block_index]);
1.1.1.6 ! root      766:     if (!VDI_IS_ALLOCATED(bmap_entry)) {
1.1       root      767:         /* Allocate new block and write to it. */
                    768:         uint64_t offset;
                    769:         uint8_t *block;
                    770:         bmap_entry = s->header.blocks_allocated;
                    771:         s->bmap[block_index] = cpu_to_le32(bmap_entry);
                    772:         s->header.blocks_allocated++;
                    773:         offset = s->header.offset_data / SECTOR_SIZE +
                    774:                  (uint64_t)bmap_entry * s->block_sectors;
                    775:         block = acb->block_buffer;
                    776:         if (block == NULL) {
1.1.1.6 ! root      777:             block = g_malloc0(s->block_size);
1.1       root      778:             acb->block_buffer = block;
                    779:             acb->bmap_first = block_index;
                    780:             assert(!acb->header_modified);
                    781:             acb->header_modified = 1;
                    782:         }
                    783:         acb->bmap_last = block_index;
                    784:         memcpy(block + sector_in_block * SECTOR_SIZE,
                    785:                acb->buf, n_sectors * SECTOR_SIZE);
                    786:         acb->hd_iov.iov_base = (void *)block;
                    787:         acb->hd_iov.iov_len = s->block_size;
                    788:         qemu_iovec_init_external(&acb->hd_qiov, &acb->hd_iov, 1);
1.1.1.3   root      789:         acb->hd_aiocb = bdrv_aio_writev(bs->file, offset,
1.1       root      790:                                         &acb->hd_qiov, s->block_sectors,
                    791:                                         vdi_aio_write_cb, acb);
                    792:         if (acb->hd_aiocb == NULL) {
1.1.1.5   root      793:             ret = -EIO;
1.1       root      794:             goto done;
                    795:         }
                    796:     } else {
                    797:         uint64_t offset = s->header.offset_data / SECTOR_SIZE +
                    798:                           (uint64_t)bmap_entry * s->block_sectors +
                    799:                           sector_in_block;
                    800:         acb->hd_iov.iov_base = (void *)acb->buf;
                    801:         acb->hd_iov.iov_len = n_sectors * SECTOR_SIZE;
                    802:         qemu_iovec_init_external(&acb->hd_qiov, &acb->hd_iov, 1);
1.1.1.3   root      803:         acb->hd_aiocb = bdrv_aio_writev(bs->file, offset, &acb->hd_qiov,
1.1       root      804:                                         n_sectors, vdi_aio_write_cb, acb);
                    805:         if (acb->hd_aiocb == NULL) {
1.1.1.5   root      806:             ret = -EIO;
1.1       root      807:             goto done;
                    808:         }
                    809:     }
                    810: 
                    811:     return;
                    812: 
                    813: done:
                    814:     if (acb->qiov->niov > 1) {
                    815:         qemu_vfree(acb->orig_buf);
                    816:     }
                    817:     acb->common.cb(acb->common.opaque, ret);
                    818:     qemu_aio_release(acb);
                    819: }
                    820: 
                    821: static BlockDriverAIOCB *vdi_aio_writev(BlockDriverState *bs,
                    822:         int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
                    823:         BlockDriverCompletionFunc *cb, void *opaque)
                    824: {
                    825:     VdiAIOCB *acb;
1.1.1.5   root      826:     int ret;
                    827: 
1.1       root      828:     logout("\n");
                    829:     acb = vdi_aio_setup(bs, sector_num, qiov, nb_sectors, cb, opaque, 1);
                    830:     if (!acb) {
                    831:         return NULL;
                    832:     }
1.1.1.5   root      833: 
                    834:     ret = vdi_schedule_bh(vdi_aio_rw_bh, acb);
                    835:     if (ret < 0) {
                    836:         if (acb->qiov->niov > 1) {
                    837:             qemu_vfree(acb->orig_buf);
                    838:         }
                    839:         qemu_aio_release(acb);
                    840:         return NULL;
                    841:     }
                    842: 
1.1       root      843:     return &acb->common;
                    844: }
                    845: 
                    846: static int vdi_create(const char *filename, QEMUOptionParameter *options)
                    847: {
                    848:     int fd;
                    849:     int result = 0;
                    850:     uint64_t bytes = 0;
                    851:     uint32_t blocks;
1.1.1.5   root      852:     size_t block_size = DEFAULT_CLUSTER_SIZE;
1.1       root      853:     uint32_t image_type = VDI_TYPE_DYNAMIC;
                    854:     VdiHeader header;
                    855:     size_t i;
                    856:     size_t bmap_size;
                    857:     uint32_t *bmap;
                    858: 
                    859:     logout("\n");
                    860: 
                    861:     /* Read out options. */
                    862:     while (options && options->name) {
                    863:         if (!strcmp(options->name, BLOCK_OPT_SIZE)) {
                    864:             bytes = options->value.n;
                    865: #if defined(CONFIG_VDI_BLOCK_SIZE)
                    866:         } else if (!strcmp(options->name, BLOCK_OPT_CLUSTER_SIZE)) {
                    867:             if (options->value.n) {
                    868:                 /* TODO: Additional checks (SECTOR_SIZE * 2^n, ...). */
                    869:                 block_size = options->value.n;
                    870:             }
                    871: #endif
                    872: #if defined(CONFIG_VDI_STATIC_IMAGE)
                    873:         } else if (!strcmp(options->name, BLOCK_OPT_STATIC)) {
                    874:             if (options->value.n) {
                    875:                 image_type = VDI_TYPE_STATIC;
                    876:             }
                    877: #endif
                    878:         }
                    879:         options++;
                    880:     }
                    881: 
                    882:     fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY | O_LARGEFILE,
                    883:               0644);
                    884:     if (fd < 0) {
                    885:         return -errno;
                    886:     }
                    887: 
1.1.1.2   root      888:     /* We need enough blocks to store the given disk size,
                    889:        so always round up. */
                    890:     blocks = (bytes + block_size - 1) / block_size;
                    891: 
1.1       root      892:     bmap_size = blocks * sizeof(uint32_t);
                    893:     bmap_size = ((bmap_size + SECTOR_SIZE - 1) & ~(SECTOR_SIZE -1));
                    894: 
                    895:     memset(&header, 0, sizeof(header));
                    896:     pstrcpy(header.text, sizeof(header.text), VDI_TEXT);
                    897:     header.signature = VDI_SIGNATURE;
                    898:     header.version = VDI_VERSION_1_1;
                    899:     header.header_size = 0x180;
                    900:     header.image_type = image_type;
                    901:     header.offset_bmap = 0x200;
                    902:     header.offset_data = 0x200 + bmap_size;
                    903:     header.sector_size = SECTOR_SIZE;
                    904:     header.disk_size = bytes;
                    905:     header.block_size = block_size;
                    906:     header.blocks_in_image = blocks;
                    907:     if (image_type == VDI_TYPE_STATIC) {
                    908:         header.blocks_allocated = blocks;
                    909:     }
                    910:     uuid_generate(header.uuid_image);
                    911:     uuid_generate(header.uuid_last_snap);
                    912:     /* There is no need to set header.uuid_link or header.uuid_parent here. */
                    913: #if defined(CONFIG_VDI_DEBUG)
                    914:     vdi_header_print(&header);
                    915: #endif
                    916:     vdi_header_to_le(&header);
                    917:     if (write(fd, &header, sizeof(header)) < 0) {
                    918:         result = -errno;
                    919:     }
                    920: 
1.1.1.3   root      921:     bmap = NULL;
                    922:     if (bmap_size > 0) {
1.1.1.6 ! root      923:         bmap = (uint32_t *)g_malloc0(bmap_size);
1.1.1.3   root      924:     }
1.1       root      925:     for (i = 0; i < blocks; i++) {
                    926:         if (image_type == VDI_TYPE_STATIC) {
                    927:             bmap[i] = i;
                    928:         } else {
                    929:             bmap[i] = VDI_UNALLOCATED;
                    930:         }
                    931:     }
                    932:     if (write(fd, bmap, bmap_size) < 0) {
                    933:         result = -errno;
                    934:     }
1.1.1.6 ! root      935:     g_free(bmap);
1.1       root      936:     if (image_type == VDI_TYPE_STATIC) {
                    937:         if (ftruncate(fd, sizeof(header) + bmap_size + blocks * block_size)) {
                    938:             result = -errno;
                    939:         }
                    940:     }
                    941: 
                    942:     if (close(fd) < 0) {
                    943:         result = -errno;
                    944:     }
                    945: 
                    946:     return result;
                    947: }
                    948: 
                    949: static void vdi_close(BlockDriverState *bs)
                    950: {
1.1.1.6 ! root      951:     BDRVVdiState *s = bs->opaque;
        !           952: 
        !           953:     g_free(s->bmap);
        !           954: 
        !           955:     migrate_del_blocker(s->migration_blocker);
        !           956:     error_free(s->migration_blocker);
1.1       root      957: }
                    958: 
1.1.1.6 ! root      959: static coroutine_fn int vdi_co_flush(BlockDriverState *bs)
1.1       root      960: {
                    961:     logout("\n");
1.1.1.6 ! root      962:     return bdrv_co_flush(bs->file);
1.1       root      963: }
                    964: 
                    965: 
                    966: static QEMUOptionParameter vdi_create_options[] = {
                    967:     {
                    968:         .name = BLOCK_OPT_SIZE,
                    969:         .type = OPT_SIZE,
                    970:         .help = "Virtual disk size"
                    971:     },
                    972: #if defined(CONFIG_VDI_BLOCK_SIZE)
                    973:     {
                    974:         .name = BLOCK_OPT_CLUSTER_SIZE,
                    975:         .type = OPT_SIZE,
1.1.1.5   root      976:         .help = "VDI cluster (block) size",
                    977:         .value = { .n = DEFAULT_CLUSTER_SIZE },
1.1       root      978:     },
                    979: #endif
                    980: #if defined(CONFIG_VDI_STATIC_IMAGE)
                    981:     {
                    982:         .name = BLOCK_OPT_STATIC,
                    983:         .type = OPT_FLAG,
                    984:         .help = "VDI static (pre-allocated) image"
                    985:     },
                    986: #endif
                    987:     /* TODO: An additional option to set UUID values might be useful. */
                    988:     { NULL }
                    989: };
                    990: 
                    991: static BlockDriver bdrv_vdi = {
                    992:     .format_name = "vdi",
                    993:     .instance_size = sizeof(BDRVVdiState),
                    994:     .bdrv_probe = vdi_probe,
                    995:     .bdrv_open = vdi_open,
                    996:     .bdrv_close = vdi_close,
                    997:     .bdrv_create = vdi_create,
1.1.1.6 ! root      998:     .bdrv_co_flush_to_disk = vdi_co_flush,
1.1       root      999:     .bdrv_is_allocated = vdi_is_allocated,
                   1000:     .bdrv_make_empty = vdi_make_empty,
                   1001: 
                   1002:     .bdrv_aio_readv = vdi_aio_readv,
                   1003: #if defined(CONFIG_VDI_WRITE)
                   1004:     .bdrv_aio_writev = vdi_aio_writev,
                   1005: #endif
                   1006: 
                   1007:     .bdrv_get_info = vdi_get_info,
                   1008: 
                   1009:     .create_options = vdi_create_options,
                   1010:     .bdrv_check = vdi_check,
                   1011: };
                   1012: 
                   1013: static void bdrv_vdi_init(void)
                   1014: {
                   1015:     logout("\n");
                   1016:     bdrv_register(&bdrv_vdi);
                   1017: }
                   1018: 
                   1019: block_init(bdrv_vdi_init);

unix.superglobalmegacorp.com

This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.