|
|
1.1 root 1: /*
2: * Block driver for the Virtual Disk Image (VDI) format
3: *
4: * Copyright (c) 2009 Stefan Weil
5: *
6: * This program is free software: you can redistribute it and/or modify
7: * it under the terms of the GNU General Public License as published by
8: * the Free Software Foundation, either version 2 of the License, or
9: * (at your option) version 3 or any later version.
10: *
11: * This program is distributed in the hope that it will be useful,
12: * but WITHOUT ANY WARRANTY; without even the implied warranty of
13: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14: * GNU General Public License for more details.
15: *
16: * You should have received a copy of the GNU General Public License
17: * along with this program. If not, see <http://www.gnu.org/licenses/>.
18: *
19: * Reference:
20: * http://forums.virtualbox.org/viewtopic.php?t=8046
21: *
22: * This driver supports create / read / write operations on VDI images.
23: *
24: * Todo (see also TODO in code):
25: *
26: * Some features like snapshots are still missing.
27: *
28: * Deallocation of zero-filled blocks and shrinking images are missing, too
29: * (might be added to common block layer).
30: *
31: * Allocation of blocks could be optimized (less writes to block map and
32: * header).
33: *
34: * Read and write of adjacent blocks could be done in one operation
35: * (current code uses one operation per block (1 MiB)).
36: *
37: * The code is not thread safe (missing locks for changes in header and
38: * block table, no problem with current QEMU).
39: *
40: * Hints:
41: *
42: * Blocks (VDI documentation) correspond to clusters (QEMU).
43: * QEMU's backing files could be implemented using VDI snapshot files (TODO).
44: * VDI snapshot files may also contain the complete machine state.
45: * Maybe this machine state can be converted to QEMU PC machine snapshot data.
46: *
47: * The driver keeps a block cache (little endian entries) in memory.
48: * For the standard block size (1 MiB), a 1 TiB disk will use 4 MiB RAM,
49: * so this seems to be reasonable.
50: */
51:
52: #include "qemu-common.h"
53: #include "block_int.h"
54: #include "module.h"
1.1.1.6 ! root 55: #include "migration.h"
1.1 root 56:
57: #if defined(CONFIG_UUID)
58: #include <uuid/uuid.h>
59: #else
60: /* TODO: move uuid emulation to some central place in QEMU. */
61: #include "sysemu.h" /* UUID_FMT */
62: typedef unsigned char uuid_t[16];
63: void uuid_generate(uuid_t out);
64: int uuid_is_null(const uuid_t uu);
65: void uuid_unparse(const uuid_t uu, char *out);
66: #endif
67:
68: /* Code configuration options. */
69:
70: /* Enable debug messages. */
71: //~ #define CONFIG_VDI_DEBUG
72:
73: /* Support write operations on VDI images. */
74: #define CONFIG_VDI_WRITE
75:
76: /* Support non-standard block (cluster) size. This is untested.
77: * Maybe it will be needed for very large images.
78: */
79: //~ #define CONFIG_VDI_BLOCK_SIZE
80:
81: /* Support static (fixed, pre-allocated) images. */
82: #define CONFIG_VDI_STATIC_IMAGE
83:
84: /* Command line option for static images. */
85: #define BLOCK_OPT_STATIC "static"
86:
87: #define KiB 1024
88: #define MiB (KiB * KiB)
89:
90: #define SECTOR_SIZE 512
1.1.1.5 root 91: #define DEFAULT_CLUSTER_SIZE (1 * MiB)
1.1 root 92:
93: #if defined(CONFIG_VDI_DEBUG)
94: #define logout(fmt, ...) \
95: fprintf(stderr, "vdi\t%-24s" fmt, __func__, ##__VA_ARGS__)
96: #else
97: #define logout(fmt, ...) ((void)0)
98: #endif
99:
100: /* Image signature. */
101: #define VDI_SIGNATURE 0xbeda107f
102:
103: /* Image version. */
104: #define VDI_VERSION_1_1 0x00010001
105:
106: /* Image type. */
107: #define VDI_TYPE_DYNAMIC 1
108: #define VDI_TYPE_STATIC 2
109:
110: /* Innotek / SUN images use these strings in header.text:
111: * "<<< innotek VirtualBox Disk Image >>>\n"
112: * "<<< Sun xVM VirtualBox Disk Image >>>\n"
113: * "<<< Sun VirtualBox Disk Image >>>\n"
114: * The value does not matter, so QEMU created images use a different text.
115: */
116: #define VDI_TEXT "<<< QEMU VM Virtual Disk Image >>>\n"
117:
/* Block map value of a block that was never allocated; its content is
 * semantically arbitrary. */
#define VDI_UNALLOCATED 0xffffffffU

/* Block map value of a discarded (no longer allocated) block; its content
 * is semantically zero-filled. */
#define VDI_DISCARDED 0xfffffffeU

/* True if block map value X is below both marker values above, i.e. it
 * names a block that is stored in the image. */
#define VDI_IS_ALLOCATED(X) ((X) < VDI_DISCARDED)
1.1 root 125:
126: #if !defined(CONFIG_UUID)
127: void uuid_generate(uuid_t out)
128: {
1.1.1.4 root 129: memset(out, 0, sizeof(uuid_t));
1.1 root 130: }
131:
132: int uuid_is_null(const uuid_t uu)
133: {
134: uuid_t null_uuid = { 0 };
1.1.1.4 root 135: return memcmp(uu, null_uuid, sizeof(uuid_t)) == 0;
1.1 root 136: }
137:
138: void uuid_unparse(const uuid_t uu, char *out)
139: {
140: snprintf(out, 37, UUID_FMT,
141: uu[0], uu[1], uu[2], uu[3], uu[4], uu[5], uu[6], uu[7],
142: uu[8], uu[9], uu[10], uu[11], uu[12], uu[13], uu[14], uu[15]);
143: }
144: #endif
145:
146: typedef struct {
147: BlockDriverAIOCB common;
148: int64_t sector_num;
149: QEMUIOVector *qiov;
150: uint8_t *buf;
151: /* Total number of sectors. */
152: int nb_sectors;
153: /* Number of sectors for current AIO. */
154: int n_sectors;
155: /* New allocated block map entry. */
156: uint32_t bmap_first;
157: uint32_t bmap_last;
158: /* Buffer for new allocated block. */
159: void *block_buffer;
160: void *orig_buf;
1.1.1.5 root 161: bool is_write;
1.1 root 162: int header_modified;
163: BlockDriverAIOCB *hd_aiocb;
164: struct iovec hd_iov;
165: QEMUIOVector hd_qiov;
166: QEMUBH *bh;
167: } VdiAIOCB;
168:
169: typedef struct {
170: char text[0x40];
171: uint32_t signature;
172: uint32_t version;
173: uint32_t header_size;
174: uint32_t image_type;
175: uint32_t image_flags;
176: char description[256];
177: uint32_t offset_bmap;
178: uint32_t offset_data;
179: uint32_t cylinders; /* disk geometry, unused here */
180: uint32_t heads; /* disk geometry, unused here */
181: uint32_t sectors; /* disk geometry, unused here */
182: uint32_t sector_size;
183: uint32_t unused1;
184: uint64_t disk_size;
185: uint32_t block_size;
186: uint32_t block_extra; /* unused here */
187: uint32_t blocks_in_image;
188: uint32_t blocks_allocated;
189: uuid_t uuid_image;
190: uuid_t uuid_last_snap;
191: uuid_t uuid_link;
192: uuid_t uuid_parent;
193: uint64_t unused2[7];
194: } VdiHeader;
195:
196: typedef struct {
197: /* The block map entries are little endian (even in memory). */
198: uint32_t *bmap;
199: /* Size of block (bytes). */
200: uint32_t block_size;
201: /* Size of block (sectors). */
202: uint32_t block_sectors;
203: /* First sector of block map. */
204: uint32_t bmap_sector;
1.1.1.5 root 205: /* VDI header (converted to host endianness). */
1.1 root 206: VdiHeader header;
1.1.1.6 ! root 207:
! 208: Error *migration_blocker;
1.1 root 209: } BDRVVdiState;
210:
211: /* Change UUID from little endian (IPRT = VirtualBox format) to big endian
212: * format (network byte order, standard, see RFC 4122) and vice versa.
213: */
214: static void uuid_convert(uuid_t uuid)
215: {
216: bswap32s((uint32_t *)&uuid[0]);
217: bswap16s((uint16_t *)&uuid[4]);
218: bswap16s((uint16_t *)&uuid[6]);
219: }
220:
221: static void vdi_header_to_cpu(VdiHeader *header)
222: {
223: le32_to_cpus(&header->signature);
224: le32_to_cpus(&header->version);
225: le32_to_cpus(&header->header_size);
226: le32_to_cpus(&header->image_type);
227: le32_to_cpus(&header->image_flags);
228: le32_to_cpus(&header->offset_bmap);
229: le32_to_cpus(&header->offset_data);
230: le32_to_cpus(&header->cylinders);
231: le32_to_cpus(&header->heads);
232: le32_to_cpus(&header->sectors);
233: le32_to_cpus(&header->sector_size);
234: le64_to_cpus(&header->disk_size);
235: le32_to_cpus(&header->block_size);
236: le32_to_cpus(&header->block_extra);
237: le32_to_cpus(&header->blocks_in_image);
238: le32_to_cpus(&header->blocks_allocated);
239: uuid_convert(header->uuid_image);
240: uuid_convert(header->uuid_last_snap);
241: uuid_convert(header->uuid_link);
242: uuid_convert(header->uuid_parent);
243: }
244:
245: static void vdi_header_to_le(VdiHeader *header)
246: {
247: cpu_to_le32s(&header->signature);
248: cpu_to_le32s(&header->version);
249: cpu_to_le32s(&header->header_size);
250: cpu_to_le32s(&header->image_type);
251: cpu_to_le32s(&header->image_flags);
252: cpu_to_le32s(&header->offset_bmap);
253: cpu_to_le32s(&header->offset_data);
254: cpu_to_le32s(&header->cylinders);
255: cpu_to_le32s(&header->heads);
256: cpu_to_le32s(&header->sectors);
257: cpu_to_le32s(&header->sector_size);
258: cpu_to_le64s(&header->disk_size);
259: cpu_to_le32s(&header->block_size);
260: cpu_to_le32s(&header->block_extra);
261: cpu_to_le32s(&header->blocks_in_image);
262: cpu_to_le32s(&header->blocks_allocated);
263: cpu_to_le32s(&header->blocks_allocated);
264: uuid_convert(header->uuid_image);
265: uuid_convert(header->uuid_last_snap);
266: uuid_convert(header->uuid_link);
267: uuid_convert(header->uuid_parent);
268: }
269:
270: #if defined(CONFIG_VDI_DEBUG)
271: static void vdi_header_print(VdiHeader *header)
272: {
273: char uuid[37];
274: logout("text %s", header->text);
275: logout("signature 0x%04x\n", header->signature);
276: logout("header size 0x%04x\n", header->header_size);
277: logout("image type 0x%04x\n", header->image_type);
278: logout("image flags 0x%04x\n", header->image_flags);
279: logout("description %s\n", header->description);
280: logout("offset bmap 0x%04x\n", header->offset_bmap);
281: logout("offset data 0x%04x\n", header->offset_data);
282: logout("cylinders 0x%04x\n", header->cylinders);
283: logout("heads 0x%04x\n", header->heads);
284: logout("sectors 0x%04x\n", header->sectors);
285: logout("sector size 0x%04x\n", header->sector_size);
286: logout("image size 0x%" PRIx64 " B (%" PRIu64 " MiB)\n",
287: header->disk_size, header->disk_size / MiB);
288: logout("block size 0x%04x\n", header->block_size);
289: logout("block extra 0x%04x\n", header->block_extra);
290: logout("blocks tot. 0x%04x\n", header->blocks_in_image);
291: logout("blocks all. 0x%04x\n", header->blocks_allocated);
292: uuid_unparse(header->uuid_image, uuid);
293: logout("uuid image %s\n", uuid);
294: uuid_unparse(header->uuid_last_snap, uuid);
295: logout("uuid snap %s\n", uuid);
296: uuid_unparse(header->uuid_link, uuid);
297: logout("uuid link %s\n", uuid);
298: uuid_unparse(header->uuid_parent, uuid);
299: logout("uuid parent %s\n", uuid);
300: }
301: #endif
302:
1.1.1.3 root 303: static int vdi_check(BlockDriverState *bs, BdrvCheckResult *res)
1.1 root 304: {
305: /* TODO: additional checks possible. */
306: BDRVVdiState *s = (BDRVVdiState *)bs->opaque;
307: uint32_t blocks_allocated = 0;
308: uint32_t block;
309: uint32_t *bmap;
310: logout("\n");
311:
1.1.1.6 ! root 312: bmap = g_malloc(s->header.blocks_in_image * sizeof(uint32_t));
1.1 root 313: memset(bmap, 0xff, s->header.blocks_in_image * sizeof(uint32_t));
314:
315: /* Check block map and value of blocks_allocated. */
316: for (block = 0; block < s->header.blocks_in_image; block++) {
317: uint32_t bmap_entry = le32_to_cpu(s->bmap[block]);
1.1.1.6 ! root 318: if (VDI_IS_ALLOCATED(bmap_entry)) {
1.1 root 319: if (bmap_entry < s->header.blocks_in_image) {
320: blocks_allocated++;
1.1.1.6 ! root 321: if (!VDI_IS_ALLOCATED(bmap[bmap_entry])) {
1.1 root 322: bmap[bmap_entry] = bmap_entry;
323: } else {
324: fprintf(stderr, "ERROR: block index %" PRIu32
325: " also used by %" PRIu32 "\n", bmap[bmap_entry], bmap_entry);
1.1.1.3 root 326: res->corruptions++;
1.1 root 327: }
328: } else {
329: fprintf(stderr, "ERROR: block index %" PRIu32
330: " too large, is %" PRIu32 "\n", block, bmap_entry);
1.1.1.3 root 331: res->corruptions++;
1.1 root 332: }
333: }
334: }
335: if (blocks_allocated != s->header.blocks_allocated) {
336: fprintf(stderr, "ERROR: allocated blocks mismatch, is %" PRIu32
337: ", should be %" PRIu32 "\n",
338: blocks_allocated, s->header.blocks_allocated);
1.1.1.3 root 339: res->corruptions++;
1.1 root 340: }
341:
1.1.1.6 ! root 342: g_free(bmap);
1.1 root 343:
1.1.1.3 root 344: return 0;
1.1 root 345: }
346:
347: static int vdi_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
348: {
349: /* TODO: vdi_get_info would be needed for machine snapshots.
350: vm_state_offset is still missing. */
351: BDRVVdiState *s = (BDRVVdiState *)bs->opaque;
352: logout("\n");
353: bdi->cluster_size = s->block_size;
354: bdi->vm_state_offset = 0;
355: return 0;
356: }
357:
358: static int vdi_make_empty(BlockDriverState *bs)
359: {
360: /* TODO: missing code. */
361: logout("\n");
362: /* The return value for missing code must be 0, see block.c. */
363: return 0;
364: }
365:
366: static int vdi_probe(const uint8_t *buf, int buf_size, const char *filename)
367: {
368: const VdiHeader *header = (const VdiHeader *)buf;
369: int result = 0;
370:
371: logout("\n");
372:
373: if (buf_size < sizeof(*header)) {
374: /* Header too small, no VDI. */
375: } else if (le32_to_cpu(header->signature) == VDI_SIGNATURE) {
376: result = 100;
377: }
378:
379: if (result == 0) {
380: logout("no vdi image\n");
381: } else {
382: logout("%s", header->text);
383: }
384:
385: return result;
386: }
387:
1.1.1.3 root 388: static int vdi_open(BlockDriverState *bs, int flags)
1.1 root 389: {
390: BDRVVdiState *s = bs->opaque;
391: VdiHeader header;
392: size_t bmap_size;
393:
394: logout("\n");
395:
1.1.1.3 root 396: if (bdrv_read(bs->file, 0, (uint8_t *)&header, 1) < 0) {
1.1 root 397: goto fail;
398: }
399:
400: vdi_header_to_cpu(&header);
401: #if defined(CONFIG_VDI_DEBUG)
402: vdi_header_print(&header);
403: #endif
404:
1.1.1.2 root 405: if (header.disk_size % SECTOR_SIZE != 0) {
406: /* 'VBoxManage convertfromraw' can create images with odd disk sizes.
407: We accept them but round the disk size to the next multiple of
408: SECTOR_SIZE. */
409: logout("odd disk size %" PRIu64 " B, round up\n", header.disk_size);
410: header.disk_size += SECTOR_SIZE - 1;
411: header.disk_size &= ~(SECTOR_SIZE - 1);
412: }
413:
1.1 root 414: if (header.version != VDI_VERSION_1_1) {
415: logout("unsupported version %u.%u\n",
416: header.version >> 16, header.version & 0xffff);
417: goto fail;
418: } else if (header.offset_bmap % SECTOR_SIZE != 0) {
419: /* We only support block maps which start on a sector boundary. */
420: logout("unsupported block map offset 0x%x B\n", header.offset_bmap);
421: goto fail;
422: } else if (header.offset_data % SECTOR_SIZE != 0) {
423: /* We only support data blocks which start on a sector boundary. */
424: logout("unsupported data offset 0x%x B\n", header.offset_data);
425: goto fail;
426: } else if (header.sector_size != SECTOR_SIZE) {
427: logout("unsupported sector size %u B\n", header.sector_size);
428: goto fail;
429: } else if (header.block_size != 1 * MiB) {
430: logout("unsupported block size %u B\n", header.block_size);
431: goto fail;
1.1.1.2 root 432: } else if (header.disk_size >
1.1 root 433: (uint64_t)header.blocks_in_image * header.block_size) {
1.1.1.2 root 434: logout("unsupported disk size %" PRIu64 " B\n", header.disk_size);
1.1 root 435: goto fail;
436: } else if (!uuid_is_null(header.uuid_link)) {
437: logout("link uuid != 0, unsupported\n");
438: goto fail;
439: } else if (!uuid_is_null(header.uuid_parent)) {
440: logout("parent uuid != 0, unsupported\n");
441: goto fail;
442: }
443:
444: bs->total_sectors = header.disk_size / SECTOR_SIZE;
445:
446: s->block_size = header.block_size;
447: s->block_sectors = header.block_size / SECTOR_SIZE;
448: s->bmap_sector = header.offset_bmap / SECTOR_SIZE;
449: s->header = header;
450:
451: bmap_size = header.blocks_in_image * sizeof(uint32_t);
452: bmap_size = (bmap_size + SECTOR_SIZE - 1) / SECTOR_SIZE;
1.1.1.3 root 453: if (bmap_size > 0) {
1.1.1.6 ! root 454: s->bmap = g_malloc(bmap_size * SECTOR_SIZE);
1.1.1.3 root 455: }
456: if (bdrv_read(bs->file, s->bmap_sector, (uint8_t *)s->bmap, bmap_size) < 0) {
1.1 root 457: goto fail_free_bmap;
458: }
459:
1.1.1.6 ! root 460: /* Disable migration when vdi images are used */
! 461: error_set(&s->migration_blocker,
! 462: QERR_BLOCK_FORMAT_FEATURE_NOT_SUPPORTED,
! 463: "vdi", bs->device_name, "live migration");
! 464: migrate_add_blocker(s->migration_blocker);
! 465:
1.1 root 466: return 0;
467:
468: fail_free_bmap:
1.1.1.6 ! root 469: g_free(s->bmap);
1.1 root 470:
471: fail:
472: return -1;
473: }
474:
475: static int vdi_is_allocated(BlockDriverState *bs, int64_t sector_num,
476: int nb_sectors, int *pnum)
477: {
478: /* TODO: Check for too large sector_num (in bdrv_is_allocated or here). */
479: BDRVVdiState *s = (BDRVVdiState *)bs->opaque;
480: size_t bmap_index = sector_num / s->block_sectors;
481: size_t sector_in_block = sector_num % s->block_sectors;
482: int n_sectors = s->block_sectors - sector_in_block;
483: uint32_t bmap_entry = le32_to_cpu(s->bmap[bmap_index]);
484: logout("%p, %" PRId64 ", %d, %p\n", bs, sector_num, nb_sectors, pnum);
485: if (n_sectors > nb_sectors) {
486: n_sectors = nb_sectors;
487: }
488: *pnum = n_sectors;
1.1.1.6 ! root 489: return VDI_IS_ALLOCATED(bmap_entry);
1.1 root 490: }
491:
492: static void vdi_aio_cancel(BlockDriverAIOCB *blockacb)
493: {
494: /* TODO: This code is untested. How can I get it executed? */
1.1.1.3 root 495: VdiAIOCB *acb = container_of(blockacb, VdiAIOCB, common);
1.1 root 496: logout("\n");
497: if (acb->hd_aiocb) {
498: bdrv_aio_cancel(acb->hd_aiocb);
499: }
500: qemu_aio_release(acb);
501: }
502:
503: static AIOPool vdi_aio_pool = {
504: .aiocb_size = sizeof(VdiAIOCB),
505: .cancel = vdi_aio_cancel,
506: };
507:
508: static VdiAIOCB *vdi_aio_setup(BlockDriverState *bs, int64_t sector_num,
509: QEMUIOVector *qiov, int nb_sectors,
510: BlockDriverCompletionFunc *cb, void *opaque, int is_write)
511: {
512: VdiAIOCB *acb;
513:
514: logout("%p, %" PRId64 ", %p, %d, %p, %p, %d\n",
515: bs, sector_num, qiov, nb_sectors, cb, opaque, is_write);
516:
517: acb = qemu_aio_get(&vdi_aio_pool, bs, cb, opaque);
518: if (acb) {
519: acb->hd_aiocb = NULL;
520: acb->sector_num = sector_num;
521: acb->qiov = qiov;
1.1.1.5 root 522: acb->is_write = is_write;
523:
1.1 root 524: if (qiov->niov > 1) {
525: acb->buf = qemu_blockalign(bs, qiov->size);
526: acb->orig_buf = acb->buf;
527: if (is_write) {
528: qemu_iovec_to_buffer(qiov, acb->buf);
529: }
530: } else {
531: acb->buf = (uint8_t *)qiov->iov->iov_base;
532: }
533: acb->nb_sectors = nb_sectors;
534: acb->n_sectors = 0;
535: acb->bmap_first = VDI_UNALLOCATED;
536: acb->bmap_last = VDI_UNALLOCATED;
537: acb->block_buffer = NULL;
538: acb->header_modified = 0;
539: }
540: return acb;
541: }
542:
543: static int vdi_schedule_bh(QEMUBHFunc *cb, VdiAIOCB *acb)
544: {
545: logout("\n");
546:
547: if (acb->bh) {
548: return -EIO;
549: }
550:
551: acb->bh = qemu_bh_new(cb, acb);
552: if (!acb->bh) {
553: return -EIO;
554: }
555:
556: qemu_bh_schedule(acb->bh);
557:
558: return 0;
559: }
560:
561: static void vdi_aio_read_cb(void *opaque, int ret);
1.1.1.5 root 562: static void vdi_aio_write_cb(void *opaque, int ret);
1.1 root 563:
1.1.1.5 root 564: static void vdi_aio_rw_bh(void *opaque)
1.1 root 565: {
566: VdiAIOCB *acb = opaque;
567: logout("\n");
568: qemu_bh_delete(acb->bh);
569: acb->bh = NULL;
1.1.1.5 root 570:
571: if (acb->is_write) {
572: vdi_aio_write_cb(opaque, 0);
573: } else {
574: vdi_aio_read_cb(opaque, 0);
575: }
1.1 root 576: }
577:
578: static void vdi_aio_read_cb(void *opaque, int ret)
579: {
580: VdiAIOCB *acb = opaque;
581: BlockDriverState *bs = acb->common.bs;
582: BDRVVdiState *s = bs->opaque;
583: uint32_t bmap_entry;
584: uint32_t block_index;
585: uint32_t sector_in_block;
586: uint32_t n_sectors;
587:
588: logout("%u sectors read\n", acb->n_sectors);
589:
590: acb->hd_aiocb = NULL;
591:
592: if (ret < 0) {
593: goto done;
594: }
595:
596: acb->nb_sectors -= acb->n_sectors;
597:
598: if (acb->nb_sectors == 0) {
599: /* request completed */
600: ret = 0;
601: goto done;
602: }
603:
604: acb->sector_num += acb->n_sectors;
605: acb->buf += acb->n_sectors * SECTOR_SIZE;
606:
607: block_index = acb->sector_num / s->block_sectors;
608: sector_in_block = acb->sector_num % s->block_sectors;
609: n_sectors = s->block_sectors - sector_in_block;
610: if (n_sectors > acb->nb_sectors) {
611: n_sectors = acb->nb_sectors;
612: }
613:
614: logout("will read %u sectors starting at sector %" PRIu64 "\n",
615: n_sectors, acb->sector_num);
616:
617: /* prepare next AIO request */
618: acb->n_sectors = n_sectors;
619: bmap_entry = le32_to_cpu(s->bmap[block_index]);
1.1.1.6 ! root 620: if (!VDI_IS_ALLOCATED(bmap_entry)) {
1.1 root 621: /* Block not allocated, return zeros, no need to wait. */
622: memset(acb->buf, 0, n_sectors * SECTOR_SIZE);
1.1.1.5 root 623: ret = vdi_schedule_bh(vdi_aio_rw_bh, acb);
1.1 root 624: if (ret < 0) {
625: goto done;
626: }
627: } else {
628: uint64_t offset = s->header.offset_data / SECTOR_SIZE +
629: (uint64_t)bmap_entry * s->block_sectors +
630: sector_in_block;
631: acb->hd_iov.iov_base = (void *)acb->buf;
632: acb->hd_iov.iov_len = n_sectors * SECTOR_SIZE;
633: qemu_iovec_init_external(&acb->hd_qiov, &acb->hd_iov, 1);
1.1.1.3 root 634: acb->hd_aiocb = bdrv_aio_readv(bs->file, offset, &acb->hd_qiov,
1.1 root 635: n_sectors, vdi_aio_read_cb, acb);
636: if (acb->hd_aiocb == NULL) {
1.1.1.5 root 637: ret = -EIO;
1.1 root 638: goto done;
639: }
640: }
641: return;
642: done:
643: if (acb->qiov->niov > 1) {
644: qemu_iovec_from_buffer(acb->qiov, acb->orig_buf, acb->qiov->size);
645: qemu_vfree(acb->orig_buf);
646: }
647: acb->common.cb(acb->common.opaque, ret);
648: qemu_aio_release(acb);
649: }
650:
651: static BlockDriverAIOCB *vdi_aio_readv(BlockDriverState *bs,
652: int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
653: BlockDriverCompletionFunc *cb, void *opaque)
654: {
655: VdiAIOCB *acb;
1.1.1.5 root 656: int ret;
657:
1.1 root 658: logout("\n");
659: acb = vdi_aio_setup(bs, sector_num, qiov, nb_sectors, cb, opaque, 0);
660: if (!acb) {
661: return NULL;
662: }
1.1.1.5 root 663:
664: ret = vdi_schedule_bh(vdi_aio_rw_bh, acb);
665: if (ret < 0) {
666: if (acb->qiov->niov > 1) {
667: qemu_vfree(acb->orig_buf);
668: }
669: qemu_aio_release(acb);
670: return NULL;
671: }
672:
1.1 root 673: return &acb->common;
674: }
675:
676: static void vdi_aio_write_cb(void *opaque, int ret)
677: {
678: VdiAIOCB *acb = opaque;
679: BlockDriverState *bs = acb->common.bs;
680: BDRVVdiState *s = bs->opaque;
681: uint32_t bmap_entry;
682: uint32_t block_index;
683: uint32_t sector_in_block;
684: uint32_t n_sectors;
685:
686: acb->hd_aiocb = NULL;
687:
688: if (ret < 0) {
689: goto done;
690: }
691:
692: acb->nb_sectors -= acb->n_sectors;
693: acb->sector_num += acb->n_sectors;
694: acb->buf += acb->n_sectors * SECTOR_SIZE;
695:
696: if (acb->nb_sectors == 0) {
697: logout("finished data write\n");
698: acb->n_sectors = 0;
699: if (acb->header_modified) {
700: VdiHeader *header = acb->block_buffer;
701: logout("now writing modified header\n");
1.1.1.6 ! root 702: assert(VDI_IS_ALLOCATED(acb->bmap_first));
1.1 root 703: *header = s->header;
704: vdi_header_to_le(header);
705: acb->header_modified = 0;
706: acb->hd_iov.iov_base = acb->block_buffer;
707: acb->hd_iov.iov_len = SECTOR_SIZE;
708: qemu_iovec_init_external(&acb->hd_qiov, &acb->hd_iov, 1);
1.1.1.3 root 709: acb->hd_aiocb = bdrv_aio_writev(bs->file, 0, &acb->hd_qiov, 1,
1.1 root 710: vdi_aio_write_cb, acb);
711: if (acb->hd_aiocb == NULL) {
1.1.1.5 root 712: ret = -EIO;
1.1 root 713: goto done;
714: }
715: return;
1.1.1.6 ! root 716: } else if (VDI_IS_ALLOCATED(acb->bmap_first)) {
1.1 root 717: /* One or more new blocks were allocated. */
718: uint64_t offset;
719: uint32_t bmap_first;
720: uint32_t bmap_last;
1.1.1.6 ! root 721: g_free(acb->block_buffer);
1.1 root 722: acb->block_buffer = NULL;
723: bmap_first = acb->bmap_first;
724: bmap_last = acb->bmap_last;
725: logout("now writing modified block map entry %u...%u\n",
726: bmap_first, bmap_last);
727: /* Write modified sectors from block map. */
728: bmap_first /= (SECTOR_SIZE / sizeof(uint32_t));
729: bmap_last /= (SECTOR_SIZE / sizeof(uint32_t));
730: n_sectors = bmap_last - bmap_first + 1;
731: offset = s->bmap_sector + bmap_first;
732: acb->bmap_first = VDI_UNALLOCATED;
733: acb->hd_iov.iov_base = (void *)((uint8_t *)&s->bmap[0] +
734: bmap_first * SECTOR_SIZE);
735: acb->hd_iov.iov_len = n_sectors * SECTOR_SIZE;
736: qemu_iovec_init_external(&acb->hd_qiov, &acb->hd_iov, 1);
737: logout("will write %u block map sectors starting from entry %u\n",
738: n_sectors, bmap_first);
1.1.1.3 root 739: acb->hd_aiocb = bdrv_aio_writev(bs->file, offset, &acb->hd_qiov,
1.1 root 740: n_sectors, vdi_aio_write_cb, acb);
741: if (acb->hd_aiocb == NULL) {
1.1.1.5 root 742: ret = -EIO;
1.1 root 743: goto done;
744: }
745: return;
746: }
747: ret = 0;
748: goto done;
749: }
750:
751: logout("%u sectors written\n", acb->n_sectors);
752:
753: block_index = acb->sector_num / s->block_sectors;
754: sector_in_block = acb->sector_num % s->block_sectors;
755: n_sectors = s->block_sectors - sector_in_block;
756: if (n_sectors > acb->nb_sectors) {
757: n_sectors = acb->nb_sectors;
758: }
759:
760: logout("will write %u sectors starting at sector %" PRIu64 "\n",
761: n_sectors, acb->sector_num);
762:
763: /* prepare next AIO request */
764: acb->n_sectors = n_sectors;
765: bmap_entry = le32_to_cpu(s->bmap[block_index]);
1.1.1.6 ! root 766: if (!VDI_IS_ALLOCATED(bmap_entry)) {
1.1 root 767: /* Allocate new block and write to it. */
768: uint64_t offset;
769: uint8_t *block;
770: bmap_entry = s->header.blocks_allocated;
771: s->bmap[block_index] = cpu_to_le32(bmap_entry);
772: s->header.blocks_allocated++;
773: offset = s->header.offset_data / SECTOR_SIZE +
774: (uint64_t)bmap_entry * s->block_sectors;
775: block = acb->block_buffer;
776: if (block == NULL) {
1.1.1.6 ! root 777: block = g_malloc0(s->block_size);
1.1 root 778: acb->block_buffer = block;
779: acb->bmap_first = block_index;
780: assert(!acb->header_modified);
781: acb->header_modified = 1;
782: }
783: acb->bmap_last = block_index;
784: memcpy(block + sector_in_block * SECTOR_SIZE,
785: acb->buf, n_sectors * SECTOR_SIZE);
786: acb->hd_iov.iov_base = (void *)block;
787: acb->hd_iov.iov_len = s->block_size;
788: qemu_iovec_init_external(&acb->hd_qiov, &acb->hd_iov, 1);
1.1.1.3 root 789: acb->hd_aiocb = bdrv_aio_writev(bs->file, offset,
1.1 root 790: &acb->hd_qiov, s->block_sectors,
791: vdi_aio_write_cb, acb);
792: if (acb->hd_aiocb == NULL) {
1.1.1.5 root 793: ret = -EIO;
1.1 root 794: goto done;
795: }
796: } else {
797: uint64_t offset = s->header.offset_data / SECTOR_SIZE +
798: (uint64_t)bmap_entry * s->block_sectors +
799: sector_in_block;
800: acb->hd_iov.iov_base = (void *)acb->buf;
801: acb->hd_iov.iov_len = n_sectors * SECTOR_SIZE;
802: qemu_iovec_init_external(&acb->hd_qiov, &acb->hd_iov, 1);
1.1.1.3 root 803: acb->hd_aiocb = bdrv_aio_writev(bs->file, offset, &acb->hd_qiov,
1.1 root 804: n_sectors, vdi_aio_write_cb, acb);
805: if (acb->hd_aiocb == NULL) {
1.1.1.5 root 806: ret = -EIO;
1.1 root 807: goto done;
808: }
809: }
810:
811: return;
812:
813: done:
814: if (acb->qiov->niov > 1) {
815: qemu_vfree(acb->orig_buf);
816: }
817: acb->common.cb(acb->common.opaque, ret);
818: qemu_aio_release(acb);
819: }
820:
821: static BlockDriverAIOCB *vdi_aio_writev(BlockDriverState *bs,
822: int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
823: BlockDriverCompletionFunc *cb, void *opaque)
824: {
825: VdiAIOCB *acb;
1.1.1.5 root 826: int ret;
827:
1.1 root 828: logout("\n");
829: acb = vdi_aio_setup(bs, sector_num, qiov, nb_sectors, cb, opaque, 1);
830: if (!acb) {
831: return NULL;
832: }
1.1.1.5 root 833:
834: ret = vdi_schedule_bh(vdi_aio_rw_bh, acb);
835: if (ret < 0) {
836: if (acb->qiov->niov > 1) {
837: qemu_vfree(acb->orig_buf);
838: }
839: qemu_aio_release(acb);
840: return NULL;
841: }
842:
1.1 root 843: return &acb->common;
844: }
845:
846: static int vdi_create(const char *filename, QEMUOptionParameter *options)
847: {
848: int fd;
849: int result = 0;
850: uint64_t bytes = 0;
851: uint32_t blocks;
1.1.1.5 root 852: size_t block_size = DEFAULT_CLUSTER_SIZE;
1.1 root 853: uint32_t image_type = VDI_TYPE_DYNAMIC;
854: VdiHeader header;
855: size_t i;
856: size_t bmap_size;
857: uint32_t *bmap;
858:
859: logout("\n");
860:
861: /* Read out options. */
862: while (options && options->name) {
863: if (!strcmp(options->name, BLOCK_OPT_SIZE)) {
864: bytes = options->value.n;
865: #if defined(CONFIG_VDI_BLOCK_SIZE)
866: } else if (!strcmp(options->name, BLOCK_OPT_CLUSTER_SIZE)) {
867: if (options->value.n) {
868: /* TODO: Additional checks (SECTOR_SIZE * 2^n, ...). */
869: block_size = options->value.n;
870: }
871: #endif
872: #if defined(CONFIG_VDI_STATIC_IMAGE)
873: } else if (!strcmp(options->name, BLOCK_OPT_STATIC)) {
874: if (options->value.n) {
875: image_type = VDI_TYPE_STATIC;
876: }
877: #endif
878: }
879: options++;
880: }
881:
882: fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY | O_LARGEFILE,
883: 0644);
884: if (fd < 0) {
885: return -errno;
886: }
887:
1.1.1.2 root 888: /* We need enough blocks to store the given disk size,
889: so always round up. */
890: blocks = (bytes + block_size - 1) / block_size;
891:
1.1 root 892: bmap_size = blocks * sizeof(uint32_t);
893: bmap_size = ((bmap_size + SECTOR_SIZE - 1) & ~(SECTOR_SIZE -1));
894:
895: memset(&header, 0, sizeof(header));
896: pstrcpy(header.text, sizeof(header.text), VDI_TEXT);
897: header.signature = VDI_SIGNATURE;
898: header.version = VDI_VERSION_1_1;
899: header.header_size = 0x180;
900: header.image_type = image_type;
901: header.offset_bmap = 0x200;
902: header.offset_data = 0x200 + bmap_size;
903: header.sector_size = SECTOR_SIZE;
904: header.disk_size = bytes;
905: header.block_size = block_size;
906: header.blocks_in_image = blocks;
907: if (image_type == VDI_TYPE_STATIC) {
908: header.blocks_allocated = blocks;
909: }
910: uuid_generate(header.uuid_image);
911: uuid_generate(header.uuid_last_snap);
912: /* There is no need to set header.uuid_link or header.uuid_parent here. */
913: #if defined(CONFIG_VDI_DEBUG)
914: vdi_header_print(&header);
915: #endif
916: vdi_header_to_le(&header);
917: if (write(fd, &header, sizeof(header)) < 0) {
918: result = -errno;
919: }
920:
1.1.1.3 root 921: bmap = NULL;
922: if (bmap_size > 0) {
1.1.1.6 ! root 923: bmap = (uint32_t *)g_malloc0(bmap_size);
1.1.1.3 root 924: }
1.1 root 925: for (i = 0; i < blocks; i++) {
926: if (image_type == VDI_TYPE_STATIC) {
927: bmap[i] = i;
928: } else {
929: bmap[i] = VDI_UNALLOCATED;
930: }
931: }
932: if (write(fd, bmap, bmap_size) < 0) {
933: result = -errno;
934: }
1.1.1.6 ! root 935: g_free(bmap);
1.1 root 936: if (image_type == VDI_TYPE_STATIC) {
937: if (ftruncate(fd, sizeof(header) + bmap_size + blocks * block_size)) {
938: result = -errno;
939: }
940: }
941:
942: if (close(fd) < 0) {
943: result = -errno;
944: }
945:
946: return result;
947: }
948:
949: static void vdi_close(BlockDriverState *bs)
950: {
1.1.1.6 ! root 951: BDRVVdiState *s = bs->opaque;
! 952:
! 953: g_free(s->bmap);
! 954:
! 955: migrate_del_blocker(s->migration_blocker);
! 956: error_free(s->migration_blocker);
1.1 root 957: }
958:
1.1.1.6 ! root 959: static coroutine_fn int vdi_co_flush(BlockDriverState *bs)
1.1 root 960: {
961: logout("\n");
1.1.1.6 ! root 962: return bdrv_co_flush(bs->file);
1.1 root 963: }
964:
965:
966: static QEMUOptionParameter vdi_create_options[] = {
967: {
968: .name = BLOCK_OPT_SIZE,
969: .type = OPT_SIZE,
970: .help = "Virtual disk size"
971: },
972: #if defined(CONFIG_VDI_BLOCK_SIZE)
973: {
974: .name = BLOCK_OPT_CLUSTER_SIZE,
975: .type = OPT_SIZE,
1.1.1.5 root 976: .help = "VDI cluster (block) size",
977: .value = { .n = DEFAULT_CLUSTER_SIZE },
1.1 root 978: },
979: #endif
980: #if defined(CONFIG_VDI_STATIC_IMAGE)
981: {
982: .name = BLOCK_OPT_STATIC,
983: .type = OPT_FLAG,
984: .help = "VDI static (pre-allocated) image"
985: },
986: #endif
987: /* TODO: An additional option to set UUID values might be useful. */
988: { NULL }
989: };
990:
991: static BlockDriver bdrv_vdi = {
992: .format_name = "vdi",
993: .instance_size = sizeof(BDRVVdiState),
994: .bdrv_probe = vdi_probe,
995: .bdrv_open = vdi_open,
996: .bdrv_close = vdi_close,
997: .bdrv_create = vdi_create,
1.1.1.6 ! root 998: .bdrv_co_flush_to_disk = vdi_co_flush,
1.1 root 999: .bdrv_is_allocated = vdi_is_allocated,
1000: .bdrv_make_empty = vdi_make_empty,
1001:
1002: .bdrv_aio_readv = vdi_aio_readv,
1003: #if defined(CONFIG_VDI_WRITE)
1004: .bdrv_aio_writev = vdi_aio_writev,
1005: #endif
1006:
1007: .bdrv_get_info = vdi_get_info,
1008:
1009: .create_options = vdi_create_options,
1010: .bdrv_check = vdi_check,
1011: };
1012:
1013: static void bdrv_vdi_init(void)
1014: {
1015: logout("\n");
1016: bdrv_register(&bdrv_vdi);
1017: }
1018:
1019: block_init(bdrv_vdi_init);
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.