--- qemu/block.c 2018/04/24 16:47:23 1.1.1.6 +++ qemu/block.c 2018/04/24 16:50:42 1.1.1.7 @@ -21,17 +21,20 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. */ +#include "config-host.h" +#ifdef _BSD +/* include native header before sys-queue.h */ +#include +#endif + #include "qemu-common.h" -#ifndef QEMU_IMG #include "console.h" -#endif #include "block_int.h" #ifdef _BSD #include #include #include -#include #include #endif @@ -57,6 +60,7 @@ static int bdrv_write_em(BlockDriverStat const uint8_t *buf, int nb_sectors); BlockDriverState *bdrv_first; + static BlockDriver *first_drv; int path_is_absolute(const char *path) @@ -147,8 +151,6 @@ BlockDriverState *bdrv_new(const char *d BlockDriverState **pbs, *bs; bs = qemu_mallocz(sizeof(BlockDriverState)); - if(!bs) - return NULL; pstrcpy(bs->device_name, sizeof(bs->device_name), device_name); if (device_name[0] != '\0') { /* insert at the end */ @@ -191,8 +193,12 @@ void get_tmp_filename(char *filename, in void get_tmp_filename(char *filename, int size) { int fd; + const char *tmpdir; /* XXX: race condition possible */ - pstrcpy(filename, size, "/tmp/vl.XXXXXX"); + tmpdir = getenv("TMPDIR"); + if (!tmpdir) + tmpdir = "/tmp"; + snprintf(filename, size, "%s/vl.XXXXXX", tmpdir); fd = mkstemp(filename); close(fd); } @@ -312,6 +318,7 @@ int bdrv_file_open(BlockDriverState **pb bdrv_delete(bs); return ret; } + bs->growable = 1; *pbs = bs; return 0; } @@ -335,6 +342,7 @@ int bdrv_open2(BlockDriverState *bs, con if (flags & BDRV_O_SNAPSHOT) { BlockDriverState *bs1; int64_t total_size; + int is_protocol = 0; /* if snapshot, we create a temporary backing file and open it instead of opening 'filename' directly */ @@ -349,10 +357,21 @@ int bdrv_open2(BlockDriverState *bs, con return -1; } total_size = bdrv_getlength(bs1) >> SECTOR_BITS; + + if (bs1->drv && bs1->drv->protocol_name) + is_protocol = 1; + bdrv_delete(bs1); get_tmp_filename(tmp_filename, sizeof(tmp_filename)); - realpath(filename, backing_filename); + + /* Real path is meaningless for protocols */ + if (is_protocol) + snprintf(backing_filename, sizeof(backing_filename), + "%s", filename); + else + realpath(filename, backing_filename); + if (bdrv_create(&bdrv_qcow2, tmp_filename, total_size, backing_filename, 0) < 0) { return -1; @@ -375,17 +394,15 @@ int bdrv_open2(BlockDriverState *bs, con } bs->drv = drv; bs->opaque = qemu_mallocz(drv->instance_size); - if (bs->opaque == NULL && drv->instance_size > 0) - return -1; /* Note: for compatibility, we open disk image files as RDWR, and RDONLY as fallback */ if (!(flags & BDRV_O_FILE)) - open_flags = BDRV_O_RDWR | (flags & BDRV_O_DIRECT); + open_flags = BDRV_O_RDWR | (flags & BDRV_O_CACHE_MASK); else open_flags = flags & ~(BDRV_O_FILE | BDRV_O_SNAPSHOT); ret = drv->bdrv_open(bs, filename, open_flags); - if (ret == -EACCES && !(flags & BDRV_O_FILE)) { - ret = drv->bdrv_open(bs, filename, BDRV_O_RDONLY); + if ((ret == -EACCES || ret == -EPERM) && !(flags & BDRV_O_FILE)) { + ret = drv->bdrv_open(bs, filename, open_flags & ~BDRV_O_RDWR); bs->read_only = 1; } if (ret < 0) { @@ -412,7 +429,7 @@ int bdrv_open2(BlockDriverState *bs, con } path_combine(backing_filename, sizeof(backing_filename), filename, bs->backing_file); - if (bdrv_open(bs->backing_hd, backing_filename, 0) < 0) + if (bdrv_open(bs->backing_hd, backing_filename, open_flags) < 0) goto fail; } @@ -448,7 +465,14 @@ void bdrv_close(BlockDriverState *bs) void bdrv_delete(BlockDriverState *bs) { - /* XXX: remove the driver list */ + BlockDriverState **pbs; + + pbs = &bdrv_first; + while (*pbs != bs && *pbs != NULL) + pbs = &(*pbs)->next; + if (*pbs == bs) + *pbs = bs->next; + bdrv_close(bs); qemu_free(bs); } @@ -496,6 +520,39 @@ int bdrv_commit(BlockDriverState *bs) return 0; } +static int bdrv_check_byte_request(BlockDriverState *bs, int64_t offset, + size_t size) +{ + int64_t len; + + if (!bdrv_is_inserted(bs)) + return -ENOMEDIUM; + + if (bs->growable) + return 0; + + len = bdrv_getlength(bs); + + if ((offset + size) > len) + return -EIO; + + return 0; +} + +static int bdrv_check_request(BlockDriverState *bs, int64_t sector_num, + int nb_sectors) +{ + int64_t offset; + + /* Deal with byte accesses */ + if (sector_num < 0) + offset = -sector_num; + else + offset = sector_num * 512; + + return bdrv_check_byte_request(bs, offset, nb_sectors * 512); +} + /* return < 0 if error. See bdrv_write() for the return codes */ int bdrv_read(BlockDriverState *bs, int64_t sector_num, uint8_t *buf, int nb_sectors) @@ -504,15 +561,9 @@ int bdrv_read(BlockDriverState *bs, int6 if (!drv) return -ENOMEDIUM; + if (bdrv_check_request(bs, sector_num, nb_sectors)) + return -EIO; - if (sector_num == 0 && bs->boot_sector_enabled && nb_sectors > 0) { - memcpy(buf, bs->boot_sector_data, 512); - sector_num++; - nb_sectors--; - buf += 512; - if (nb_sectors == 0) - return 0; - } if (drv->bdrv_pread) { int ret, len; len = nb_sectors * 512; @@ -545,25 +596,26 @@ int bdrv_write(BlockDriverState *bs, int return -ENOMEDIUM; if (bs->read_only) return -EACCES; - if (sector_num == 0 && bs->boot_sector_enabled && nb_sectors > 0) { - memcpy(bs->boot_sector_data, buf, 512); - } + if (bdrv_check_request(bs, sector_num, nb_sectors)) + return -EIO; + if (drv->bdrv_pwrite) { - int ret, len; + int ret, len, count = 0; len = nb_sectors * 512; - ret = drv->bdrv_pwrite(bs, sector_num * 512, buf, len); - if (ret < 0) - return ret; - else if (ret != len) - return -EIO; - else { - bs->wr_bytes += (unsigned) len; - bs->wr_ops ++; - return 0; - } - } else { - return drv->bdrv_write(bs, sector_num, buf, nb_sectors); + do { + ret = drv->bdrv_pwrite(bs, sector_num * 512, buf, len - count); + if (ret < 0) { + printf("bdrv_write ret=%d\n", ret); + return ret; + } + count += ret; + buf += ret; + } while (count != len); + bs->wr_bytes += (unsigned) len; + bs->wr_ops ++; + return 0; } + return drv->bdrv_write(bs, sector_num, buf, nb_sectors); } static int bdrv_pread_em(BlockDriverState *bs, int64_t offset, @@ -668,6 +720,9 @@ int bdrv_pread(BlockDriverState *bs, int if (!drv) return -ENOMEDIUM; + if (bdrv_check_byte_request(bs, offset, count1)) + return -EIO; + if (!drv->bdrv_pread) return bdrv_pread_em(bs, offset, buf1, count1); return drv->bdrv_pread(bs, offset, buf1, count1); @@ -683,6 +738,9 @@ int bdrv_pwrite(BlockDriverState *bs, in if (!drv) return -ENOMEDIUM; + if (bdrv_check_byte_request(bs, offset, count1)) + return -EIO; + if (!drv->bdrv_pwrite) return bdrv_pwrite_em(bs, offset, buf1, count1); return drv->bdrv_pwrite(bs, offset, buf1, count1); @@ -728,14 +786,120 @@ void bdrv_get_geometry(BlockDriverState *nb_sectors_ptr = length; } -/* force a given boot sector. */ -void bdrv_set_boot_sector(BlockDriverState *bs, const uint8_t *data, int size) +struct partition { + uint8_t boot_ind; /* 0x80 - active */ + uint8_t head; /* starting head */ + uint8_t sector; /* starting sector */ + uint8_t cyl; /* starting cylinder */ + uint8_t sys_ind; /* What partition type */ + uint8_t end_head; /* end head */ + uint8_t end_sector; /* end sector */ + uint8_t end_cyl; /* end cylinder */ + uint32_t start_sect; /* starting sector counting from 0 */ + uint32_t nr_sects; /* nr of sectors in partition */ +} __attribute__((packed)); + +/* try to guess the disk logical geometry from the MSDOS partition table. Return 0 if OK, -1 if could not guess */ +static int guess_disk_lchs(BlockDriverState *bs, + int *pcylinders, int *pheads, int *psectors) +{ + uint8_t buf[512]; + int ret, i, heads, sectors, cylinders; + struct partition *p; + uint32_t nr_sects; + uint64_t nb_sectors; + + bdrv_get_geometry(bs, &nb_sectors); + + ret = bdrv_read(bs, 0, buf, 1); + if (ret < 0) + return -1; + /* test msdos magic */ + if (buf[510] != 0x55 || buf[511] != 0xaa) + return -1; + for(i = 0; i < 4; i++) { + p = ((struct partition *)(buf + 0x1be)) + i; + nr_sects = le32_to_cpu(p->nr_sects); + if (nr_sects && p->end_head) { + /* We make the assumption that the partition terminates on + a cylinder boundary */ + heads = p->end_head + 1; + sectors = p->end_sector & 63; + if (sectors == 0) + continue; + cylinders = nb_sectors / (heads * sectors); + if (cylinders < 1 || cylinders > 16383) + continue; + *pheads = heads; + *psectors = sectors; + *pcylinders = cylinders; +#if 0 + printf("guessed geometry: LCHS=%d %d %d\n", + cylinders, heads, sectors); +#endif + return 0; + } + } + return -1; +} + +void bdrv_guess_geometry(BlockDriverState *bs, int *pcyls, int *pheads, int *psecs) { - bs->boot_sector_enabled = 1; - if (size > 512) - size = 512; - memcpy(bs->boot_sector_data, data, size); - memset(bs->boot_sector_data + size, 0, 512 - size); + int translation, lba_detected = 0; + int cylinders, heads, secs; + uint64_t nb_sectors; + + /* if a geometry hint is available, use it */ + bdrv_get_geometry(bs, &nb_sectors); + bdrv_get_geometry_hint(bs, &cylinders, &heads, &secs); + translation = bdrv_get_translation_hint(bs); + if (cylinders != 0) { + *pcyls = cylinders; + *pheads = heads; + *psecs = secs; + } else { + if (guess_disk_lchs(bs, &cylinders, &heads, &secs) == 0) { + if (heads > 16) { + /* if heads > 16, it means that a BIOS LBA + translation was active, so the default + hardware geometry is OK */ + lba_detected = 1; + goto default_geometry; + } else { + *pcyls = cylinders; + *pheads = heads; + *psecs = secs; + /* disable any translation to be in sync with + the logical geometry */ + if (translation == BIOS_ATA_TRANSLATION_AUTO) { + bdrv_set_translation_hint(bs, + BIOS_ATA_TRANSLATION_NONE); + } + } + } else { + default_geometry: + /* if no geometry, use a standard physical disk geometry */ + cylinders = nb_sectors / (16 * 63); + + if (cylinders > 16383) + cylinders = 16383; + else if (cylinders < 2) + cylinders = 2; + *pcyls = cylinders; + *pheads = 16; + *psecs = 63; + if ((lba_detected == 1) && (translation == BIOS_ATA_TRANSLATION_AUTO)) { + if ((*pcyls * *pheads) <= 131072) { + bdrv_set_translation_hint(bs, + BIOS_ATA_TRANSLATION_LARGE); + } else { + bdrv_set_translation_hint(bs, + BIOS_ATA_TRANSLATION_LBA); + } + } + } + bdrv_set_geometry_hint(bs, *pcyls, *pheads, *psecs); + } } void bdrv_set_geometry_hint(BlockDriverState *bs, @@ -873,7 +1037,43 @@ void bdrv_flush(BlockDriverState *bs) bdrv_flush(bs->backing_hd); } -#ifndef QEMU_IMG +void bdrv_flush_all(void) +{ + BlockDriverState *bs; + + for (bs = bdrv_first; bs != NULL; bs = bs->next) + if (bs->drv && !bdrv_is_read_only(bs) && + (!bdrv_is_removable(bs) || bdrv_is_inserted(bs))) + bdrv_flush(bs); +} + +/* + * Returns true iff the specified sector is present in the disk image. Drivers + * not implementing the functionality are assumed to not support backing files, + * hence all their sectors are reported as allocated. + * + * 'pnum' is set to the number of sectors (including and immediately following + * the specified sector) that are known to be in the same + * allocated/unallocated state. + * + * 'nb_sectors' is the max value 'pnum' should be set to. + */ +int bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num, int nb_sectors, + int *pnum) +{ + int64_t n; + if (!bs->drv->bdrv_is_allocated) { + if (sector_num >= bs->total_sectors) { + *pnum = 0; + return 0; + } + n = bs->total_sectors - sector_num; + *pnum = (n < nb_sectors) ? (n) : (nb_sectors); + return 1; + } + return bs->drv->bdrv_is_allocated(bs, sector_num, nb_sectors, pnum); +} + void bdrv_info(void) { BlockDriverState *bs; @@ -918,6 +1118,7 @@ void bdrv_info(void) void bdrv_info_stats (void) { BlockDriverState *bs; + BlockDriverInfo bdi; for (bs = bdrv_first; bs != NULL; bs = bs->next) { term_printf ("%s:" @@ -925,13 +1126,17 @@ void bdrv_info_stats (void) " wr_bytes=%" PRIu64 " rd_operations=%" PRIu64 " wr_operations=%" PRIu64 - "\n", + , bs->device_name, bs->rd_bytes, bs->wr_bytes, bs->rd_ops, bs->wr_ops); + if (bdrv_get_info(bs, &bdi) == 0) + term_printf(" high=%" PRId64 + " bytes_free=%" PRId64, + bdi.highest_alloc, bdi.num_free_bytes); + term_printf("\n"); } } -#endif void bdrv_get_backing_filename(BlockDriverState *bs, char *filename, int filename_size) @@ -1088,6 +1293,75 @@ char *bdrv_snapshot_dump(char *buf, int /**************************************************************/ /* async I/Os */ +typedef struct VectorTranslationState { + QEMUIOVector *iov; + uint8_t *bounce; + int is_write; + BlockDriverAIOCB *aiocb; + BlockDriverAIOCB *this_aiocb; +} VectorTranslationState; + +static void bdrv_aio_rw_vector_cb(void *opaque, int ret) +{ + VectorTranslationState *s = opaque; + + if (!s->is_write) { + qemu_iovec_from_buffer(s->iov, s->bounce, s->iov->size); + } + qemu_vfree(s->bounce); + s->this_aiocb->cb(s->this_aiocb->opaque, ret); + qemu_aio_release(s->this_aiocb); +} + +static BlockDriverAIOCB *bdrv_aio_rw_vector(BlockDriverState *bs, + int64_t sector_num, + QEMUIOVector *iov, + int nb_sectors, + BlockDriverCompletionFunc *cb, + void *opaque, + int is_write) + +{ + VectorTranslationState *s = qemu_mallocz(sizeof(*s)); + BlockDriverAIOCB *aiocb = qemu_aio_get(bs, cb, opaque); + + s->this_aiocb = aiocb; + s->iov = iov; + s->bounce = qemu_memalign(512, nb_sectors * 512); + s->is_write = is_write; + if (is_write) { + qemu_iovec_to_buffer(s->iov, s->bounce); + s->aiocb = bdrv_aio_write(bs, sector_num, s->bounce, nb_sectors, + bdrv_aio_rw_vector_cb, s); + } else { + s->aiocb = bdrv_aio_read(bs, sector_num, s->bounce, nb_sectors, + bdrv_aio_rw_vector_cb, s); + } + return aiocb; +} + +BlockDriverAIOCB *bdrv_aio_readv(BlockDriverState *bs, int64_t sector_num, + QEMUIOVector *iov, int nb_sectors, + BlockDriverCompletionFunc *cb, void *opaque) +{ + if (bdrv_check_request(bs, sector_num, nb_sectors)) + return NULL; + + return bdrv_aio_rw_vector(bs, sector_num, iov, nb_sectors, + cb, opaque, 0); +} + +BlockDriverAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num, + QEMUIOVector *iov, int nb_sectors, + BlockDriverCompletionFunc *cb, void *opaque) +{ + if (bdrv_check_request(bs, sector_num, nb_sectors)) + return NULL; + + return bdrv_aio_rw_vector(bs, sector_num, iov, nb_sectors, + cb, opaque, 1); +} + BlockDriverAIOCB *bdrv_aio_read(BlockDriverState *bs, int64_t sector_num, uint8_t *buf, int nb_sectors, BlockDriverCompletionFunc *cb, void *opaque) @@ -1097,14 +1371,8 @@ BlockDriverAIOCB *bdrv_aio_read(BlockDri if (!drv) return NULL; - - /* XXX: we assume that nb_sectors == 0 is suppored by the async read */ - if (sector_num == 0 && bs->boot_sector_enabled && nb_sectors > 0) { - memcpy(buf, bs->boot_sector_data, 512); - sector_num++; - nb_sectors--; - buf += 512; - } + if (bdrv_check_request(bs, sector_num, nb_sectors)) + return NULL; ret = drv->bdrv_aio_read(bs, sector_num, buf, nb_sectors, cb, opaque); @@ -1128,9 +1396,8 @@ BlockDriverAIOCB *bdrv_aio_write(BlockDr return NULL; if (bs->read_only) return NULL; - if (sector_num == 0 && bs->boot_sector_enabled && nb_sectors > 0) { - memcpy(bs->boot_sector_data, buf, 512); - } + if (bdrv_check_request(bs, sector_num, nb_sectors)) + return NULL; ret = drv->bdrv_aio_write(bs, sector_num, buf, nb_sectors, cb, opaque); @@ -1147,6 +1414,11 @@ void bdrv_aio_cancel(BlockDriverAIOCB *a { BlockDriver *drv = acb->bs->drv; + if (acb->cb == bdrv_aio_rw_vector_cb) { + VectorTranslationState *s = acb->opaque; + acb = s->aiocb; + } + drv->bdrv_aio_cancel(acb); } @@ -1154,31 +1426,6 @@ void bdrv_aio_cancel(BlockDriverAIOCB *a /**************************************************************/ /* async block device emulation */ -#ifdef QEMU_IMG -static BlockDriverAIOCB *bdrv_aio_read_em(BlockDriverState *bs, - int64_t sector_num, uint8_t *buf, int nb_sectors, - BlockDriverCompletionFunc *cb, void *opaque) -{ - int ret; - ret = bdrv_read(bs, sector_num, buf, nb_sectors); - cb(opaque, ret); - return NULL; -} - -static BlockDriverAIOCB *bdrv_aio_write_em(BlockDriverState *bs, - int64_t sector_num, const uint8_t *buf, int nb_sectors, - BlockDriverCompletionFunc *cb, void *opaque) -{ - int ret; - ret = bdrv_write(bs, sector_num, buf, nb_sectors); - cb(opaque, ret); - return NULL; -} - -static void bdrv_aio_cancel_em(BlockDriverAIOCB *acb) -{ -} -#else static void bdrv_aio_bh_cb(void *opaque) { BlockDriverAIOCBSync *acb = opaque; @@ -1224,7 +1471,6 @@ static void bdrv_aio_cancel_em(BlockDriv qemu_bh_cancel(acb->bh); qemu_aio_release(acb); } -#endif /* !QEMU_IMG */ /**************************************************************/ /* sync block device emulation */ @@ -1243,17 +1489,15 @@ static int bdrv_read_em(BlockDriverState BlockDriverAIOCB *acb; async_ret = NOT_DONE; - qemu_aio_wait_start(); acb = bdrv_aio_read(bs, sector_num, buf, nb_sectors, bdrv_rw_em_cb, &async_ret); - if (acb == NULL) { - qemu_aio_wait_end(); + if (acb == NULL) return -1; - } + while (async_ret == NOT_DONE) { qemu_aio_wait(); } - qemu_aio_wait_end(); + return async_ret; } @@ -1264,17 +1508,13 @@ static int bdrv_write_em(BlockDriverStat BlockDriverAIOCB *acb; async_ret = NOT_DONE; - qemu_aio_wait_start(); acb = bdrv_aio_write(bs, sector_num, buf, nb_sectors, bdrv_rw_em_cb, &async_ret); - if (acb == NULL) { - qemu_aio_wait_end(); + if (acb == NULL) return -1; - } while (async_ret == NOT_DONE) { qemu_aio_wait(); } - qemu_aio_wait_end(); return async_ret; } @@ -1294,6 +1534,7 @@ void bdrv_init(void) bdrv_register(&bdrv_vvfat); bdrv_register(&bdrv_qcow2); bdrv_register(&bdrv_parallels); + bdrv_register(&bdrv_nbd); } void *qemu_aio_get(BlockDriverState *bs, BlockDriverCompletionFunc *cb, @@ -1308,8 +1549,6 @@ void *qemu_aio_get(BlockDriverState *bs, drv->free_aiocb = acb->next; } else { acb = qemu_mallocz(drv->aiocb_size); - if (!acb) - return NULL; } acb->bs = bs; acb->cb = cb;