|
|
1.1 ! root 1: /* ! 2: * QEMU Enhanced Disk Format ! 3: * ! 4: * Copyright IBM, Corp. 2010 ! 5: * ! 6: * Authors: ! 7: * Stefan Hajnoczi <[email protected]> ! 8: * Anthony Liguori <[email protected]> ! 9: * ! 10: * This work is licensed under the terms of the GNU LGPL, version 2 or later. ! 11: * See the COPYING.LIB file in the top-level directory. ! 12: * ! 13: */ ! 14: ! 15: #include "trace.h" ! 16: #include "qed.h" ! 17: #include "qerror.h" ! 18: ! 19: static void qed_aio_cancel(BlockDriverAIOCB *blockacb) ! 20: { ! 21: QEDAIOCB *acb = (QEDAIOCB *)blockacb; ! 22: bool finished = false; ! 23: ! 24: /* Wait for the request to finish */ ! 25: acb->finished = &finished; ! 26: while (!finished) { ! 27: qemu_aio_wait(); ! 28: } ! 29: } ! 30: ! 31: static AIOPool qed_aio_pool = { ! 32: .aiocb_size = sizeof(QEDAIOCB), ! 33: .cancel = qed_aio_cancel, ! 34: }; ! 35: ! 36: static int bdrv_qed_probe(const uint8_t *buf, int buf_size, ! 37: const char *filename) ! 38: { ! 39: const QEDHeader *header = (const QEDHeader *)buf; ! 40: ! 41: if (buf_size < sizeof(*header)) { ! 42: return 0; ! 43: } ! 44: if (le32_to_cpu(header->magic) != QED_MAGIC) { ! 45: return 0; ! 46: } ! 47: return 100; ! 48: } ! 49: ! 50: /** ! 51: * Check whether an image format is raw ! 52: * ! 53: * @fmt: Backing file format, may be NULL ! 54: */ ! 55: static bool qed_fmt_is_raw(const char *fmt) ! 56: { ! 57: return fmt && strcmp(fmt, "raw") == 0; ! 58: } ! 59: ! 60: static void qed_header_le_to_cpu(const QEDHeader *le, QEDHeader *cpu) ! 61: { ! 62: cpu->magic = le32_to_cpu(le->magic); ! 63: cpu->cluster_size = le32_to_cpu(le->cluster_size); ! 64: cpu->table_size = le32_to_cpu(le->table_size); ! 65: cpu->header_size = le32_to_cpu(le->header_size); ! 66: cpu->features = le64_to_cpu(le->features); ! 67: cpu->compat_features = le64_to_cpu(le->compat_features); ! 68: cpu->autoclear_features = le64_to_cpu(le->autoclear_features); ! 69: cpu->l1_table_offset = le64_to_cpu(le->l1_table_offset); ! 70: cpu->image_size = le64_to_cpu(le->image_size); ! 71: cpu->backing_filename_offset = le32_to_cpu(le->backing_filename_offset); ! 72: cpu->backing_filename_size = le32_to_cpu(le->backing_filename_size); ! 73: } ! 74: ! 75: static void qed_header_cpu_to_le(const QEDHeader *cpu, QEDHeader *le) ! 76: { ! 77: le->magic = cpu_to_le32(cpu->magic); ! 78: le->cluster_size = cpu_to_le32(cpu->cluster_size); ! 79: le->table_size = cpu_to_le32(cpu->table_size); ! 80: le->header_size = cpu_to_le32(cpu->header_size); ! 81: le->features = cpu_to_le64(cpu->features); ! 82: le->compat_features = cpu_to_le64(cpu->compat_features); ! 83: le->autoclear_features = cpu_to_le64(cpu->autoclear_features); ! 84: le->l1_table_offset = cpu_to_le64(cpu->l1_table_offset); ! 85: le->image_size = cpu_to_le64(cpu->image_size); ! 86: le->backing_filename_offset = cpu_to_le32(cpu->backing_filename_offset); ! 87: le->backing_filename_size = cpu_to_le32(cpu->backing_filename_size); ! 88: } ! 89: ! 90: static int qed_write_header_sync(BDRVQEDState *s) ! 91: { ! 92: QEDHeader le; ! 93: int ret; ! 94: ! 95: qed_header_cpu_to_le(&s->header, &le); ! 96: ret = bdrv_pwrite(s->bs->file, 0, &le, sizeof(le)); ! 97: if (ret != sizeof(le)) { ! 98: return ret; ! 99: } ! 100: return 0; ! 101: } ! 102: ! 103: typedef struct { ! 104: GenericCB gencb; ! 105: BDRVQEDState *s; ! 106: struct iovec iov; ! 107: QEMUIOVector qiov; ! 108: int nsectors; ! 109: uint8_t *buf; ! 110: } QEDWriteHeaderCB; ! 111: ! 112: static void qed_write_header_cb(void *opaque, int ret) ! 113: { ! 114: QEDWriteHeaderCB *write_header_cb = opaque; ! 115: ! 116: qemu_vfree(write_header_cb->buf); ! 117: gencb_complete(write_header_cb, ret); ! 118: } ! 119: ! 120: static void qed_write_header_read_cb(void *opaque, int ret) ! 121: { ! 122: QEDWriteHeaderCB *write_header_cb = opaque; ! 123: BDRVQEDState *s = write_header_cb->s; ! 124: BlockDriverAIOCB *acb; ! 125: ! 126: if (ret) { ! 127: qed_write_header_cb(write_header_cb, ret); ! 128: return; ! 129: } ! 130: ! 131: /* Update header */ ! 132: qed_header_cpu_to_le(&s->header, (QEDHeader *)write_header_cb->buf); ! 133: ! 134: acb = bdrv_aio_writev(s->bs->file, 0, &write_header_cb->qiov, ! 135: write_header_cb->nsectors, qed_write_header_cb, ! 136: write_header_cb); ! 137: if (!acb) { ! 138: qed_write_header_cb(write_header_cb, -EIO); ! 139: } ! 140: } ! 141: ! 142: /** ! 143: * Update header in-place (does not rewrite backing filename or other strings) ! 144: * ! 145: * This function only updates known header fields in-place and does not affect ! 146: * extra data after the QED header. ! 147: */ ! 148: static void qed_write_header(BDRVQEDState *s, BlockDriverCompletionFunc cb, ! 149: void *opaque) ! 150: { ! 151: /* We must write full sectors for O_DIRECT but cannot necessarily generate ! 152: * the data following the header if an unrecognized compat feature is ! 153: * active. Therefore, first read the sectors containing the header, update ! 154: * them, and write back. ! 155: */ ! 156: ! 157: BlockDriverAIOCB *acb; ! 158: int nsectors = (sizeof(QEDHeader) + BDRV_SECTOR_SIZE - 1) / ! 159: BDRV_SECTOR_SIZE; ! 160: size_t len = nsectors * BDRV_SECTOR_SIZE; ! 161: QEDWriteHeaderCB *write_header_cb = gencb_alloc(sizeof(*write_header_cb), ! 162: cb, opaque); ! 163: ! 164: write_header_cb->s = s; ! 165: write_header_cb->nsectors = nsectors; ! 166: write_header_cb->buf = qemu_blockalign(s->bs, len); ! 167: write_header_cb->iov.iov_base = write_header_cb->buf; ! 168: write_header_cb->iov.iov_len = len; ! 169: qemu_iovec_init_external(&write_header_cb->qiov, &write_header_cb->iov, 1); ! 170: ! 171: acb = bdrv_aio_readv(s->bs->file, 0, &write_header_cb->qiov, nsectors, ! 172: qed_write_header_read_cb, write_header_cb); ! 173: if (!acb) { ! 174: qed_write_header_cb(write_header_cb, -EIO); ! 175: } ! 176: } ! 177: ! 178: static uint64_t qed_max_image_size(uint32_t cluster_size, uint32_t table_size) ! 179: { ! 180: uint64_t table_entries; ! 181: uint64_t l2_size; ! 182: ! 183: table_entries = (table_size * cluster_size) / sizeof(uint64_t); ! 184: l2_size = table_entries * cluster_size; ! 185: ! 186: return l2_size * table_entries; ! 187: } ! 188: ! 189: static bool qed_is_cluster_size_valid(uint32_t cluster_size) ! 190: { ! 191: if (cluster_size < QED_MIN_CLUSTER_SIZE || ! 192: cluster_size > QED_MAX_CLUSTER_SIZE) { ! 193: return false; ! 194: } ! 195: if (cluster_size & (cluster_size - 1)) { ! 196: return false; /* not power of 2 */ ! 197: } ! 198: return true; ! 199: } ! 200: ! 201: static bool qed_is_table_size_valid(uint32_t table_size) ! 202: { ! 203: if (table_size < QED_MIN_TABLE_SIZE || ! 204: table_size > QED_MAX_TABLE_SIZE) { ! 205: return false; ! 206: } ! 207: if (table_size & (table_size - 1)) { ! 208: return false; /* not power of 2 */ ! 209: } ! 210: return true; ! 211: } ! 212: ! 213: static bool qed_is_image_size_valid(uint64_t image_size, uint32_t cluster_size, ! 214: uint32_t table_size) ! 215: { ! 216: if (image_size % BDRV_SECTOR_SIZE != 0) { ! 217: return false; /* not multiple of sector size */ ! 218: } ! 219: if (image_size > qed_max_image_size(cluster_size, table_size)) { ! 220: return false; /* image is too large */ ! 221: } ! 222: return true; ! 223: } ! 224: ! 225: /** ! 226: * Read a string of known length from the image file ! 227: * ! 228: * @file: Image file ! 229: * @offset: File offset to start of string, in bytes ! 230: * @n: String length in bytes ! 231: * @buf: Destination buffer ! 232: * @buflen: Destination buffer length in bytes ! 233: * @ret: 0 on success, -errno on failure ! 234: * ! 235: * The string is NUL-terminated. ! 236: */ ! 237: static int qed_read_string(BlockDriverState *file, uint64_t offset, size_t n, ! 238: char *buf, size_t buflen) ! 239: { ! 240: int ret; ! 241: if (n >= buflen) { ! 242: return -EINVAL; ! 243: } ! 244: ret = bdrv_pread(file, offset, buf, n); ! 245: if (ret < 0) { ! 246: return ret; ! 247: } ! 248: buf[n] = '\0'; ! 249: return 0; ! 250: } ! 251: ! 252: /** ! 253: * Allocate new clusters ! 254: * ! 255: * @s: QED state ! 256: * @n: Number of contiguous clusters to allocate ! 257: * @ret: Offset of first allocated cluster ! 258: * ! 259: * This function only produces the offset where the new clusters should be ! 260: * written. It updates BDRVQEDState but does not make any changes to the image ! 261: * file. ! 262: */ ! 263: static uint64_t qed_alloc_clusters(BDRVQEDState *s, unsigned int n) ! 264: { ! 265: uint64_t offset = s->file_size; ! 266: s->file_size += n * s->header.cluster_size; ! 267: return offset; ! 268: } ! 269: ! 270: QEDTable *qed_alloc_table(BDRVQEDState *s) ! 271: { ! 272: /* Honor O_DIRECT memory alignment requirements */ ! 273: return qemu_blockalign(s->bs, ! 274: s->header.cluster_size * s->header.table_size); ! 275: } ! 276: ! 277: /** ! 278: * Allocate a new zeroed L2 table ! 279: */ ! 280: static CachedL2Table *qed_new_l2_table(BDRVQEDState *s) ! 281: { ! 282: CachedL2Table *l2_table = qed_alloc_l2_cache_entry(&s->l2_cache); ! 283: ! 284: l2_table->table = qed_alloc_table(s); ! 285: l2_table->offset = qed_alloc_clusters(s, s->header.table_size); ! 286: ! 287: memset(l2_table->table->offsets, 0, ! 288: s->header.cluster_size * s->header.table_size); ! 289: return l2_table; ! 290: } ! 291: ! 292: static void qed_aio_next_io(void *opaque, int ret); ! 293: ! 294: static int bdrv_qed_open(BlockDriverState *bs, int flags) ! 295: { ! 296: BDRVQEDState *s = bs->opaque; ! 297: QEDHeader le_header; ! 298: int64_t file_size; ! 299: int ret; ! 300: ! 301: s->bs = bs; ! 302: QSIMPLEQ_INIT(&s->allocating_write_reqs); ! 303: ! 304: ret = bdrv_pread(bs->file, 0, &le_header, sizeof(le_header)); ! 305: if (ret < 0) { ! 306: return ret; ! 307: } ! 308: ret = 0; /* ret should always be 0 or -errno */ ! 309: qed_header_le_to_cpu(&le_header, &s->header); ! 310: ! 311: if (s->header.magic != QED_MAGIC) { ! 312: return -EINVAL; ! 313: } ! 314: if (s->header.features & ~QED_FEATURE_MASK) { ! 315: /* image uses unsupported feature bits */ ! 316: char buf[64]; ! 317: snprintf(buf, sizeof(buf), "%" PRIx64, ! 318: s->header.features & ~QED_FEATURE_MASK); ! 319: qerror_report(QERR_UNKNOWN_BLOCK_FORMAT_FEATURE, ! 320: bs->device_name, "QED", buf); ! 321: return -ENOTSUP; ! 322: } ! 323: if (!qed_is_cluster_size_valid(s->header.cluster_size)) { ! 324: return -EINVAL; ! 325: } ! 326: ! 327: /* Round down file size to the last cluster */ ! 328: file_size = bdrv_getlength(bs->file); ! 329: if (file_size < 0) { ! 330: return file_size; ! 331: } ! 332: s->file_size = qed_start_of_cluster(s, file_size); ! 333: ! 334: if (!qed_is_table_size_valid(s->header.table_size)) { ! 335: return -EINVAL; ! 336: } ! 337: if (!qed_is_image_size_valid(s->header.image_size, ! 338: s->header.cluster_size, ! 339: s->header.table_size)) { ! 340: return -EINVAL; ! 341: } ! 342: if (!qed_check_table_offset(s, s->header.l1_table_offset)) { ! 343: return -EINVAL; ! 344: } ! 345: ! 346: s->table_nelems = (s->header.cluster_size * s->header.table_size) / ! 347: sizeof(uint64_t); ! 348: s->l2_shift = ffs(s->header.cluster_size) - 1; ! 349: s->l2_mask = s->table_nelems - 1; ! 350: s->l1_shift = s->l2_shift + ffs(s->table_nelems) - 1; ! 351: ! 352: if ((s->header.features & QED_F_BACKING_FILE)) { ! 353: if ((uint64_t)s->header.backing_filename_offset + ! 354: s->header.backing_filename_size > ! 355: s->header.cluster_size * s->header.header_size) { ! 356: return -EINVAL; ! 357: } ! 358: ! 359: ret = qed_read_string(bs->file, s->header.backing_filename_offset, ! 360: s->header.backing_filename_size, bs->backing_file, ! 361: sizeof(bs->backing_file)); ! 362: if (ret < 0) { ! 363: return ret; ! 364: } ! 365: ! 366: if (s->header.features & QED_F_BACKING_FORMAT_NO_PROBE) { ! 367: pstrcpy(bs->backing_format, sizeof(bs->backing_format), "raw"); ! 368: } ! 369: } ! 370: ! 371: /* Reset unknown autoclear feature bits. This is a backwards ! 372: * compatibility mechanism that allows images to be opened by older ! 373: * programs, which "knock out" unknown feature bits. When an image is ! 374: * opened by a newer program again it can detect that the autoclear ! 375: * feature is no longer valid. ! 376: */ ! 377: if ((s->header.autoclear_features & ~QED_AUTOCLEAR_FEATURE_MASK) != 0 && ! 378: !bdrv_is_read_only(bs->file)) { ! 379: s->header.autoclear_features &= QED_AUTOCLEAR_FEATURE_MASK; ! 380: ! 381: ret = qed_write_header_sync(s); ! 382: if (ret) { ! 383: return ret; ! 384: } ! 385: ! 386: /* From here on only known autoclear feature bits are valid */ ! 387: bdrv_flush(bs->file); ! 388: } ! 389: ! 390: s->l1_table = qed_alloc_table(s); ! 391: qed_init_l2_cache(&s->l2_cache); ! 392: ! 393: ret = qed_read_l1_table_sync(s); ! 394: if (ret) { ! 395: goto out; ! 396: } ! 397: ! 398: /* If image was not closed cleanly, check consistency */ ! 399: if (s->header.features & QED_F_NEED_CHECK) { ! 400: /* Read-only images cannot be fixed. There is no risk of corruption ! 401: * since write operations are not possible. Therefore, allow ! 402: * potentially inconsistent images to be opened read-only. This can ! 403: * aid data recovery from an otherwise inconsistent image. ! 404: */ ! 405: if (!bdrv_is_read_only(bs->file)) { ! 406: BdrvCheckResult result = {0}; ! 407: ! 408: ret = qed_check(s, &result, true); ! 409: if (!ret && !result.corruptions && !result.check_errors) { ! 410: /* Ensure fixes reach storage before clearing check bit */ ! 411: bdrv_flush(s->bs); ! 412: ! 413: s->header.features &= ~QED_F_NEED_CHECK; ! 414: qed_write_header_sync(s); ! 415: } ! 416: } ! 417: } ! 418: ! 419: out: ! 420: if (ret) { ! 421: qed_free_l2_cache(&s->l2_cache); ! 422: qemu_vfree(s->l1_table); ! 423: } ! 424: return ret; ! 425: } ! 426: ! 427: static void bdrv_qed_close(BlockDriverState *bs) ! 428: { ! 429: BDRVQEDState *s = bs->opaque; ! 430: ! 431: /* Ensure writes reach stable storage */ ! 432: bdrv_flush(bs->file); ! 433: ! 434: /* Clean shutdown, no check required on next open */ ! 435: if (s->header.features & QED_F_NEED_CHECK) { ! 436: s->header.features &= ~QED_F_NEED_CHECK; ! 437: qed_write_header_sync(s); ! 438: } ! 439: ! 440: qed_free_l2_cache(&s->l2_cache); ! 441: qemu_vfree(s->l1_table); ! 442: } ! 443: ! 444: static int bdrv_qed_flush(BlockDriverState *bs) ! 445: { ! 446: return bdrv_flush(bs->file); ! 447: } ! 448: ! 449: static int qed_create(const char *filename, uint32_t cluster_size, ! 450: uint64_t image_size, uint32_t table_size, ! 451: const char *backing_file, const char *backing_fmt) ! 452: { ! 453: QEDHeader header = { ! 454: .magic = QED_MAGIC, ! 455: .cluster_size = cluster_size, ! 456: .table_size = table_size, ! 457: .header_size = 1, ! 458: .features = 0, ! 459: .compat_features = 0, ! 460: .l1_table_offset = cluster_size, ! 461: .image_size = image_size, ! 462: }; ! 463: QEDHeader le_header; ! 464: uint8_t *l1_table = NULL; ! 465: size_t l1_size = header.cluster_size * header.table_size; ! 466: int ret = 0; ! 467: BlockDriverState *bs = NULL; ! 468: ! 469: ret = bdrv_create_file(filename, NULL); ! 470: if (ret < 0) { ! 471: return ret; ! 472: } ! 473: ! 474: ret = bdrv_file_open(&bs, filename, BDRV_O_RDWR | BDRV_O_CACHE_WB); ! 475: if (ret < 0) { ! 476: return ret; ! 477: } ! 478: ! 479: /* File must start empty and grow, check truncate is supported */ ! 480: ret = bdrv_truncate(bs, 0); ! 481: if (ret < 0) { ! 482: goto out; ! 483: } ! 484: ! 485: if (backing_file) { ! 486: header.features |= QED_F_BACKING_FILE; ! 487: header.backing_filename_offset = sizeof(le_header); ! 488: header.backing_filename_size = strlen(backing_file); ! 489: ! 490: if (qed_fmt_is_raw(backing_fmt)) { ! 491: header.features |= QED_F_BACKING_FORMAT_NO_PROBE; ! 492: } ! 493: } ! 494: ! 495: qed_header_cpu_to_le(&header, &le_header); ! 496: ret = bdrv_pwrite(bs, 0, &le_header, sizeof(le_header)); ! 497: if (ret < 0) { ! 498: goto out; ! 499: } ! 500: ret = bdrv_pwrite(bs, sizeof(le_header), backing_file, ! 501: header.backing_filename_size); ! 502: if (ret < 0) { ! 503: goto out; ! 504: } ! 505: ! 506: l1_table = qemu_mallocz(l1_size); ! 507: ret = bdrv_pwrite(bs, header.l1_table_offset, l1_table, l1_size); ! 508: if (ret < 0) { ! 509: goto out; ! 510: } ! 511: ! 512: ret = 0; /* success */ ! 513: out: ! 514: qemu_free(l1_table); ! 515: bdrv_delete(bs); ! 516: return ret; ! 517: } ! 518: ! 519: static int bdrv_qed_create(const char *filename, QEMUOptionParameter *options) ! 520: { ! 521: uint64_t image_size = 0; ! 522: uint32_t cluster_size = QED_DEFAULT_CLUSTER_SIZE; ! 523: uint32_t table_size = QED_DEFAULT_TABLE_SIZE; ! 524: const char *backing_file = NULL; ! 525: const char *backing_fmt = NULL; ! 526: ! 527: while (options && options->name) { ! 528: if (!strcmp(options->name, BLOCK_OPT_SIZE)) { ! 529: image_size = options->value.n; ! 530: } else if (!strcmp(options->name, BLOCK_OPT_BACKING_FILE)) { ! 531: backing_file = options->value.s; ! 532: } else if (!strcmp(options->name, BLOCK_OPT_BACKING_FMT)) { ! 533: backing_fmt = options->value.s; ! 534: } else if (!strcmp(options->name, BLOCK_OPT_CLUSTER_SIZE)) { ! 535: if (options->value.n) { ! 536: cluster_size = options->value.n; ! 537: } ! 538: } else if (!strcmp(options->name, BLOCK_OPT_TABLE_SIZE)) { ! 539: if (options->value.n) { ! 540: table_size = options->value.n; ! 541: } ! 542: } ! 543: options++; ! 544: } ! 545: ! 546: if (!qed_is_cluster_size_valid(cluster_size)) { ! 547: fprintf(stderr, "QED cluster size must be within range [%u, %u] and power of 2\n", ! 548: QED_MIN_CLUSTER_SIZE, QED_MAX_CLUSTER_SIZE); ! 549: return -EINVAL; ! 550: } ! 551: if (!qed_is_table_size_valid(table_size)) { ! 552: fprintf(stderr, "QED table size must be within range [%u, %u] and power of 2\n", ! 553: QED_MIN_TABLE_SIZE, QED_MAX_TABLE_SIZE); ! 554: return -EINVAL; ! 555: } ! 556: if (!qed_is_image_size_valid(image_size, cluster_size, table_size)) { ! 557: fprintf(stderr, "QED image size must be a non-zero multiple of " ! 558: "cluster size and less than %" PRIu64 " bytes\n", ! 559: qed_max_image_size(cluster_size, table_size)); ! 560: return -EINVAL; ! 561: } ! 562: ! 563: return qed_create(filename, cluster_size, image_size, table_size, ! 564: backing_file, backing_fmt); ! 565: } ! 566: ! 567: typedef struct { ! 568: int is_allocated; ! 569: int *pnum; ! 570: } QEDIsAllocatedCB; ! 571: ! 572: static void qed_is_allocated_cb(void *opaque, int ret, uint64_t offset, size_t len) ! 573: { ! 574: QEDIsAllocatedCB *cb = opaque; ! 575: *cb->pnum = len / BDRV_SECTOR_SIZE; ! 576: cb->is_allocated = ret == QED_CLUSTER_FOUND; ! 577: } ! 578: ! 579: static int bdrv_qed_is_allocated(BlockDriverState *bs, int64_t sector_num, ! 580: int nb_sectors, int *pnum) ! 581: { ! 582: BDRVQEDState *s = bs->opaque; ! 583: uint64_t pos = (uint64_t)sector_num * BDRV_SECTOR_SIZE; ! 584: size_t len = (size_t)nb_sectors * BDRV_SECTOR_SIZE; ! 585: QEDIsAllocatedCB cb = { ! 586: .is_allocated = -1, ! 587: .pnum = pnum, ! 588: }; ! 589: QEDRequest request = { .l2_table = NULL }; ! 590: ! 591: async_context_push(); ! 592: ! 593: qed_find_cluster(s, &request, pos, len, qed_is_allocated_cb, &cb); ! 594: ! 595: while (cb.is_allocated == -1) { ! 596: qemu_aio_wait(); ! 597: } ! 598: ! 599: async_context_pop(); ! 600: ! 601: qed_unref_l2_cache_entry(request.l2_table); ! 602: ! 603: return cb.is_allocated; ! 604: } ! 605: ! 606: static int bdrv_qed_make_empty(BlockDriverState *bs) ! 607: { ! 608: return -ENOTSUP; ! 609: } ! 610: ! 611: static BDRVQEDState *acb_to_s(QEDAIOCB *acb) ! 612: { ! 613: return acb->common.bs->opaque; ! 614: } ! 615: ! 616: /** ! 617: * Read from the backing file or zero-fill if no backing file ! 618: * ! 619: * @s: QED state ! 620: * @pos: Byte position in device ! 621: * @qiov: Destination I/O vector ! 622: * @cb: Completion function ! 623: * @opaque: User data for completion function ! 624: * ! 625: * This function reads qiov->size bytes starting at pos from the backing file. ! 626: * If there is no backing file then zeroes are read. ! 627: */ ! 628: static void qed_read_backing_file(BDRVQEDState *s, uint64_t pos, ! 629: QEMUIOVector *qiov, ! 630: BlockDriverCompletionFunc *cb, void *opaque) ! 631: { ! 632: BlockDriverAIOCB *aiocb; ! 633: uint64_t backing_length = 0; ! 634: size_t size; ! 635: ! 636: /* If there is a backing file, get its length. Treat the absence of a ! 637: * backing file like a zero length backing file. ! 638: */ ! 639: if (s->bs->backing_hd) { ! 640: int64_t l = bdrv_getlength(s->bs->backing_hd); ! 641: if (l < 0) { ! 642: cb(opaque, l); ! 643: return; ! 644: } ! 645: backing_length = l; ! 646: } ! 647: ! 648: /* Zero all sectors if reading beyond the end of the backing file */ ! 649: if (pos >= backing_length || ! 650: pos + qiov->size > backing_length) { ! 651: qemu_iovec_memset(qiov, 0, qiov->size); ! 652: } ! 653: ! 654: /* Complete now if there are no backing file sectors to read */ ! 655: if (pos >= backing_length) { ! 656: cb(opaque, 0); ! 657: return; ! 658: } ! 659: ! 660: /* If the read straddles the end of the backing file, shorten it */ ! 661: size = MIN((uint64_t)backing_length - pos, qiov->size); ! 662: ! 663: BLKDBG_EVENT(s->bs->file, BLKDBG_READ_BACKING); ! 664: aiocb = bdrv_aio_readv(s->bs->backing_hd, pos / BDRV_SECTOR_SIZE, ! 665: qiov, size / BDRV_SECTOR_SIZE, cb, opaque); ! 666: if (!aiocb) { ! 667: cb(opaque, -EIO); ! 668: } ! 669: } ! 670: ! 671: typedef struct { ! 672: GenericCB gencb; ! 673: BDRVQEDState *s; ! 674: QEMUIOVector qiov; ! 675: struct iovec iov; ! 676: uint64_t offset; ! 677: } CopyFromBackingFileCB; ! 678: ! 679: static void qed_copy_from_backing_file_cb(void *opaque, int ret) ! 680: { ! 681: CopyFromBackingFileCB *copy_cb = opaque; ! 682: qemu_vfree(copy_cb->iov.iov_base); ! 683: gencb_complete(©_cb->gencb, ret); ! 684: } ! 685: ! 686: static void qed_copy_from_backing_file_write(void *opaque, int ret) ! 687: { ! 688: CopyFromBackingFileCB *copy_cb = opaque; ! 689: BDRVQEDState *s = copy_cb->s; ! 690: BlockDriverAIOCB *aiocb; ! 691: ! 692: if (ret) { ! 693: qed_copy_from_backing_file_cb(copy_cb, ret); ! 694: return; ! 695: } ! 696: ! 697: BLKDBG_EVENT(s->bs->file, BLKDBG_COW_WRITE); ! 698: aiocb = bdrv_aio_writev(s->bs->file, copy_cb->offset / BDRV_SECTOR_SIZE, ! 699: ©_cb->qiov, ! 700: copy_cb->qiov.size / BDRV_SECTOR_SIZE, ! 701: qed_copy_from_backing_file_cb, copy_cb); ! 702: if (!aiocb) { ! 703: qed_copy_from_backing_file_cb(copy_cb, -EIO); ! 704: } ! 705: } ! 706: ! 707: /** ! 708: * Copy data from backing file into the image ! 709: * ! 710: * @s: QED state ! 711: * @pos: Byte position in device ! 712: * @len: Number of bytes ! 713: * @offset: Byte offset in image file ! 714: * @cb: Completion function ! 715: * @opaque: User data for completion function ! 716: */ ! 717: static void qed_copy_from_backing_file(BDRVQEDState *s, uint64_t pos, ! 718: uint64_t len, uint64_t offset, ! 719: BlockDriverCompletionFunc *cb, ! 720: void *opaque) ! 721: { ! 722: CopyFromBackingFileCB *copy_cb; ! 723: ! 724: /* Skip copy entirely if there is no work to do */ ! 725: if (len == 0) { ! 726: cb(opaque, 0); ! 727: return; ! 728: } ! 729: ! 730: copy_cb = gencb_alloc(sizeof(*copy_cb), cb, opaque); ! 731: copy_cb->s = s; ! 732: copy_cb->offset = offset; ! 733: copy_cb->iov.iov_base = qemu_blockalign(s->bs, len); ! 734: copy_cb->iov.iov_len = len; ! 735: qemu_iovec_init_external(©_cb->qiov, ©_cb->iov, 1); ! 736: ! 737: qed_read_backing_file(s, pos, ©_cb->qiov, ! 738: qed_copy_from_backing_file_write, copy_cb); ! 739: } ! 740: ! 741: /** ! 742: * Link one or more contiguous clusters into a table ! 743: * ! 744: * @s: QED state ! 745: * @table: L2 table ! 746: * @index: First cluster index ! 747: * @n: Number of contiguous clusters ! 748: * @cluster: First cluster byte offset in image file ! 749: */ ! 750: static void qed_update_l2_table(BDRVQEDState *s, QEDTable *table, int index, ! 751: unsigned int n, uint64_t cluster) ! 752: { ! 753: int i; ! 754: for (i = index; i < index + n; i++) { ! 755: table->offsets[i] = cluster; ! 756: cluster += s->header.cluster_size; ! 757: } ! 758: } ! 759: ! 760: static void qed_aio_complete_bh(void *opaque) ! 761: { ! 762: QEDAIOCB *acb = opaque; ! 763: BlockDriverCompletionFunc *cb = acb->common.cb; ! 764: void *user_opaque = acb->common.opaque; ! 765: int ret = acb->bh_ret; ! 766: bool *finished = acb->finished; ! 767: ! 768: qemu_bh_delete(acb->bh); ! 769: qemu_aio_release(acb); ! 770: ! 771: /* Invoke callback */ ! 772: cb(user_opaque, ret); ! 773: ! 774: /* Signal cancel completion */ ! 775: if (finished) { ! 776: *finished = true; ! 777: } ! 778: } ! 779: ! 780: static void qed_aio_complete(QEDAIOCB *acb, int ret) ! 781: { ! 782: BDRVQEDState *s = acb_to_s(acb); ! 783: ! 784: trace_qed_aio_complete(s, acb, ret); ! 785: ! 786: /* Free resources */ ! 787: qemu_iovec_destroy(&acb->cur_qiov); ! 788: qed_unref_l2_cache_entry(acb->request.l2_table); ! 789: ! 790: /* Arrange for a bh to invoke the completion function */ ! 791: acb->bh_ret = ret; ! 792: acb->bh = qemu_bh_new(qed_aio_complete_bh, acb); ! 793: qemu_bh_schedule(acb->bh); ! 794: ! 795: /* Start next allocating write request waiting behind this one. Note that ! 796: * requests enqueue themselves when they first hit an unallocated cluster ! 797: * but they wait until the entire request is finished before waking up the ! 798: * next request in the queue. This ensures that we don't cycle through ! 799: * requests multiple times but rather finish one at a time completely. ! 800: */ ! 801: if (acb == QSIMPLEQ_FIRST(&s->allocating_write_reqs)) { ! 802: QSIMPLEQ_REMOVE_HEAD(&s->allocating_write_reqs, next); ! 803: acb = QSIMPLEQ_FIRST(&s->allocating_write_reqs); ! 804: if (acb) { ! 805: qed_aio_next_io(acb, 0); ! 806: } ! 807: } ! 808: } ! 809: ! 810: /** ! 811: * Commit the current L2 table to the cache ! 812: */ ! 813: static void qed_commit_l2_update(void *opaque, int ret) ! 814: { ! 815: QEDAIOCB *acb = opaque; ! 816: BDRVQEDState *s = acb_to_s(acb); ! 817: CachedL2Table *l2_table = acb->request.l2_table; ! 818: ! 819: qed_commit_l2_cache_entry(&s->l2_cache, l2_table); ! 820: ! 821: /* This is guaranteed to succeed because we just committed the entry to the ! 822: * cache. ! 823: */ ! 824: acb->request.l2_table = qed_find_l2_cache_entry(&s->l2_cache, ! 825: l2_table->offset); ! 826: assert(acb->request.l2_table != NULL); ! 827: ! 828: qed_aio_next_io(opaque, ret); ! 829: } ! 830: ! 831: /** ! 832: * Update L1 table with new L2 table offset and write it out ! 833: */ ! 834: static void qed_aio_write_l1_update(void *opaque, int ret) ! 835: { ! 836: QEDAIOCB *acb = opaque; ! 837: BDRVQEDState *s = acb_to_s(acb); ! 838: int index; ! 839: ! 840: if (ret) { ! 841: qed_aio_complete(acb, ret); ! 842: return; ! 843: } ! 844: ! 845: index = qed_l1_index(s, acb->cur_pos); ! 846: s->l1_table->offsets[index] = acb->request.l2_table->offset; ! 847: ! 848: qed_write_l1_table(s, index, 1, qed_commit_l2_update, acb); ! 849: } ! 850: ! 851: /** ! 852: * Update L2 table with new cluster offsets and write them out ! 853: */ ! 854: static void qed_aio_write_l2_update(void *opaque, int ret) ! 855: { ! 856: QEDAIOCB *acb = opaque; ! 857: BDRVQEDState *s = acb_to_s(acb); ! 858: bool need_alloc = acb->find_cluster_ret == QED_CLUSTER_L1; ! 859: int index; ! 860: ! 861: if (ret) { ! 862: goto err; ! 863: } ! 864: ! 865: if (need_alloc) { ! 866: qed_unref_l2_cache_entry(acb->request.l2_table); ! 867: acb->request.l2_table = qed_new_l2_table(s); ! 868: } ! 869: ! 870: index = qed_l2_index(s, acb->cur_pos); ! 871: qed_update_l2_table(s, acb->request.l2_table->table, index, acb->cur_nclusters, ! 872: acb->cur_cluster); ! 873: ! 874: if (need_alloc) { ! 875: /* Write out the whole new L2 table */ ! 876: qed_write_l2_table(s, &acb->request, 0, s->table_nelems, true, ! 877: qed_aio_write_l1_update, acb); ! 878: } else { ! 879: /* Write out only the updated part of the L2 table */ ! 880: qed_write_l2_table(s, &acb->request, index, acb->cur_nclusters, false, ! 881: qed_aio_next_io, acb); ! 882: } ! 883: return; ! 884: ! 885: err: ! 886: qed_aio_complete(acb, ret); ! 887: } ! 888: ! 889: /** ! 890: * Flush new data clusters before updating the L2 table ! 891: * ! 892: * This flush is necessary when a backing file is in use. A crash during an ! 893: * allocating write could result in empty clusters in the image. If the write ! 894: * only touched a subregion of the cluster, then backing image sectors have ! 895: * been lost in the untouched region. The solution is to flush after writing a ! 896: * new data cluster and before updating the L2 table. ! 897: */ ! 898: static void qed_aio_write_flush_before_l2_update(void *opaque, int ret) ! 899: { ! 900: QEDAIOCB *acb = opaque; ! 901: BDRVQEDState *s = acb_to_s(acb); ! 902: ! 903: if (!bdrv_aio_flush(s->bs->file, qed_aio_write_l2_update, opaque)) { ! 904: qed_aio_complete(acb, -EIO); ! 905: } ! 906: } ! 907: ! 908: /** ! 909: * Write data to the image file ! 910: */ ! 911: static void qed_aio_write_main(void *opaque, int ret) ! 912: { ! 913: QEDAIOCB *acb = opaque; ! 914: BDRVQEDState *s = acb_to_s(acb); ! 915: uint64_t offset = acb->cur_cluster + ! 916: qed_offset_into_cluster(s, acb->cur_pos); ! 917: BlockDriverCompletionFunc *next_fn; ! 918: BlockDriverAIOCB *file_acb; ! 919: ! 920: trace_qed_aio_write_main(s, acb, ret, offset, acb->cur_qiov.size); ! 921: ! 922: if (ret) { ! 923: qed_aio_complete(acb, ret); ! 924: return; ! 925: } ! 926: ! 927: if (acb->find_cluster_ret == QED_CLUSTER_FOUND) { ! 928: next_fn = qed_aio_next_io; ! 929: } else { ! 930: if (s->bs->backing_hd) { ! 931: next_fn = qed_aio_write_flush_before_l2_update; ! 932: } else { ! 933: next_fn = qed_aio_write_l2_update; ! 934: } ! 935: } ! 936: ! 937: BLKDBG_EVENT(s->bs->file, BLKDBG_WRITE_AIO); ! 938: file_acb = bdrv_aio_writev(s->bs->file, offset / BDRV_SECTOR_SIZE, ! 939: &acb->cur_qiov, ! 940: acb->cur_qiov.size / BDRV_SECTOR_SIZE, ! 941: next_fn, acb); ! 942: if (!file_acb) { ! 943: qed_aio_complete(acb, -EIO); ! 944: } ! 945: } ! 946: ! 947: /** ! 948: * Populate back untouched region of new data cluster ! 949: */ ! 950: static void qed_aio_write_postfill(void *opaque, int ret) ! 951: { ! 952: QEDAIOCB *acb = opaque; ! 953: BDRVQEDState *s = acb_to_s(acb); ! 954: uint64_t start = acb->cur_pos + acb->cur_qiov.size; ! 955: uint64_t len = ! 956: qed_start_of_cluster(s, start + s->header.cluster_size - 1) - start; ! 957: uint64_t offset = acb->cur_cluster + ! 958: qed_offset_into_cluster(s, acb->cur_pos) + ! 959: acb->cur_qiov.size; ! 960: ! 961: if (ret) { ! 962: qed_aio_complete(acb, ret); ! 963: return; ! 964: } ! 965: ! 966: trace_qed_aio_write_postfill(s, acb, start, len, offset); ! 967: qed_copy_from_backing_file(s, start, len, offset, ! 968: qed_aio_write_main, acb); ! 969: } ! 970: ! 971: /** ! 972: * Populate front untouched region of new data cluster ! 973: */ ! 974: static void qed_aio_write_prefill(void *opaque, int ret) ! 975: { ! 976: QEDAIOCB *acb = opaque; ! 977: BDRVQEDState *s = acb_to_s(acb); ! 978: uint64_t start = qed_start_of_cluster(s, acb->cur_pos); ! 979: uint64_t len = qed_offset_into_cluster(s, acb->cur_pos); ! 980: ! 981: trace_qed_aio_write_prefill(s, acb, start, len, acb->cur_cluster); ! 982: qed_copy_from_backing_file(s, start, len, acb->cur_cluster, ! 983: qed_aio_write_postfill, acb); ! 984: } ! 985: ! 986: /** ! 987: * Check if the QED_F_NEED_CHECK bit should be set during allocating write ! 988: */ ! 989: static bool qed_should_set_need_check(BDRVQEDState *s) ! 990: { ! 991: /* The flush before L2 update path ensures consistency */ ! 992: if (s->bs->backing_hd) { ! 993: return false; ! 994: } ! 995: ! 996: return !(s->header.features & QED_F_NEED_CHECK); ! 997: } ! 998: ! 999: /** ! 1000: * Write new data cluster ! 1001: * ! 1002: * @acb: Write request ! 1003: * @len: Length in bytes ! 1004: * ! 1005: * This path is taken when writing to previously unallocated clusters. ! 1006: */ ! 1007: static void qed_aio_write_alloc(QEDAIOCB *acb, size_t len) ! 1008: { ! 1009: BDRVQEDState *s = acb_to_s(acb); ! 1010: ! 1011: /* Freeze this request if another allocating write is in progress */ ! 1012: if (acb != QSIMPLEQ_FIRST(&s->allocating_write_reqs)) { ! 1013: QSIMPLEQ_INSERT_TAIL(&s->allocating_write_reqs, acb, next); ! 1014: } ! 1015: if (acb != QSIMPLEQ_FIRST(&s->allocating_write_reqs)) { ! 1016: return; /* wait for existing request to finish */ ! 1017: } ! 1018: ! 1019: acb->cur_nclusters = qed_bytes_to_clusters(s, ! 1020: qed_offset_into_cluster(s, acb->cur_pos) + len); ! 1021: acb->cur_cluster = qed_alloc_clusters(s, acb->cur_nclusters); ! 1022: qemu_iovec_copy(&acb->cur_qiov, acb->qiov, acb->qiov_offset, len); ! 1023: ! 1024: if (qed_should_set_need_check(s)) { ! 1025: s->header.features |= QED_F_NEED_CHECK; ! 1026: qed_write_header(s, qed_aio_write_prefill, acb); ! 1027: } else { ! 1028: qed_aio_write_prefill(acb, 0); ! 1029: } ! 1030: } ! 1031: ! 1032: /** ! 1033: * Write data cluster in place ! 1034: * ! 1035: * @acb: Write request ! 1036: * @offset: Cluster offset in bytes ! 1037: * @len: Length in bytes ! 1038: * ! 1039: * This path is taken when writing to already allocated clusters. ! 1040: */ ! 1041: static void qed_aio_write_inplace(QEDAIOCB *acb, uint64_t offset, size_t len) ! 1042: { ! 1043: /* Calculate the I/O vector */ ! 1044: acb->cur_cluster = offset; ! 1045: qemu_iovec_copy(&acb->cur_qiov, acb->qiov, acb->qiov_offset, len); ! 1046: ! 1047: /* Do the actual write */ ! 1048: qed_aio_write_main(acb, 0); ! 1049: } ! 1050: ! 1051: /** ! 1052: * Write data cluster ! 1053: * ! 1054: * @opaque: Write request ! 1055: * @ret: QED_CLUSTER_FOUND, QED_CLUSTER_L2, QED_CLUSTER_L1, ! 1056: * or -errno ! 1057: * @offset: Cluster offset in bytes ! 1058: * @len: Length in bytes ! 1059: * ! 1060: * Callback from qed_find_cluster(). ! 1061: */ ! 1062: static void qed_aio_write_data(void *opaque, int ret, ! 1063: uint64_t offset, size_t len) ! 1064: { ! 1065: QEDAIOCB *acb = opaque; ! 1066: ! 1067: trace_qed_aio_write_data(acb_to_s(acb), acb, ret, offset, len); ! 1068: ! 1069: acb->find_cluster_ret = ret; ! 1070: ! 1071: switch (ret) { ! 1072: case QED_CLUSTER_FOUND: ! 1073: qed_aio_write_inplace(acb, offset, len); ! 1074: break; ! 1075: ! 1076: case QED_CLUSTER_L2: ! 1077: case QED_CLUSTER_L1: ! 1078: qed_aio_write_alloc(acb, len); ! 1079: break; ! 1080: ! 1081: default: ! 1082: qed_aio_complete(acb, ret); ! 1083: break; ! 1084: } ! 1085: } ! 1086: ! 1087: /** ! 1088: * Read data cluster ! 1089: * ! 1090: * @opaque: Read request ! 1091: * @ret: QED_CLUSTER_FOUND, QED_CLUSTER_L2, QED_CLUSTER_L1, ! 1092: * or -errno ! 1093: * @offset: Cluster offset in bytes ! 1094: * @len: Length in bytes ! 1095: * ! 1096: * Callback from qed_find_cluster(). ! 1097: */ ! 1098: static void qed_aio_read_data(void *opaque, int ret, ! 1099: uint64_t offset, size_t len) ! 1100: { ! 1101: QEDAIOCB *acb = opaque; ! 1102: BDRVQEDState *s = acb_to_s(acb); ! 1103: BlockDriverState *bs = acb->common.bs; ! 1104: BlockDriverAIOCB *file_acb; ! 1105: ! 1106: /* Adjust offset into cluster */ ! 1107: offset += qed_offset_into_cluster(s, acb->cur_pos); ! 1108: ! 1109: trace_qed_aio_read_data(s, acb, ret, offset, len); ! 1110: ! 1111: if (ret < 0) { ! 1112: goto err; ! 1113: } ! 1114: ! 1115: qemu_iovec_copy(&acb->cur_qiov, acb->qiov, acb->qiov_offset, len); ! 1116: ! 1117: /* Handle backing file and unallocated sparse hole reads */ ! 1118: if (ret != QED_CLUSTER_FOUND) { ! 1119: qed_read_backing_file(s, acb->cur_pos, &acb->cur_qiov, ! 1120: qed_aio_next_io, acb); ! 1121: return; ! 1122: } ! 1123: ! 1124: BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO); ! 1125: file_acb = bdrv_aio_readv(bs->file, offset / BDRV_SECTOR_SIZE, ! 1126: &acb->cur_qiov, ! 1127: acb->cur_qiov.size / BDRV_SECTOR_SIZE, ! 1128: qed_aio_next_io, acb); ! 1129: if (!file_acb) { ! 1130: ret = -EIO; ! 1131: goto err; ! 1132: } ! 1133: return; ! 1134: ! 1135: err: ! 1136: qed_aio_complete(acb, ret); ! 1137: } ! 1138: ! 1139: /** ! 1140: * Begin next I/O or complete the request ! 1141: */ ! 1142: static void qed_aio_next_io(void *opaque, int ret) ! 1143: { ! 1144: QEDAIOCB *acb = opaque; ! 1145: BDRVQEDState *s = acb_to_s(acb); ! 1146: QEDFindClusterFunc *io_fn = ! 1147: acb->is_write ? qed_aio_write_data : qed_aio_read_data; ! 1148: ! 1149: trace_qed_aio_next_io(s, acb, ret, acb->cur_pos + acb->cur_qiov.size); ! 1150: ! 1151: /* Handle I/O error */ ! 1152: if (ret) { ! 1153: qed_aio_complete(acb, ret); ! 1154: return; ! 1155: } ! 1156: ! 1157: acb->qiov_offset += acb->cur_qiov.size; ! 1158: acb->cur_pos += acb->cur_qiov.size; ! 1159: qemu_iovec_reset(&acb->cur_qiov); ! 1160: ! 1161: /* Complete request */ ! 1162: if (acb->cur_pos >= acb->end_pos) { ! 1163: qed_aio_complete(acb, 0); ! 1164: return; ! 1165: } ! 1166: ! 1167: /* Find next cluster and start I/O */ ! 1168: qed_find_cluster(s, &acb->request, ! 1169: acb->cur_pos, acb->end_pos - acb->cur_pos, ! 1170: io_fn, acb); ! 1171: } ! 1172: ! 1173: static BlockDriverAIOCB *qed_aio_setup(BlockDriverState *bs, ! 1174: int64_t sector_num, ! 1175: QEMUIOVector *qiov, int nb_sectors, ! 1176: BlockDriverCompletionFunc *cb, ! 1177: void *opaque, bool is_write) ! 1178: { ! 1179: QEDAIOCB *acb = qemu_aio_get(&qed_aio_pool, bs, cb, opaque); ! 1180: ! 1181: trace_qed_aio_setup(bs->opaque, acb, sector_num, nb_sectors, ! 1182: opaque, is_write); ! 1183: ! 1184: acb->is_write = is_write; ! 1185: acb->finished = NULL; ! 1186: acb->qiov = qiov; ! 1187: acb->qiov_offset = 0; ! 1188: acb->cur_pos = (uint64_t)sector_num * BDRV_SECTOR_SIZE; ! 1189: acb->end_pos = acb->cur_pos + nb_sectors * BDRV_SECTOR_SIZE; ! 1190: acb->request.l2_table = NULL; ! 1191: qemu_iovec_init(&acb->cur_qiov, qiov->niov); ! 1192: ! 1193: /* Start request */ ! 1194: qed_aio_next_io(acb, 0); ! 1195: return &acb->common; ! 1196: } ! 1197: ! 1198: static BlockDriverAIOCB *bdrv_qed_aio_readv(BlockDriverState *bs, ! 1199: int64_t sector_num, ! 1200: QEMUIOVector *qiov, int nb_sectors, ! 1201: BlockDriverCompletionFunc *cb, ! 1202: void *opaque) ! 1203: { ! 1204: return qed_aio_setup(bs, sector_num, qiov, nb_sectors, cb, opaque, false); ! 1205: } ! 1206: ! 1207: static BlockDriverAIOCB *bdrv_qed_aio_writev(BlockDriverState *bs, ! 1208: int64_t sector_num, ! 1209: QEMUIOVector *qiov, int nb_sectors, ! 1210: BlockDriverCompletionFunc *cb, ! 1211: void *opaque) ! 1212: { ! 1213: return qed_aio_setup(bs, sector_num, qiov, nb_sectors, cb, opaque, true); ! 1214: } ! 1215: ! 1216: static BlockDriverAIOCB *bdrv_qed_aio_flush(BlockDriverState *bs, ! 1217: BlockDriverCompletionFunc *cb, ! 1218: void *opaque) ! 1219: { ! 1220: return bdrv_aio_flush(bs->file, cb, opaque); ! 1221: } ! 1222: ! 1223: static int bdrv_qed_truncate(BlockDriverState *bs, int64_t offset) ! 1224: { ! 1225: return -ENOTSUP; ! 1226: } ! 1227: ! 1228: static int64_t bdrv_qed_getlength(BlockDriverState *bs) ! 1229: { ! 1230: BDRVQEDState *s = bs->opaque; ! 1231: return s->header.image_size; ! 1232: } ! 1233: ! 1234: static int bdrv_qed_get_info(BlockDriverState *bs, BlockDriverInfo *bdi) ! 1235: { ! 1236: BDRVQEDState *s = bs->opaque; ! 1237: ! 1238: memset(bdi, 0, sizeof(*bdi)); ! 1239: bdi->cluster_size = s->header.cluster_size; ! 1240: return 0; ! 1241: } ! 1242: ! 1243: static int bdrv_qed_change_backing_file(BlockDriverState *bs, ! 1244: const char *backing_file, ! 1245: const char *backing_fmt) ! 1246: { ! 1247: BDRVQEDState *s = bs->opaque; ! 1248: QEDHeader new_header, le_header; ! 1249: void *buffer; ! 1250: size_t buffer_len, backing_file_len; ! 1251: int ret; ! 1252: ! 1253: /* Refuse to set backing filename if unknown compat feature bits are ! 1254: * active. If the image uses an unknown compat feature then we may not ! 1255: * know the layout of data following the header structure and cannot safely ! 1256: * add a new string. ! 1257: */ ! 1258: if (backing_file && (s->header.compat_features & ! 1259: ~QED_COMPAT_FEATURE_MASK)) { ! 1260: return -ENOTSUP; ! 1261: } ! 1262: ! 1263: memcpy(&new_header, &s->header, sizeof(new_header)); ! 1264: ! 1265: new_header.features &= ~(QED_F_BACKING_FILE | ! 1266: QED_F_BACKING_FORMAT_NO_PROBE); ! 1267: ! 1268: /* Adjust feature flags */ ! 1269: if (backing_file) { ! 1270: new_header.features |= QED_F_BACKING_FILE; ! 1271: ! 1272: if (qed_fmt_is_raw(backing_fmt)) { ! 1273: new_header.features |= QED_F_BACKING_FORMAT_NO_PROBE; ! 1274: } ! 1275: } ! 1276: ! 1277: /* Calculate new header size */ ! 1278: backing_file_len = 0; ! 1279: ! 1280: if (backing_file) { ! 1281: backing_file_len = strlen(backing_file); ! 1282: } ! 1283: ! 1284: buffer_len = sizeof(new_header); ! 1285: new_header.backing_filename_offset = buffer_len; ! 1286: new_header.backing_filename_size = backing_file_len; ! 1287: buffer_len += backing_file_len; ! 1288: ! 1289: /* Make sure we can rewrite header without failing */ ! 1290: if (buffer_len > new_header.header_size * new_header.cluster_size) { ! 1291: return -ENOSPC; ! 1292: } ! 1293: ! 1294: /* Prepare new header */ ! 1295: buffer = qemu_malloc(buffer_len); ! 1296: ! 1297: qed_header_cpu_to_le(&new_header, &le_header); ! 1298: memcpy(buffer, &le_header, sizeof(le_header)); ! 1299: buffer_len = sizeof(le_header); ! 1300: ! 1301: memcpy(buffer + buffer_len, backing_file, backing_file_len); ! 1302: buffer_len += backing_file_len; ! 1303: ! 1304: /* Write new header */ ! 1305: ret = bdrv_pwrite_sync(bs->file, 0, buffer, buffer_len); ! 1306: qemu_free(buffer); ! 1307: if (ret == 0) { ! 1308: memcpy(&s->header, &new_header, sizeof(new_header)); ! 1309: } ! 1310: return ret; ! 1311: } ! 1312: ! 1313: static int bdrv_qed_check(BlockDriverState *bs, BdrvCheckResult *result) ! 1314: { ! 1315: BDRVQEDState *s = bs->opaque; ! 1316: ! 1317: return qed_check(s, result, false); ! 1318: } ! 1319: ! 1320: static QEMUOptionParameter qed_create_options[] = { ! 1321: { ! 1322: .name = BLOCK_OPT_SIZE, ! 1323: .type = OPT_SIZE, ! 1324: .help = "Virtual disk size (in bytes)" ! 1325: }, { ! 1326: .name = BLOCK_OPT_BACKING_FILE, ! 1327: .type = OPT_STRING, ! 1328: .help = "File name of a base image" ! 1329: }, { ! 1330: .name = BLOCK_OPT_BACKING_FMT, ! 1331: .type = OPT_STRING, ! 1332: .help = "Image format of the base image" ! 1333: }, { ! 1334: .name = BLOCK_OPT_CLUSTER_SIZE, ! 1335: .type = OPT_SIZE, ! 1336: .help = "Cluster size (in bytes)" ! 1337: }, { ! 1338: .name = BLOCK_OPT_TABLE_SIZE, ! 1339: .type = OPT_SIZE, ! 1340: .help = "L1/L2 table size (in clusters)" ! 1341: }, ! 1342: { /* end of list */ } ! 1343: }; ! 1344: ! 1345: static BlockDriver bdrv_qed = { ! 1346: .format_name = "qed", ! 1347: .instance_size = sizeof(BDRVQEDState), ! 1348: .create_options = qed_create_options, ! 1349: ! 1350: .bdrv_probe = bdrv_qed_probe, ! 1351: .bdrv_open = bdrv_qed_open, ! 1352: .bdrv_close = bdrv_qed_close, ! 1353: .bdrv_create = bdrv_qed_create, ! 1354: .bdrv_flush = bdrv_qed_flush, ! 1355: .bdrv_is_allocated = bdrv_qed_is_allocated, ! 1356: .bdrv_make_empty = bdrv_qed_make_empty, ! 1357: .bdrv_aio_readv = bdrv_qed_aio_readv, ! 1358: .bdrv_aio_writev = bdrv_qed_aio_writev, ! 1359: .bdrv_aio_flush = bdrv_qed_aio_flush, ! 1360: .bdrv_truncate = bdrv_qed_truncate, ! 1361: .bdrv_getlength = bdrv_qed_getlength, ! 1362: .bdrv_get_info = bdrv_qed_get_info, ! 1363: .bdrv_change_backing_file = bdrv_qed_change_backing_file, ! 1364: .bdrv_check = bdrv_qed_check, ! 1365: }; ! 1366: ! 1367: static void bdrv_qed_init(void) ! 1368: { ! 1369: bdrv_register(&bdrv_qed); ! 1370: } ! 1371: ! 1372: block_init(bdrv_qed_init);
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.