File:  [Qemu by Fabrice Bellard] / qemu / block.c
Revision 1.1.1.23 (vendor branch): download - view: text, annotated - select for diffs
Tue Apr 24 19:34:33 2018 UTC (2 years, 5 months ago) by root
Branches: qemu, MAIN
CVS tags: qemu1101, HEAD
qemu 1.1.1

    1: /*
    2:  * QEMU System Emulator block driver
    3:  *
    4:  * Copyright (c) 2003 Fabrice Bellard
    5:  *
    6:  * Permission is hereby granted, free of charge, to any person obtaining a copy
    7:  * of this software and associated documentation files (the "Software"), to deal
    8:  * in the Software without restriction, including without limitation the rights
    9:  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
   10:  * copies of the Software, and to permit persons to whom the Software is
   11:  * furnished to do so, subject to the following conditions:
   12:  *
   13:  * The above copyright notice and this permission notice shall be included in
   14:  * all copies or substantial portions of the Software.
   15:  *
   16:  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
   17:  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
   18:  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
   19:  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
   20:  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
   21:  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
   22:  * THE SOFTWARE.
   23:  */
   24: #include "config-host.h"
   25: #include "qemu-common.h"
   26: #include "trace.h"
   27: #include "monitor.h"
   28: #include "block_int.h"
   29: #include "module.h"
   30: #include "qjson.h"
   31: #include "qemu-coroutine.h"
   32: #include "qmp-commands.h"
   33: #include "qemu-timer.h"
   34: 
   35: #ifdef CONFIG_BSD
   36: #include <sys/types.h>
   37: #include <sys/stat.h>
   38: #include <sys/ioctl.h>
   39: #include <sys/queue.h>
   40: #ifndef __DragonFly__
   41: #include <sys/disk.h>
   42: #endif
   43: #endif
   44: 
   45: #ifdef _WIN32
   46: #include <windows.h>
   47: #endif
   48: 
   49: #define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */
   50: 
   51: typedef enum {
   52:     BDRV_REQ_COPY_ON_READ = 0x1,
   53:     BDRV_REQ_ZERO_WRITE   = 0x2,
   54: } BdrvRequestFlags;
   55: 
   56: static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load);
   57: static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
   58:         int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
   59:         BlockDriverCompletionFunc *cb, void *opaque);
   60: static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
   61:         int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
   62:         BlockDriverCompletionFunc *cb, void *opaque);
   63: static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
   64:                                          int64_t sector_num, int nb_sectors,
   65:                                          QEMUIOVector *iov);
   66: static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
   67:                                          int64_t sector_num, int nb_sectors,
   68:                                          QEMUIOVector *iov);
   69: static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
   70:     int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
   71:     BdrvRequestFlags flags);
   72: static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
   73:     int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
   74:     BdrvRequestFlags flags);
   75: static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
   76:                                                int64_t sector_num,
   77:                                                QEMUIOVector *qiov,
   78:                                                int nb_sectors,
   79:                                                BlockDriverCompletionFunc *cb,
   80:                                                void *opaque,
   81:                                                bool is_write);
   82: static void coroutine_fn bdrv_co_do_rw(void *opaque);
   83: static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
   84:     int64_t sector_num, int nb_sectors);
   85: 
   86: static bool bdrv_exceed_bps_limits(BlockDriverState *bs, int nb_sectors,
   87:         bool is_write, double elapsed_time, uint64_t *wait);
   88: static bool bdrv_exceed_iops_limits(BlockDriverState *bs, bool is_write,
   89:         double elapsed_time, uint64_t *wait);
   90: static bool bdrv_exceed_io_limits(BlockDriverState *bs, int nb_sectors,
   91:         bool is_write, int64_t *wait);
   92: 
   93: static QTAILQ_HEAD(, BlockDriverState) bdrv_states =
   94:     QTAILQ_HEAD_INITIALIZER(bdrv_states);
   95: 
   96: static QLIST_HEAD(, BlockDriver) bdrv_drivers =
   97:     QLIST_HEAD_INITIALIZER(bdrv_drivers);
   98: 
   99: /* The device to use for VM snapshots */
  100: static BlockDriverState *bs_snapshots;
  101: 
  102: /* If non-zero, use only whitelisted block drivers */
  103: static int use_bdrv_whitelist;
  104: 
  105: #ifdef _WIN32
  106: static int is_windows_drive_prefix(const char *filename)
  107: {
  108:     return (((filename[0] >= 'a' && filename[0] <= 'z') ||
  109:              (filename[0] >= 'A' && filename[0] <= 'Z')) &&
  110:             filename[1] == ':');
  111: }
  112: 
  113: int is_windows_drive(const char *filename)
  114: {
  115:     if (is_windows_drive_prefix(filename) &&
  116:         filename[2] == '\0')
  117:         return 1;
  118:     if (strstart(filename, "\\\\.\\", NULL) ||
  119:         strstart(filename, "//./", NULL))
  120:         return 1;
  121:     return 0;
  122: }
  123: #endif
  124: 
  125: /* throttling disk I/O limits */
  126: void bdrv_io_limits_disable(BlockDriverState *bs)
  127: {
  128:     bs->io_limits_enabled = false;
  129: 
  130:     while (qemu_co_queue_next(&bs->throttled_reqs));
  131: 
  132:     if (bs->block_timer) {
  133:         qemu_del_timer(bs->block_timer);
  134:         qemu_free_timer(bs->block_timer);
  135:         bs->block_timer = NULL;
  136:     }
  137: 
  138:     bs->slice_start = 0;
  139:     bs->slice_end   = 0;
  140:     bs->slice_time  = 0;
  141:     memset(&bs->io_base, 0, sizeof(bs->io_base));
  142: }
  143: 
  144: static void bdrv_block_timer(void *opaque)
  145: {
  146:     BlockDriverState *bs = opaque;
  147: 
  148:     qemu_co_queue_next(&bs->throttled_reqs);
  149: }
  150: 
  151: void bdrv_io_limits_enable(BlockDriverState *bs)
  152: {
  153:     qemu_co_queue_init(&bs->throttled_reqs);
  154:     bs->block_timer = qemu_new_timer_ns(vm_clock, bdrv_block_timer, bs);
  155:     bs->slice_time  = 5 * BLOCK_IO_SLICE_TIME;
  156:     bs->slice_start = qemu_get_clock_ns(vm_clock);
  157:     bs->slice_end   = bs->slice_start + bs->slice_time;
  158:     memset(&bs->io_base, 0, sizeof(bs->io_base));
  159:     bs->io_limits_enabled = true;
  160: }
  161: 
  162: bool bdrv_io_limits_enabled(BlockDriverState *bs)
  163: {
  164:     BlockIOLimit *io_limits = &bs->io_limits;
  165:     return io_limits->bps[BLOCK_IO_LIMIT_READ]
  166:          || io_limits->bps[BLOCK_IO_LIMIT_WRITE]
  167:          || io_limits->bps[BLOCK_IO_LIMIT_TOTAL]
  168:          || io_limits->iops[BLOCK_IO_LIMIT_READ]
  169:          || io_limits->iops[BLOCK_IO_LIMIT_WRITE]
  170:          || io_limits->iops[BLOCK_IO_LIMIT_TOTAL];
  171: }
  172: 
  173: static void bdrv_io_limits_intercept(BlockDriverState *bs,
  174:                                      bool is_write, int nb_sectors)
  175: {
  176:     int64_t wait_time = -1;
  177: 
  178:     if (!qemu_co_queue_empty(&bs->throttled_reqs)) {
  179:         qemu_co_queue_wait(&bs->throttled_reqs);
  180:     }
  181: 
  182:     /* In fact, we hope to keep each request's timing, in FIFO mode. The next
  183:      * throttled requests will not be dequeued until the current request is
  184:      * allowed to be serviced. So if the current request still exceeds the
  185:      * limits, it will be inserted to the head. All requests followed it will
  186:      * be still in throttled_reqs queue.
  187:      */
  188: 
  189:     while (bdrv_exceed_io_limits(bs, nb_sectors, is_write, &wait_time)) {
  190:         qemu_mod_timer(bs->block_timer,
  191:                        wait_time + qemu_get_clock_ns(vm_clock));
  192:         qemu_co_queue_wait_insert_head(&bs->throttled_reqs);
  193:     }
  194: 
  195:     qemu_co_queue_next(&bs->throttled_reqs);
  196: }
  197: 
  198: /* check if the path starts with "<protocol>:" */
  199: static int path_has_protocol(const char *path)
  200: {
  201:     const char *p;
  202: 
  203: #ifdef _WIN32
  204:     if (is_windows_drive(path) ||
  205:         is_windows_drive_prefix(path)) {
  206:         return 0;
  207:     }
  208:     p = path + strcspn(path, ":/\\");
  209: #else
  210:     p = path + strcspn(path, ":/");
  211: #endif
  212: 
  213:     return *p == ':';
  214: }
  215: 
  216: int path_is_absolute(const char *path)
  217: {
  218: #ifdef _WIN32
  219:     /* specific case for names like: "\\.\d:" */
  220:     if (is_windows_drive(path) || is_windows_drive_prefix(path)) {
  221:         return 1;
  222:     }
  223:     return (*path == '/' || *path == '\\');
  224: #else
  225:     return (*path == '/');
  226: #endif
  227: }
  228: 
  229: /* if filename is absolute, just copy it to dest. Otherwise, build a
  230:    path to it by considering it is relative to base_path. URL are
  231:    supported. */
  232: void path_combine(char *dest, int dest_size,
  233:                   const char *base_path,
  234:                   const char *filename)
  235: {
  236:     const char *p, *p1;
  237:     int len;
  238: 
  239:     if (dest_size <= 0)
  240:         return;
  241:     if (path_is_absolute(filename)) {
  242:         pstrcpy(dest, dest_size, filename);
  243:     } else {
  244:         p = strchr(base_path, ':');
  245:         if (p)
  246:             p++;
  247:         else
  248:             p = base_path;
  249:         p1 = strrchr(base_path, '/');
  250: #ifdef _WIN32
  251:         {
  252:             const char *p2;
  253:             p2 = strrchr(base_path, '\\');
  254:             if (!p1 || p2 > p1)
  255:                 p1 = p2;
  256:         }
  257: #endif
  258:         if (p1)
  259:             p1++;
  260:         else
  261:             p1 = base_path;
  262:         if (p1 > p)
  263:             p = p1;
  264:         len = p - base_path;
  265:         if (len > dest_size - 1)
  266:             len = dest_size - 1;
  267:         memcpy(dest, base_path, len);
  268:         dest[len] = '\0';
  269:         pstrcat(dest, dest_size, filename);
  270:     }
  271: }
  272: 
  273: void bdrv_get_full_backing_filename(BlockDriverState *bs, char *dest, size_t sz)
  274: {
  275:     if (bs->backing_file[0] == '\0' || path_has_protocol(bs->backing_file)) {
  276:         pstrcpy(dest, sz, bs->backing_file);
  277:     } else {
  278:         path_combine(dest, sz, bs->filename, bs->backing_file);
  279:     }
  280: }
  281: 
  282: void bdrv_register(BlockDriver *bdrv)
  283: {
  284:     /* Block drivers without coroutine functions need emulation */
  285:     if (!bdrv->bdrv_co_readv) {
  286:         bdrv->bdrv_co_readv = bdrv_co_readv_em;
  287:         bdrv->bdrv_co_writev = bdrv_co_writev_em;
  288: 
  289:         /* bdrv_co_readv_em()/brdv_co_writev_em() work in terms of aio, so if
  290:          * the block driver lacks aio we need to emulate that too.
  291:          */
  292:         if (!bdrv->bdrv_aio_readv) {
  293:             /* add AIO emulation layer */
  294:             bdrv->bdrv_aio_readv = bdrv_aio_readv_em;
  295:             bdrv->bdrv_aio_writev = bdrv_aio_writev_em;
  296:         }
  297:     }
  298: 
  299:     QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list);
  300: }
  301: 
  302: /* create a new block device (by default it is empty) */
  303: BlockDriverState *bdrv_new(const char *device_name)
  304: {
  305:     BlockDriverState *bs;
  306: 
  307:     bs = g_malloc0(sizeof(BlockDriverState));
  308:     pstrcpy(bs->device_name, sizeof(bs->device_name), device_name);
  309:     if (device_name[0] != '\0') {
  310:         QTAILQ_INSERT_TAIL(&bdrv_states, bs, list);
  311:     }
  312:     bdrv_iostatus_disable(bs);
  313:     return bs;
  314: }
  315: 
  316: BlockDriver *bdrv_find_format(const char *format_name)
  317: {
  318:     BlockDriver *drv1;
  319:     QLIST_FOREACH(drv1, &bdrv_drivers, list) {
  320:         if (!strcmp(drv1->format_name, format_name)) {
  321:             return drv1;
  322:         }
  323:     }
  324:     return NULL;
  325: }
  326: 
  327: static int bdrv_is_whitelisted(BlockDriver *drv)
  328: {
  329:     static const char *whitelist[] = {
  330:         CONFIG_BDRV_WHITELIST
  331:     };
  332:     const char **p;
  333: 
  334:     if (!whitelist[0])
  335:         return 1;               /* no whitelist, anything goes */
  336: 
  337:     for (p = whitelist; *p; p++) {
  338:         if (!strcmp(drv->format_name, *p)) {
  339:             return 1;
  340:         }
  341:     }
  342:     return 0;
  343: }
  344: 
  345: BlockDriver *bdrv_find_whitelisted_format(const char *format_name)
  346: {
  347:     BlockDriver *drv = bdrv_find_format(format_name);
  348:     return drv && bdrv_is_whitelisted(drv) ? drv : NULL;
  349: }
  350: 
/*
 * Argument/result bundle passed from bdrv_create() to
 * bdrv_create_co_entry().
 */
typedef struct CreateCo {
    BlockDriver *drv;               /* driver whose bdrv_create() is called */
    char *filename;                 /* copy of the name, freed by bdrv_create() */
    QEMUOptionParameter *options;   /* creation options handed to the driver */
    int ret;                        /* driver result; NOT_DONE until it is set */
} CreateCo;
  357: 
  358: static void coroutine_fn bdrv_create_co_entry(void *opaque)
  359: {
  360:     CreateCo *cco = opaque;
  361:     assert(cco->drv);
  362: 
  363:     cco->ret = cco->drv->bdrv_create(cco->filename, cco->options);
  364: }
  365: 
  366: int bdrv_create(BlockDriver *drv, const char* filename,
  367:     QEMUOptionParameter *options)
  368: {
  369:     int ret;
  370: 
  371:     Coroutine *co;
  372:     CreateCo cco = {
  373:         .drv = drv,
  374:         .filename = g_strdup(filename),
  375:         .options = options,
  376:         .ret = NOT_DONE,
  377:     };
  378: 
  379:     if (!drv->bdrv_create) {
  380:         return -ENOTSUP;
  381:     }
  382: 
  383:     if (qemu_in_coroutine()) {
  384:         /* Fast-path if already in coroutine context */
  385:         bdrv_create_co_entry(&cco);
  386:     } else {
  387:         co = qemu_coroutine_create(bdrv_create_co_entry);
  388:         qemu_coroutine_enter(co, &cco);
  389:         while (cco.ret == NOT_DONE) {
  390:             qemu_aio_wait();
  391:         }
  392:     }
  393: 
  394:     ret = cco.ret;
  395:     g_free(cco.filename);
  396: 
  397:     return ret;
  398: }
  399: 
  400: int bdrv_create_file(const char* filename, QEMUOptionParameter *options)
  401: {
  402:     BlockDriver *drv;
  403: 
  404:     drv = bdrv_find_protocol(filename);
  405:     if (drv == NULL) {
  406:         return -ENOENT;
  407:     }
  408: 
  409:     return bdrv_create(drv, filename, options);
  410: }
  411: 
  412: /*
  413:  * Create a uniquely-named empty temporary file.
  414:  * Return 0 upon success, otherwise a negative errno value.
  415:  */
  416: int get_tmp_filename(char *filename, int size)
  417: {
  418: #ifdef _WIN32
  419:     char temp_dir[MAX_PATH];
  420:     /* GetTempFileName requires that its output buffer (4th param)
  421:        have length MAX_PATH or greater.  */
  422:     assert(size >= MAX_PATH);
  423:     return (GetTempPath(MAX_PATH, temp_dir)
  424:             && GetTempFileName(temp_dir, "qem", 0, filename)
  425:             ? 0 : -GetLastError());
  426: #else
  427:     int fd;
  428:     const char *tmpdir;
  429:     tmpdir = getenv("TMPDIR");
  430:     if (!tmpdir)
  431:         tmpdir = "/tmp";
  432:     if (snprintf(filename, size, "%s/vl.XXXXXX", tmpdir) >= size) {
  433:         return -EOVERFLOW;
  434:     }
  435:     fd = mkstemp(filename);
  436:     if (fd < 0 || close(fd)) {
  437:         return -errno;
  438:     }
  439:     return 0;
  440: #endif
  441: }
  442: 
  443: /*
  444:  * Detect host devices. By convention, /dev/cdrom[N] is always
  445:  * recognized as a host CDROM.
  446:  */
  447: static BlockDriver *find_hdev_driver(const char *filename)
  448: {
  449:     int score_max = 0, score;
  450:     BlockDriver *drv = NULL, *d;
  451: 
  452:     QLIST_FOREACH(d, &bdrv_drivers, list) {
  453:         if (d->bdrv_probe_device) {
  454:             score = d->bdrv_probe_device(filename);
  455:             if (score > score_max) {
  456:                 score_max = score;
  457:                 drv = d;
  458:             }
  459:         }
  460:     }
  461: 
  462:     return drv;
  463: }
  464: 
  465: BlockDriver *bdrv_find_protocol(const char *filename)
  466: {
  467:     BlockDriver *drv1;
  468:     char protocol[128];
  469:     int len;
  470:     const char *p;
  471: 
  472:     /* TODO Drivers without bdrv_file_open must be specified explicitly */
  473: 
  474:     /*
  475:      * XXX(hch): we really should not let host device detection
  476:      * override an explicit protocol specification, but moving this
  477:      * later breaks access to device names with colons in them.
  478:      * Thanks to the brain-dead persistent naming schemes on udev-
  479:      * based Linux systems those actually are quite common.
  480:      */
  481:     drv1 = find_hdev_driver(filename);
  482:     if (drv1) {
  483:         return drv1;
  484:     }
  485: 
  486:     if (!path_has_protocol(filename)) {
  487:         return bdrv_find_format("file");
  488:     }
  489:     p = strchr(filename, ':');
  490:     assert(p != NULL);
  491:     len = p - filename;
  492:     if (len > sizeof(protocol) - 1)
  493:         len = sizeof(protocol) - 1;
  494:     memcpy(protocol, filename, len);
  495:     protocol[len] = '\0';
  496:     QLIST_FOREACH(drv1, &bdrv_drivers, list) {
  497:         if (drv1->protocol_name &&
  498:             !strcmp(drv1->protocol_name, protocol)) {
  499:             return drv1;
  500:         }
  501:     }
  502:     return NULL;
  503: }
  504: 
  505: static int find_image_format(const char *filename, BlockDriver **pdrv)
  506: {
  507:     int ret, score, score_max;
  508:     BlockDriver *drv1, *drv;
  509:     uint8_t buf[2048];
  510:     BlockDriverState *bs;
  511: 
  512:     ret = bdrv_file_open(&bs, filename, 0);
  513:     if (ret < 0) {
  514:         *pdrv = NULL;
  515:         return ret;
  516:     }
  517: 
  518:     /* Return the raw BlockDriver * to scsi-generic devices or empty drives */
  519:     if (bs->sg || !bdrv_is_inserted(bs)) {
  520:         bdrv_delete(bs);
  521:         drv = bdrv_find_format("raw");
  522:         if (!drv) {
  523:             ret = -ENOENT;
  524:         }
  525:         *pdrv = drv;
  526:         return ret;
  527:     }
  528: 
  529:     ret = bdrv_pread(bs, 0, buf, sizeof(buf));
  530:     bdrv_delete(bs);
  531:     if (ret < 0) {
  532:         *pdrv = NULL;
  533:         return ret;
  534:     }
  535: 
  536:     score_max = 0;
  537:     drv = NULL;
  538:     QLIST_FOREACH(drv1, &bdrv_drivers, list) {
  539:         if (drv1->bdrv_probe) {
  540:             score = drv1->bdrv_probe(buf, ret, filename);
  541:             if (score > score_max) {
  542:                 score_max = score;
  543:                 drv = drv1;
  544:             }
  545:         }
  546:     }
  547:     if (!drv) {
  548:         ret = -ENOENT;
  549:     }
  550:     *pdrv = drv;
  551:     return ret;
  552: }
  553: 
  554: /**
  555:  * Set the current 'total_sectors' value
  556:  */
  557: static int refresh_total_sectors(BlockDriverState *bs, int64_t hint)
  558: {
  559:     BlockDriver *drv = bs->drv;
  560: 
  561:     /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */
  562:     if (bs->sg)
  563:         return 0;
  564: 
  565:     /* query actual device if possible, otherwise just trust the hint */
  566:     if (drv->bdrv_getlength) {
  567:         int64_t length = drv->bdrv_getlength(bs);
  568:         if (length < 0) {
  569:             return length;
  570:         }
  571:         hint = length >> BDRV_SECTOR_BITS;
  572:     }
  573: 
  574:     bs->total_sectors = hint;
  575:     return 0;
  576: }
  577: 
  578: /**
  579:  * Set open flags for a given cache mode
  580:  *
  581:  * Return 0 on success, -1 if the cache mode was invalid.
  582:  */
  583: int bdrv_parse_cache_flags(const char *mode, int *flags)
  584: {
  585:     *flags &= ~BDRV_O_CACHE_MASK;
  586: 
  587:     if (!strcmp(mode, "off") || !strcmp(mode, "none")) {
  588:         *flags |= BDRV_O_NOCACHE | BDRV_O_CACHE_WB;
  589:     } else if (!strcmp(mode, "directsync")) {
  590:         *flags |= BDRV_O_NOCACHE;
  591:     } else if (!strcmp(mode, "writeback")) {
  592:         *flags |= BDRV_O_CACHE_WB;
  593:     } else if (!strcmp(mode, "unsafe")) {
  594:         *flags |= BDRV_O_CACHE_WB;
  595:         *flags |= BDRV_O_NO_FLUSH;
  596:     } else if (!strcmp(mode, "writethrough")) {
  597:         /* this is the default */
  598:     } else {
  599:         return -1;
  600:     }
  601: 
  602:     return 0;
  603: }
  604: 
  605: /**
  606:  * The copy-on-read flag is actually a reference count so multiple users may
  607:  * use the feature without worrying about clobbering its previous state.
  608:  * Copy-on-read stays enabled until all users have called to disable it.
  609:  */
  610: void bdrv_enable_copy_on_read(BlockDriverState *bs)
  611: {
  612:     bs->copy_on_read++;
  613: }
  614: 
  615: void bdrv_disable_copy_on_read(BlockDriverState *bs)
  616: {
  617:     assert(bs->copy_on_read > 0);
  618:     bs->copy_on_read--;
  619: }
  620: 
  621: /*
  622:  * Common part for opening disk images and files
  623:  */
  624: static int bdrv_open_common(BlockDriverState *bs, const char *filename,
  625:     int flags, BlockDriver *drv)
  626: {
  627:     int ret, open_flags;
  628: 
  629:     assert(drv != NULL);
  630:     assert(bs->file == NULL);
  631: 
  632:     trace_bdrv_open_common(bs, filename, flags, drv->format_name);
  633: 
  634:     bs->open_flags = flags;
  635:     bs->buffer_alignment = 512;
  636: 
  637:     assert(bs->copy_on_read == 0); /* bdrv_new() and bdrv_close() make it so */
  638:     if ((flags & BDRV_O_RDWR) && (flags & BDRV_O_COPY_ON_READ)) {
  639:         bdrv_enable_copy_on_read(bs);
  640:     }
  641: 
  642:     pstrcpy(bs->filename, sizeof(bs->filename), filename);
  643: 
  644:     if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv)) {
  645:         return -ENOTSUP;
  646:     }
  647: 
  648:     bs->drv = drv;
  649:     bs->opaque = g_malloc0(drv->instance_size);
  650: 
  651:     bs->enable_write_cache = !!(flags & BDRV_O_CACHE_WB);
  652: 
  653:     /*
  654:      * Clear flags that are internal to the block layer before opening the
  655:      * image.
  656:      */
  657:     open_flags = flags & ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
  658: 
  659:     /*
  660:      * Snapshots should be writable.
  661:      */
  662:     if (bs->is_temporary) {
  663:         open_flags |= BDRV_O_RDWR;
  664:     }
  665: 
  666:     bs->keep_read_only = bs->read_only = !(open_flags & BDRV_O_RDWR);
  667: 
  668:     /* Open the image, either directly or using a protocol */
  669:     if (drv->bdrv_file_open) {
  670:         ret = drv->bdrv_file_open(bs, filename, open_flags);
  671:     } else {
  672:         ret = bdrv_file_open(&bs->file, filename, open_flags);
  673:         if (ret >= 0) {
  674:             ret = drv->bdrv_open(bs, open_flags);
  675:         }
  676:     }
  677: 
  678:     if (ret < 0) {
  679:         goto free_and_fail;
  680:     }
  681: 
  682:     ret = refresh_total_sectors(bs, bs->total_sectors);
  683:     if (ret < 0) {
  684:         goto free_and_fail;
  685:     }
  686: 
  687: #ifndef _WIN32
  688:     if (bs->is_temporary) {
  689:         unlink(filename);
  690:     }
  691: #endif
  692:     return 0;
  693: 
  694: free_and_fail:
  695:     if (bs->file) {
  696:         bdrv_delete(bs->file);
  697:         bs->file = NULL;
  698:     }
  699:     g_free(bs->opaque);
  700:     bs->opaque = NULL;
  701:     bs->drv = NULL;
  702:     return ret;
  703: }
  704: 
  705: /*
  706:  * Opens a file using a protocol (file, host_device, nbd, ...)
  707:  */
  708: int bdrv_file_open(BlockDriverState **pbs, const char *filename, int flags)
  709: {
  710:     BlockDriverState *bs;
  711:     BlockDriver *drv;
  712:     int ret;
  713: 
  714:     drv = bdrv_find_protocol(filename);
  715:     if (!drv) {
  716:         return -ENOENT;
  717:     }
  718: 
  719:     bs = bdrv_new("");
  720:     ret = bdrv_open_common(bs, filename, flags, drv);
  721:     if (ret < 0) {
  722:         bdrv_delete(bs);
  723:         return ret;
  724:     }
  725:     bs->growable = 1;
  726:     *pbs = bs;
  727:     return 0;
  728: }
  729: 
  730: /*
  731:  * Opens a disk image (raw, qcow2, vmdk, ...)
  732:  */
  733: int bdrv_open(BlockDriverState *bs, const char *filename, int flags,
  734:               BlockDriver *drv)
  735: {
  736:     int ret;
  737:     char tmp_filename[PATH_MAX];
  738: 
  739:     if (flags & BDRV_O_SNAPSHOT) {
  740:         BlockDriverState *bs1;
  741:         int64_t total_size;
  742:         int is_protocol = 0;
  743:         BlockDriver *bdrv_qcow2;
  744:         QEMUOptionParameter *options;
  745:         char backing_filename[PATH_MAX];
  746: 
  747:         /* if snapshot, we create a temporary backing file and open it
  748:            instead of opening 'filename' directly */
  749: 
  750:         /* if there is a backing file, use it */
  751:         bs1 = bdrv_new("");
  752:         ret = bdrv_open(bs1, filename, 0, drv);
  753:         if (ret < 0) {
  754:             bdrv_delete(bs1);
  755:             return ret;
  756:         }
  757:         total_size = bdrv_getlength(bs1) & BDRV_SECTOR_MASK;
  758: 
  759:         if (bs1->drv && bs1->drv->protocol_name)
  760:             is_protocol = 1;
  761: 
  762:         bdrv_delete(bs1);
  763: 
  764:         ret = get_tmp_filename(tmp_filename, sizeof(tmp_filename));
  765:         if (ret < 0) {
  766:             return ret;
  767:         }
  768: 
  769:         /* Real path is meaningless for protocols */
  770:         if (is_protocol)
  771:             snprintf(backing_filename, sizeof(backing_filename),
  772:                      "%s", filename);
  773:         else if (!realpath(filename, backing_filename))
  774:             return -errno;
  775: 
  776:         bdrv_qcow2 = bdrv_find_format("qcow2");
  777:         options = parse_option_parameters("", bdrv_qcow2->create_options, NULL);
  778: 
  779:         set_option_parameter_int(options, BLOCK_OPT_SIZE, total_size);
  780:         set_option_parameter(options, BLOCK_OPT_BACKING_FILE, backing_filename);
  781:         if (drv) {
  782:             set_option_parameter(options, BLOCK_OPT_BACKING_FMT,
  783:                 drv->format_name);
  784:         }
  785: 
  786:         ret = bdrv_create(bdrv_qcow2, tmp_filename, options);
  787:         free_option_parameters(options);
  788:         if (ret < 0) {
  789:             return ret;
  790:         }
  791: 
  792:         filename = tmp_filename;
  793:         drv = bdrv_qcow2;
  794:         bs->is_temporary = 1;
  795:     }
  796: 
  797:     /* Find the right image format driver */
  798:     if (!drv) {
  799:         ret = find_image_format(filename, &drv);
  800:     }
  801: 
  802:     if (!drv) {
  803:         goto unlink_and_fail;
  804:     }
  805: 
  806:     /* Open the image */
  807:     ret = bdrv_open_common(bs, filename, flags, drv);
  808:     if (ret < 0) {
  809:         goto unlink_and_fail;
  810:     }
  811: 
  812:     /* If there is a backing file, use it */
  813:     if ((flags & BDRV_O_NO_BACKING) == 0 && bs->backing_file[0] != '\0') {
  814:         char backing_filename[PATH_MAX];
  815:         int back_flags;
  816:         BlockDriver *back_drv = NULL;
  817: 
  818:         bs->backing_hd = bdrv_new("");
  819:         bdrv_get_full_backing_filename(bs, backing_filename,
  820:                                        sizeof(backing_filename));
  821: 
  822:         if (bs->backing_format[0] != '\0') {
  823:             back_drv = bdrv_find_format(bs->backing_format);
  824:         }
  825: 
  826:         /* backing files always opened read-only */
  827:         back_flags =
  828:             flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
  829: 
  830:         ret = bdrv_open(bs->backing_hd, backing_filename, back_flags, back_drv);
  831:         if (ret < 0) {
  832:             bdrv_close(bs);
  833:             return ret;
  834:         }
  835:         if (bs->is_temporary) {
  836:             bs->backing_hd->keep_read_only = !(flags & BDRV_O_RDWR);
  837:         } else {
  838:             /* base image inherits from "parent" */
  839:             bs->backing_hd->keep_read_only = bs->keep_read_only;
  840:         }
  841:     }
  842: 
  843:     if (!bdrv_key_required(bs)) {
  844:         bdrv_dev_change_media_cb(bs, true);
  845:     }
  846: 
  847:     /* throttling disk I/O limits */
  848:     if (bs->io_limits_enabled) {
  849:         bdrv_io_limits_enable(bs);
  850:     }
  851: 
  852:     return 0;
  853: 
  854: unlink_and_fail:
  855:     if (bs->is_temporary) {
  856:         unlink(filename);
  857:     }
  858:     return ret;
  859: }
  860: 
  861: void bdrv_close(BlockDriverState *bs)
  862: {
  863:     bdrv_flush(bs);
  864:     if (bs->drv) {
  865:         if (bs->job) {
  866:             block_job_cancel_sync(bs->job);
  867:         }
  868:         bdrv_drain_all();
  869: 
  870:         if (bs == bs_snapshots) {
  871:             bs_snapshots = NULL;
  872:         }
  873:         if (bs->backing_hd) {
  874:             bdrv_delete(bs->backing_hd);
  875:             bs->backing_hd = NULL;
  876:         }
  877:         bs->drv->bdrv_close(bs);
  878:         g_free(bs->opaque);
  879: #ifdef _WIN32
  880:         if (bs->is_temporary) {
  881:             unlink(bs->filename);
  882:         }
  883: #endif
  884:         bs->opaque = NULL;
  885:         bs->drv = NULL;
  886:         bs->copy_on_read = 0;
  887:         bs->backing_file[0] = '\0';
  888:         bs->backing_format[0] = '\0';
  889:         bs->total_sectors = 0;
  890:         bs->encrypted = 0;
  891:         bs->valid_key = 0;
  892:         bs->sg = 0;
  893:         bs->growable = 0;
  894: 
  895:         if (bs->file != NULL) {
  896:             bdrv_delete(bs->file);
  897:             bs->file = NULL;
  898:         }
  899: 
  900:         bdrv_dev_change_media_cb(bs, false);
  901:     }
  902: 
  903:     /*throttling disk I/O limits*/
  904:     if (bs->io_limits_enabled) {
  905:         bdrv_io_limits_disable(bs);
  906:     }
  907: }
  908: 
  909: void bdrv_close_all(void)
  910: {
  911:     BlockDriverState *bs;
  912: 
  913:     QTAILQ_FOREACH(bs, &bdrv_states, list) {
  914:         bdrv_close(bs);
  915:     }
  916: }
  917: 
  918: /*
  919:  * Wait for pending requests to complete across all BlockDriverStates
  920:  *
  921:  * This function does not flush data to disk, use bdrv_flush_all() for that
  922:  * after calling this function.
  923:  *
  924:  * Note that completion of an asynchronous I/O operation can trigger any
  925:  * number of other I/O operations on other devices---for example a coroutine
  926:  * can be arbitrarily complex and a constant flow of I/O can come until the
  927:  * coroutine is complete.  Because of this, it is not possible to have a
  928:  * function to drain a single device's I/O queue.
  929:  */
  930: void bdrv_drain_all(void)
  931: {
  932:     BlockDriverState *bs;
  933:     bool busy;
  934: 
  935:     do {
  936:         busy = qemu_aio_wait();
  937: 
  938:         /* FIXME: We do not have timer support here, so this is effectively
  939:          * a busy wait.
  940:          */
  941:         QTAILQ_FOREACH(bs, &bdrv_states, list) {
  942:             if (!qemu_co_queue_empty(&bs->throttled_reqs)) {
  943:                 qemu_co_queue_restart_all(&bs->throttled_reqs);
  944:                 busy = true;
  945:             }
  946:         }
  947:     } while (busy);
  948: 
  949:     /* If requests are still pending there is a bug somewhere */
  950:     QTAILQ_FOREACH(bs, &bdrv_states, list) {
  951:         assert(QLIST_EMPTY(&bs->tracked_requests));
  952:         assert(qemu_co_queue_empty(&bs->throttled_reqs));
  953:     }
  954: }
  955: 
  956: /* make a BlockDriverState anonymous by removing from bdrv_state list.
  957:    Also, NULL terminate the device_name to prevent double remove */
  958: void bdrv_make_anon(BlockDriverState *bs)
  959: {
  960:     if (bs->device_name[0] != '\0') {
  961:         QTAILQ_REMOVE(&bdrv_states, bs, list);
  962:     }
  963:     bs->device_name[0] = '\0';
  964: }
  965: 
  966: static void bdrv_rebind(BlockDriverState *bs)
  967: {
  968:     if (bs->drv && bs->drv->bdrv_rebind) {
  969:         bs->drv->bdrv_rebind(bs);
  970:     }
  971: }
  972: 
  973: /*
  974:  * Add new bs contents at the top of an image chain while the chain is
  975:  * live, while keeping required fields on the top layer.
  976:  *
  977:  * This will modify the BlockDriverState fields, and swap contents
  978:  * between bs_new and bs_top. Both bs_new and bs_top are modified.
  979:  *
  980:  * bs_new is required to be anonymous.
  981:  *
  982:  * This function does not create any image files.
  983:  */
  984: void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top)
  985: {
  986:     BlockDriverState tmp;
  987: 
  988:     /* bs_new must be anonymous */
  989:     assert(bs_new->device_name[0] == '\0');
  990: 
  991:     tmp = *bs_new;
  992: 
  993:     /* there are some fields that need to stay on the top layer: */
  994:     tmp.open_flags        = bs_top->open_flags;
  995: 
  996:     /* dev info */
  997:     tmp.dev_ops           = bs_top->dev_ops;
  998:     tmp.dev_opaque        = bs_top->dev_opaque;
  999:     tmp.dev               = bs_top->dev;
 1000:     tmp.buffer_alignment  = bs_top->buffer_alignment;
 1001:     tmp.copy_on_read      = bs_top->copy_on_read;
 1002: 
 1003:     /* i/o timing parameters */
 1004:     tmp.slice_time        = bs_top->slice_time;
 1005:     tmp.slice_start       = bs_top->slice_start;
 1006:     tmp.slice_end         = bs_top->slice_end;
 1007:     tmp.io_limits         = bs_top->io_limits;
 1008:     tmp.io_base           = bs_top->io_base;
 1009:     tmp.throttled_reqs    = bs_top->throttled_reqs;
 1010:     tmp.block_timer       = bs_top->block_timer;
 1011:     tmp.io_limits_enabled = bs_top->io_limits_enabled;
 1012: 
 1013:     /* geometry */
 1014:     tmp.cyls              = bs_top->cyls;
 1015:     tmp.heads             = bs_top->heads;
 1016:     tmp.secs              = bs_top->secs;
 1017:     tmp.translation       = bs_top->translation;
 1018: 
 1019:     /* r/w error */
 1020:     tmp.on_read_error     = bs_top->on_read_error;
 1021:     tmp.on_write_error    = bs_top->on_write_error;
 1022: 
 1023:     /* i/o status */
 1024:     tmp.iostatus_enabled  = bs_top->iostatus_enabled;
 1025:     tmp.iostatus          = bs_top->iostatus;
 1026: 
 1027:     /* keep the same entry in bdrv_states */
 1028:     pstrcpy(tmp.device_name, sizeof(tmp.device_name), bs_top->device_name);
 1029:     tmp.list = bs_top->list;
 1030: 
 1031:     /* The contents of 'tmp' will become bs_top, as we are
 1032:      * swapping bs_new and bs_top contents. */
 1033:     tmp.backing_hd = bs_new;
 1034:     pstrcpy(tmp.backing_file, sizeof(tmp.backing_file), bs_top->filename);
 1035:     bdrv_get_format(bs_top, tmp.backing_format, sizeof(tmp.backing_format));
 1036: 
 1037:     /* swap contents of the fixed new bs and the current top */
 1038:     *bs_new = *bs_top;
 1039:     *bs_top = tmp;
 1040: 
 1041:     /* device_name[] was carried over from the old bs_top.  bs_new
 1042:      * shouldn't be in bdrv_states, so we need to make device_name[]
 1043:      * reflect the anonymity of bs_new
 1044:      */
 1045:     bs_new->device_name[0] = '\0';
 1046: 
 1047:     /* clear the copied fields in the new backing file */
 1048:     bdrv_detach_dev(bs_new, bs_new->dev);
 1049: 
 1050:     qemu_co_queue_init(&bs_new->throttled_reqs);
 1051:     memset(&bs_new->io_base,   0, sizeof(bs_new->io_base));
 1052:     memset(&bs_new->io_limits, 0, sizeof(bs_new->io_limits));
 1053:     bdrv_iostatus_disable(bs_new);
 1054: 
 1055:     /* we don't use bdrv_io_limits_disable() for this, because we don't want
 1056:      * to affect or delete the block_timer, as it has been moved to bs_top */
 1057:     bs_new->io_limits_enabled = false;
 1058:     bs_new->block_timer       = NULL;
 1059:     bs_new->slice_time        = 0;
 1060:     bs_new->slice_start       = 0;
 1061:     bs_new->slice_end         = 0;
 1062: 
 1063:     bdrv_rebind(bs_new);
 1064:     bdrv_rebind(bs_top);
 1065: }
 1066: 
 1067: void bdrv_delete(BlockDriverState *bs)
 1068: {
 1069:     assert(!bs->dev);
 1070:     assert(!bs->job);
 1071:     assert(!bs->in_use);
 1072: 
 1073:     /* remove from list, if necessary */
 1074:     bdrv_make_anon(bs);
 1075: 
 1076:     bdrv_close(bs);
 1077: 
 1078:     assert(bs != bs_snapshots);
 1079:     g_free(bs);
 1080: }
 1081: 
 1082: int bdrv_attach_dev(BlockDriverState *bs, void *dev)
 1083: /* TODO change to DeviceState *dev when all users are qdevified */
 1084: {
 1085:     if (bs->dev) {
 1086:         return -EBUSY;
 1087:     }
 1088:     bs->dev = dev;
 1089:     bdrv_iostatus_reset(bs);
 1090:     return 0;
 1091: }
 1092: 
 1093: /* TODO qdevified devices don't use this, remove when devices are qdevified */
 1094: void bdrv_attach_dev_nofail(BlockDriverState *bs, void *dev)
 1095: {
 1096:     if (bdrv_attach_dev(bs, dev) < 0) {
 1097:         abort();
 1098:     }
 1099: }
 1100: 
 1101: void bdrv_detach_dev(BlockDriverState *bs, void *dev)
 1102: /* TODO change to DeviceState *dev when all users are qdevified */
 1103: {
 1104:     assert(bs->dev == dev);
 1105:     bs->dev = NULL;
 1106:     bs->dev_ops = NULL;
 1107:     bs->dev_opaque = NULL;
 1108:     bs->buffer_alignment = 512;
 1109: }
 1110: 
 1111: /* TODO change to return DeviceState * when all users are qdevified */
 1112: void *bdrv_get_attached_dev(BlockDriverState *bs)
 1113: {
 1114:     return bs->dev;
 1115: }
 1116: 
 1117: void bdrv_set_dev_ops(BlockDriverState *bs, const BlockDevOps *ops,
 1118:                       void *opaque)
 1119: {
 1120:     bs->dev_ops = ops;
 1121:     bs->dev_opaque = opaque;
 1122:     if (bdrv_dev_has_removable_media(bs) && bs == bs_snapshots) {
 1123:         bs_snapshots = NULL;
 1124:     }
 1125: }
 1126: 
 1127: void bdrv_emit_qmp_error_event(const BlockDriverState *bdrv,
 1128:                                BlockQMPEventAction action, int is_read)
 1129: {
 1130:     QObject *data;
 1131:     const char *action_str;
 1132: 
 1133:     switch (action) {
 1134:     case BDRV_ACTION_REPORT:
 1135:         action_str = "report";
 1136:         break;
 1137:     case BDRV_ACTION_IGNORE:
 1138:         action_str = "ignore";
 1139:         break;
 1140:     case BDRV_ACTION_STOP:
 1141:         action_str = "stop";
 1142:         break;
 1143:     default:
 1144:         abort();
 1145:     }
 1146: 
 1147:     data = qobject_from_jsonf("{ 'device': %s, 'action': %s, 'operation': %s }",
 1148:                               bdrv->device_name,
 1149:                               action_str,
 1150:                               is_read ? "read" : "write");
 1151:     monitor_protocol_event(QEVENT_BLOCK_IO_ERROR, data);
 1152: 
 1153:     qobject_decref(data);
 1154: }
 1155: 
 1156: static void bdrv_emit_qmp_eject_event(BlockDriverState *bs, bool ejected)
 1157: {
 1158:     QObject *data;
 1159: 
 1160:     data = qobject_from_jsonf("{ 'device': %s, 'tray-open': %i }",
 1161:                               bdrv_get_device_name(bs), ejected);
 1162:     monitor_protocol_event(QEVENT_DEVICE_TRAY_MOVED, data);
 1163: 
 1164:     qobject_decref(data);
 1165: }
 1166: 
 1167: static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load)
 1168: {
 1169:     if (bs->dev_ops && bs->dev_ops->change_media_cb) {
 1170:         bool tray_was_closed = !bdrv_dev_is_tray_open(bs);
 1171:         bs->dev_ops->change_media_cb(bs->dev_opaque, load);
 1172:         if (tray_was_closed) {
 1173:             /* tray open */
 1174:             bdrv_emit_qmp_eject_event(bs, true);
 1175:         }
 1176:         if (load) {
 1177:             /* tray close */
 1178:             bdrv_emit_qmp_eject_event(bs, false);
 1179:         }
 1180:     }
 1181: }
 1182: 
 1183: bool bdrv_dev_has_removable_media(BlockDriverState *bs)
 1184: {
 1185:     return !bs->dev || (bs->dev_ops && bs->dev_ops->change_media_cb);
 1186: }
 1187: 
 1188: void bdrv_dev_eject_request(BlockDriverState *bs, bool force)
 1189: {
 1190:     if (bs->dev_ops && bs->dev_ops->eject_request_cb) {
 1191:         bs->dev_ops->eject_request_cb(bs->dev_opaque, force);
 1192:     }
 1193: }
 1194: 
 1195: bool bdrv_dev_is_tray_open(BlockDriverState *bs)
 1196: {
 1197:     if (bs->dev_ops && bs->dev_ops->is_tray_open) {
 1198:         return bs->dev_ops->is_tray_open(bs->dev_opaque);
 1199:     }
 1200:     return false;
 1201: }
 1202: 
 1203: static void bdrv_dev_resize_cb(BlockDriverState *bs)
 1204: {
 1205:     if (bs->dev_ops && bs->dev_ops->resize_cb) {
 1206:         bs->dev_ops->resize_cb(bs->dev_opaque);
 1207:     }
 1208: }
 1209: 
 1210: bool bdrv_dev_is_medium_locked(BlockDriverState *bs)
 1211: {
 1212:     if (bs->dev_ops && bs->dev_ops->is_medium_locked) {
 1213:         return bs->dev_ops->is_medium_locked(bs->dev_opaque);
 1214:     }
 1215:     return false;
 1216: }
 1217: 
 1218: /*
 1219:  * Run consistency checks on an image
 1220:  *
 1221:  * Returns 0 if the check could be completed (it doesn't mean that the image is
 1222:  * free of errors) or -errno when an internal error occurred. The results of the
 1223:  * check are stored in res.
 1224:  */
 1225: int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res)
 1226: {
 1227:     if (bs->drv->bdrv_check == NULL) {
 1228:         return -ENOTSUP;
 1229:     }
 1230: 
 1231:     memset(res, 0, sizeof(*res));
 1232:     return bs->drv->bdrv_check(bs, res);
 1233: }
 1234: 
 1235: #define COMMIT_BUF_SECTORS 2048
 1236: 
 1237: /* commit COW file into the raw image */
 1238: int bdrv_commit(BlockDriverState *bs)
 1239: {
 1240:     BlockDriver *drv = bs->drv;
 1241:     BlockDriver *backing_drv;
 1242:     int64_t sector, total_sectors;
 1243:     int n, ro, open_flags;
 1244:     int ret = 0, rw_ret = 0;
 1245:     uint8_t *buf;
 1246:     char filename[1024];
 1247:     BlockDriverState *bs_rw, *bs_ro;
 1248: 
 1249:     if (!drv)
 1250:         return -ENOMEDIUM;
 1251:     
 1252:     if (!bs->backing_hd) {
 1253:         return -ENOTSUP;
 1254:     }
 1255: 
 1256:     if (bs->backing_hd->keep_read_only) {
 1257:         return -EACCES;
 1258:     }
 1259: 
 1260:     if (bdrv_in_use(bs) || bdrv_in_use(bs->backing_hd)) {
 1261:         return -EBUSY;
 1262:     }
 1263: 
 1264:     backing_drv = bs->backing_hd->drv;
 1265:     ro = bs->backing_hd->read_only;
 1266:     strncpy(filename, bs->backing_hd->filename, sizeof(filename));
 1267:     open_flags =  bs->backing_hd->open_flags;
 1268: 
 1269:     if (ro) {
 1270:         /* re-open as RW */
 1271:         bdrv_delete(bs->backing_hd);
 1272:         bs->backing_hd = NULL;
 1273:         bs_rw = bdrv_new("");
 1274:         rw_ret = bdrv_open(bs_rw, filename, open_flags | BDRV_O_RDWR,
 1275:             backing_drv);
 1276:         if (rw_ret < 0) {
 1277:             bdrv_delete(bs_rw);
 1278:             /* try to re-open read-only */
 1279:             bs_ro = bdrv_new("");
 1280:             ret = bdrv_open(bs_ro, filename, open_flags & ~BDRV_O_RDWR,
 1281:                 backing_drv);
 1282:             if (ret < 0) {
 1283:                 bdrv_delete(bs_ro);
 1284:                 /* drive not functional anymore */
 1285:                 bs->drv = NULL;
 1286:                 return ret;
 1287:             }
 1288:             bs->backing_hd = bs_ro;
 1289:             return rw_ret;
 1290:         }
 1291:         bs->backing_hd = bs_rw;
 1292:     }
 1293: 
 1294:     total_sectors = bdrv_getlength(bs) >> BDRV_SECTOR_BITS;
 1295:     buf = g_malloc(COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE);
 1296: 
 1297:     for (sector = 0; sector < total_sectors; sector += n) {
 1298:         if (bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n)) {
 1299: 
 1300:             if (bdrv_read(bs, sector, buf, n) != 0) {
 1301:                 ret = -EIO;
 1302:                 goto ro_cleanup;
 1303:             }
 1304: 
 1305:             if (bdrv_write(bs->backing_hd, sector, buf, n) != 0) {
 1306:                 ret = -EIO;
 1307:                 goto ro_cleanup;
 1308:             }
 1309:         }
 1310:     }
 1311: 
 1312:     if (drv->bdrv_make_empty) {
 1313:         ret = drv->bdrv_make_empty(bs);
 1314:         bdrv_flush(bs);
 1315:     }
 1316: 
 1317:     /*
 1318:      * Make sure all data we wrote to the backing device is actually
 1319:      * stable on disk.
 1320:      */
 1321:     if (bs->backing_hd)
 1322:         bdrv_flush(bs->backing_hd);
 1323: 
 1324: ro_cleanup:
 1325:     g_free(buf);
 1326: 
 1327:     if (ro) {
 1328:         /* re-open as RO */
 1329:         bdrv_delete(bs->backing_hd);
 1330:         bs->backing_hd = NULL;
 1331:         bs_ro = bdrv_new("");
 1332:         ret = bdrv_open(bs_ro, filename, open_flags & ~BDRV_O_RDWR,
 1333:             backing_drv);
 1334:         if (ret < 0) {
 1335:             bdrv_delete(bs_ro);
 1336:             /* drive not functional anymore */
 1337:             bs->drv = NULL;
 1338:             return ret;
 1339:         }
 1340:         bs->backing_hd = bs_ro;
 1341:         bs->backing_hd->keep_read_only = 0;
 1342:     }
 1343: 
 1344:     return ret;
 1345: }
 1346: 
 1347: int bdrv_commit_all(void)
 1348: {
 1349:     BlockDriverState *bs;
 1350: 
 1351:     QTAILQ_FOREACH(bs, &bdrv_states, list) {
 1352:         int ret = bdrv_commit(bs);
 1353:         if (ret < 0) {
 1354:             return ret;
 1355:         }
 1356:     }
 1357:     return 0;
 1358: }
 1359: 
 1360: struct BdrvTrackedRequest {
 1361:     BlockDriverState *bs;
 1362:     int64_t sector_num;
 1363:     int nb_sectors;
 1364:     bool is_write;
 1365:     QLIST_ENTRY(BdrvTrackedRequest) list;
 1366:     Coroutine *co; /* owner, used for deadlock detection */
 1367:     CoQueue wait_queue; /* coroutines blocked on this request */
 1368: };
 1369: 
 1370: /**
 1371:  * Remove an active request from the tracked requests list
 1372:  *
 1373:  * This function should be called when a tracked request is completing.
 1374:  */
 1375: static void tracked_request_end(BdrvTrackedRequest *req)
 1376: {
 1377:     QLIST_REMOVE(req, list);
 1378:     qemu_co_queue_restart_all(&req->wait_queue);
 1379: }
 1380: 
 1381: /**
 1382:  * Add an active request to the tracked requests list
 1383:  */
 1384: static void tracked_request_begin(BdrvTrackedRequest *req,
 1385:                                   BlockDriverState *bs,
 1386:                                   int64_t sector_num,
 1387:                                   int nb_sectors, bool is_write)
 1388: {
 1389:     *req = (BdrvTrackedRequest){
 1390:         .bs = bs,
 1391:         .sector_num = sector_num,
 1392:         .nb_sectors = nb_sectors,
 1393:         .is_write = is_write,
 1394:         .co = qemu_coroutine_self(),
 1395:     };
 1396: 
 1397:     qemu_co_queue_init(&req->wait_queue);
 1398: 
 1399:     QLIST_INSERT_HEAD(&bs->tracked_requests, req, list);
 1400: }
 1401: 
 1402: /**
 1403:  * Round a region to cluster boundaries
 1404:  */
 1405: static void round_to_clusters(BlockDriverState *bs,
 1406:                               int64_t sector_num, int nb_sectors,
 1407:                               int64_t *cluster_sector_num,
 1408:                               int *cluster_nb_sectors)
 1409: {
 1410:     BlockDriverInfo bdi;
 1411: 
 1412:     if (bdrv_get_info(bs, &bdi) < 0 || bdi.cluster_size == 0) {
 1413:         *cluster_sector_num = sector_num;
 1414:         *cluster_nb_sectors = nb_sectors;
 1415:     } else {
 1416:         int64_t c = bdi.cluster_size / BDRV_SECTOR_SIZE;
 1417:         *cluster_sector_num = QEMU_ALIGN_DOWN(sector_num, c);
 1418:         *cluster_nb_sectors = QEMU_ALIGN_UP(sector_num - *cluster_sector_num +
 1419:                                             nb_sectors, c);
 1420:     }
 1421: }
 1422: 
 1423: static bool tracked_request_overlaps(BdrvTrackedRequest *req,
 1424:                                      int64_t sector_num, int nb_sectors) {
 1425:     /*        aaaa   bbbb */
 1426:     if (sector_num >= req->sector_num + req->nb_sectors) {
 1427:         return false;
 1428:     }
 1429:     /* bbbb   aaaa        */
 1430:     if (req->sector_num >= sector_num + nb_sectors) {
 1431:         return false;
 1432:     }
 1433:     return true;
 1434: }
 1435: 
 1436: static void coroutine_fn wait_for_overlapping_requests(BlockDriverState *bs,
 1437:         int64_t sector_num, int nb_sectors)
 1438: {
 1439:     BdrvTrackedRequest *req;
 1440:     int64_t cluster_sector_num;
 1441:     int cluster_nb_sectors;
 1442:     bool retry;
 1443: 
 1444:     /* If we touch the same cluster it counts as an overlap.  This guarantees
 1445:      * that allocating writes will be serialized and not race with each other
 1446:      * for the same cluster.  For example, in copy-on-read it ensures that the
 1447:      * CoR read and write operations are atomic and guest writes cannot
 1448:      * interleave between them.
 1449:      */
 1450:     round_to_clusters(bs, sector_num, nb_sectors,
 1451:                       &cluster_sector_num, &cluster_nb_sectors);
 1452: 
 1453:     do {
 1454:         retry = false;
 1455:         QLIST_FOREACH(req, &bs->tracked_requests, list) {
 1456:             if (tracked_request_overlaps(req, cluster_sector_num,
 1457:                                          cluster_nb_sectors)) {
 1458:                 /* Hitting this means there was a reentrant request, for
 1459:                  * example, a block driver issuing nested requests.  This must
 1460:                  * never happen since it means deadlock.
 1461:                  */
 1462:                 assert(qemu_coroutine_self() != req->co);
 1463: 
 1464:                 qemu_co_queue_wait(&req->wait_queue);
 1465:                 retry = true;
 1466:                 break;
 1467:             }
 1468:         }
 1469:     } while (retry);
 1470: }
 1471: 
 1472: /*
 1473:  * Return values:
 1474:  * 0        - success
 1475:  * -EINVAL  - backing format specified, but no file
 1476:  * -ENOSPC  - can't update the backing file because no space is left in the
 1477:  *            image file header
 1478:  * -ENOTSUP - format driver doesn't support changing the backing file
 1479:  */
 1480: int bdrv_change_backing_file(BlockDriverState *bs,
 1481:     const char *backing_file, const char *backing_fmt)
 1482: {
 1483:     BlockDriver *drv = bs->drv;
 1484:     int ret;
 1485: 
 1486:     /* Backing file format doesn't make sense without a backing file */
 1487:     if (backing_fmt && !backing_file) {
 1488:         return -EINVAL;
 1489:     }
 1490: 
 1491:     if (drv->bdrv_change_backing_file != NULL) {
 1492:         ret = drv->bdrv_change_backing_file(bs, backing_file, backing_fmt);
 1493:     } else {
 1494:         ret = -ENOTSUP;
 1495:     }
 1496: 
 1497:     if (ret == 0) {
 1498:         pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_file ?: "");
 1499:         pstrcpy(bs->backing_format, sizeof(bs->backing_format), backing_fmt ?: "");
 1500:     }
 1501:     return ret;
 1502: }
 1503: 
 1504: static int bdrv_check_byte_request(BlockDriverState *bs, int64_t offset,
 1505:                                    size_t size)
 1506: {
 1507:     int64_t len;
 1508: 
 1509:     if (!bdrv_is_inserted(bs))
 1510:         return -ENOMEDIUM;
 1511: 
 1512:     if (bs->growable)
 1513:         return 0;
 1514: 
 1515:     len = bdrv_getlength(bs);
 1516: 
 1517:     if (offset < 0)
 1518:         return -EIO;
 1519: 
 1520:     if ((offset > len) || (len - offset < size))
 1521:         return -EIO;
 1522: 
 1523:     return 0;
 1524: }
 1525: 
 1526: static int bdrv_check_request(BlockDriverState *bs, int64_t sector_num,
 1527:                               int nb_sectors)
 1528: {
 1529:     return bdrv_check_byte_request(bs, sector_num * BDRV_SECTOR_SIZE,
 1530:                                    nb_sectors * BDRV_SECTOR_SIZE);
 1531: }
 1532: 
 1533: typedef struct RwCo {
 1534:     BlockDriverState *bs;
 1535:     int64_t sector_num;
 1536:     int nb_sectors;
 1537:     QEMUIOVector *qiov;
 1538:     bool is_write;
 1539:     int ret;
 1540: } RwCo;
 1541: 
 1542: static void coroutine_fn bdrv_rw_co_entry(void *opaque)
 1543: {
 1544:     RwCo *rwco = opaque;
 1545: 
 1546:     if (!rwco->is_write) {
 1547:         rwco->ret = bdrv_co_do_readv(rwco->bs, rwco->sector_num,
 1548:                                      rwco->nb_sectors, rwco->qiov, 0);
 1549:     } else {
 1550:         rwco->ret = bdrv_co_do_writev(rwco->bs, rwco->sector_num,
 1551:                                       rwco->nb_sectors, rwco->qiov, 0);
 1552:     }
 1553: }
 1554: 
 1555: /*
 1556:  * Process a synchronous request using coroutines
 1557:  */
 1558: static int bdrv_rw_co(BlockDriverState *bs, int64_t sector_num, uint8_t *buf,
 1559:                       int nb_sectors, bool is_write)
 1560: {
 1561:     QEMUIOVector qiov;
 1562:     struct iovec iov = {
 1563:         .iov_base = (void *)buf,
 1564:         .iov_len = nb_sectors * BDRV_SECTOR_SIZE,
 1565:     };
 1566:     Coroutine *co;
 1567:     RwCo rwco = {
 1568:         .bs = bs,
 1569:         .sector_num = sector_num,
 1570:         .nb_sectors = nb_sectors,
 1571:         .qiov = &qiov,
 1572:         .is_write = is_write,
 1573:         .ret = NOT_DONE,
 1574:     };
 1575: 
 1576:     qemu_iovec_init_external(&qiov, &iov, 1);
 1577: 
 1578:     /**
 1579:      * In sync call context, when the vcpu is blocked, this throttling timer
 1580:      * will not fire; so the I/O throttling function has to be disabled here
 1581:      * if it has been enabled.
 1582:      */
 1583:     if (bs->io_limits_enabled) {
 1584:         fprintf(stderr, "Disabling I/O throttling on '%s' due "
 1585:                         "to synchronous I/O.\n", bdrv_get_device_name(bs));
 1586:         bdrv_io_limits_disable(bs);
 1587:     }
 1588: 
 1589:     if (qemu_in_coroutine()) {
 1590:         /* Fast-path if already in coroutine context */
 1591:         bdrv_rw_co_entry(&rwco);
 1592:     } else {
 1593:         co = qemu_coroutine_create(bdrv_rw_co_entry);
 1594:         qemu_coroutine_enter(co, &rwco);
 1595:         while (rwco.ret == NOT_DONE) {
 1596:             qemu_aio_wait();
 1597:         }
 1598:     }
 1599:     return rwco.ret;
 1600: }
 1601: 
 1602: /* return < 0 if error. See bdrv_write() for the return codes */
 1603: int bdrv_read(BlockDriverState *bs, int64_t sector_num,
 1604:               uint8_t *buf, int nb_sectors)
 1605: {
 1606:     return bdrv_rw_co(bs, sector_num, buf, nb_sectors, false);
 1607: }
 1608: 
 1609: #define BITS_PER_LONG  (sizeof(unsigned long) * 8)
 1610: 
 1611: static void set_dirty_bitmap(BlockDriverState *bs, int64_t sector_num,
 1612:                              int nb_sectors, int dirty)
 1613: {
 1614:     int64_t start, end;
 1615:     unsigned long val, idx, bit;
 1616: 
 1617:     start = sector_num / BDRV_SECTORS_PER_DIRTY_CHUNK;
 1618:     end = (sector_num + nb_sectors - 1) / BDRV_SECTORS_PER_DIRTY_CHUNK;
 1619: 
 1620:     for (; start <= end; start++) {
 1621:         idx = start / BITS_PER_LONG;
 1622:         bit = start % BITS_PER_LONG;
 1623:         val = bs->dirty_bitmap[idx];
 1624:         if (dirty) {
 1625:             if (!(val & (1UL << bit))) {
 1626:                 bs->dirty_count++;
 1627:                 val |= 1UL << bit;
 1628:             }
 1629:         } else {
 1630:             if (val & (1UL << bit)) {
 1631:                 bs->dirty_count--;
 1632:                 val &= ~(1UL << bit);
 1633:             }
 1634:         }
 1635:         bs->dirty_bitmap[idx] = val;
 1636:     }
 1637: }
 1638: 
 1639: /* Return < 0 if error. Important errors are:
 1640:   -EIO         generic I/O error (may happen for all errors)
 1641:   -ENOMEDIUM   No media inserted.
 1642:   -EINVAL      Invalid sector number or nb_sectors
 1643:   -EACCES      Trying to write a read-only device
 1644: */
 1645: int bdrv_write(BlockDriverState *bs, int64_t sector_num,
 1646:                const uint8_t *buf, int nb_sectors)
 1647: {
 1648:     return bdrv_rw_co(bs, sector_num, (uint8_t *)buf, nb_sectors, true);
 1649: }
 1650: 
 1651: int bdrv_pread(BlockDriverState *bs, int64_t offset,
 1652:                void *buf, int count1)
 1653: {
 1654:     uint8_t tmp_buf[BDRV_SECTOR_SIZE];
 1655:     int len, nb_sectors, count;
 1656:     int64_t sector_num;
 1657:     int ret;
 1658: 
 1659:     count = count1;
 1660:     /* first read to align to sector start */
 1661:     len = (BDRV_SECTOR_SIZE - offset) & (BDRV_SECTOR_SIZE - 1);
 1662:     if (len > count)
 1663:         len = count;
 1664:     sector_num = offset >> BDRV_SECTOR_BITS;
 1665:     if (len > 0) {
 1666:         if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
 1667:             return ret;
 1668:         memcpy(buf, tmp_buf + (offset & (BDRV_SECTOR_SIZE - 1)), len);
 1669:         count -= len;
 1670:         if (count == 0)
 1671:             return count1;
 1672:         sector_num++;
 1673:         buf += len;
 1674:     }
 1675: 
 1676:     /* read the sectors "in place" */
 1677:     nb_sectors = count >> BDRV_SECTOR_BITS;
 1678:     if (nb_sectors > 0) {
 1679:         if ((ret = bdrv_read(bs, sector_num, buf, nb_sectors)) < 0)
 1680:             return ret;
 1681:         sector_num += nb_sectors;
 1682:         len = nb_sectors << BDRV_SECTOR_BITS;
 1683:         buf += len;
 1684:         count -= len;
 1685:     }
 1686: 
 1687:     /* add data from the last sector */
 1688:     if (count > 0) {
 1689:         if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
 1690:             return ret;
 1691:         memcpy(buf, tmp_buf, count);
 1692:     }
 1693:     return count1;
 1694: }
 1695: 
 1696: int bdrv_pwrite(BlockDriverState *bs, int64_t offset,
 1697:                 const void *buf, int count1)
 1698: {
 1699:     uint8_t tmp_buf[BDRV_SECTOR_SIZE];
 1700:     int len, nb_sectors, count;
 1701:     int64_t sector_num;
 1702:     int ret;
 1703: 
 1704:     count = count1;
 1705:     /* first write to align to sector start */
 1706:     len = (BDRV_SECTOR_SIZE - offset) & (BDRV_SECTOR_SIZE - 1);
 1707:     if (len > count)
 1708:         len = count;
 1709:     sector_num = offset >> BDRV_SECTOR_BITS;
 1710:     if (len > 0) {
 1711:         if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
 1712:             return ret;
 1713:         memcpy(tmp_buf + (offset & (BDRV_SECTOR_SIZE - 1)), buf, len);
 1714:         if ((ret = bdrv_write(bs, sector_num, tmp_buf, 1)) < 0)
 1715:             return ret;
 1716:         count -= len;
 1717:         if (count == 0)
 1718:             return count1;
 1719:         sector_num++;
 1720:         buf += len;
 1721:     }
 1722: 
 1723:     /* write the sectors "in place" */
 1724:     nb_sectors = count >> BDRV_SECTOR_BITS;
 1725:     if (nb_sectors > 0) {
 1726:         if ((ret = bdrv_write(bs, sector_num, buf, nb_sectors)) < 0)
 1727:             return ret;
 1728:         sector_num += nb_sectors;
 1729:         len = nb_sectors << BDRV_SECTOR_BITS;
 1730:         buf += len;
 1731:         count -= len;
 1732:     }
 1733: 
 1734:     /* add data from the last sector */
 1735:     if (count > 0) {
 1736:         if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
 1737:             return ret;
 1738:         memcpy(tmp_buf, buf, count);
 1739:         if ((ret = bdrv_write(bs, sector_num, tmp_buf, 1)) < 0)
 1740:             return ret;
 1741:     }
 1742:     return count1;
 1743: }
 1744: 
 1745: /*
 1746:  * Writes to the file and ensures that no writes are reordered across this
 1747:  * request (acts as a barrier)
 1748:  *
 1749:  * Returns 0 on success, -errno in error cases.
 1750:  */
 1751: int bdrv_pwrite_sync(BlockDriverState *bs, int64_t offset,
 1752:     const void *buf, int count)
 1753: {
 1754:     int ret;
 1755: 
 1756:     ret = bdrv_pwrite(bs, offset, buf, count);
 1757:     if (ret < 0) {
 1758:         return ret;
 1759:     }
 1760: 
 1761:     /* No flush needed for cache modes that use O_DSYNC */
 1762:     if ((bs->open_flags & BDRV_O_CACHE_WB) != 0) {
 1763:         bdrv_flush(bs);
 1764:     }
 1765: 
 1766:     return 0;
 1767: }
 1768: 
 1769: static int coroutine_fn bdrv_co_do_copy_on_readv(BlockDriverState *bs,
 1770:         int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
 1771: {
 1772:     /* Perform I/O through a temporary buffer so that users who scribble over
 1773:      * their read buffer while the operation is in progress do not end up
 1774:      * modifying the image file.  This is critical for zero-copy guest I/O
 1775:      * where anything might happen inside guest memory.
 1776:      */
 1777:     void *bounce_buffer;
 1778: 
 1779:     BlockDriver *drv = bs->drv;
 1780:     struct iovec iov;
 1781:     QEMUIOVector bounce_qiov;
 1782:     int64_t cluster_sector_num;
 1783:     int cluster_nb_sectors;
 1784:     size_t skip_bytes;
 1785:     int ret;
 1786: 
 1787:     /* Cover entire cluster so no additional backing file I/O is required when
 1788:      * allocating cluster in the image file.
 1789:      */
 1790:     round_to_clusters(bs, sector_num, nb_sectors,
 1791:                       &cluster_sector_num, &cluster_nb_sectors);
 1792: 
 1793:     trace_bdrv_co_do_copy_on_readv(bs, sector_num, nb_sectors,
 1794:                                    cluster_sector_num, cluster_nb_sectors);
 1795: 
 1796:     iov.iov_len = cluster_nb_sectors * BDRV_SECTOR_SIZE;
 1797:     iov.iov_base = bounce_buffer = qemu_blockalign(bs, iov.iov_len);
 1798:     qemu_iovec_init_external(&bounce_qiov, &iov, 1);
 1799: 
 1800:     ret = drv->bdrv_co_readv(bs, cluster_sector_num, cluster_nb_sectors,
 1801:                              &bounce_qiov);
 1802:     if (ret < 0) {
 1803:         goto err;
 1804:     }
 1805: 
 1806:     if (drv->bdrv_co_write_zeroes &&
 1807:         buffer_is_zero(bounce_buffer, iov.iov_len)) {
 1808:         ret = bdrv_co_do_write_zeroes(bs, cluster_sector_num,
 1809:                                       cluster_nb_sectors);
 1810:     } else {
 1811:         ret = drv->bdrv_co_writev(bs, cluster_sector_num, cluster_nb_sectors,
 1812:                                   &bounce_qiov);
 1813:     }
 1814: 
 1815:     if (ret < 0) {
 1816:         /* It might be okay to ignore write errors for guest requests.  If this
 1817:          * is a deliberate copy-on-read then we don't want to ignore the error.
 1818:          * Simply report it in all cases.
 1819:          */
 1820:         goto err;
 1821:     }
 1822: 
 1823:     skip_bytes = (sector_num - cluster_sector_num) * BDRV_SECTOR_SIZE;
 1824:     qemu_iovec_from_buffer(qiov, bounce_buffer + skip_bytes,
 1825:                            nb_sectors * BDRV_SECTOR_SIZE);
 1826: 
 1827: err:
 1828:     qemu_vfree(bounce_buffer);
 1829:     return ret;
 1830: }
 1831: 
 1832: /*
 1833:  * Handle a read request in coroutine context
 1834:  */
 1835: static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
 1836:     int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
 1837:     BdrvRequestFlags flags)
 1838: {
 1839:     BlockDriver *drv = bs->drv;
 1840:     BdrvTrackedRequest req;
 1841:     int ret;
 1842: 
 1843:     if (!drv) {
 1844:         return -ENOMEDIUM;
 1845:     }
 1846:     if (bdrv_check_request(bs, sector_num, nb_sectors)) {
 1847:         return -EIO;
 1848:     }
 1849: 
 1850:     /* throttling disk read I/O */
 1851:     if (bs->io_limits_enabled) {
 1852:         bdrv_io_limits_intercept(bs, false, nb_sectors);
 1853:     }
 1854: 
 1855:     if (bs->copy_on_read) {
 1856:         flags |= BDRV_REQ_COPY_ON_READ;
 1857:     }
 1858:     if (flags & BDRV_REQ_COPY_ON_READ) {
 1859:         bs->copy_on_read_in_flight++;
 1860:     }
 1861: 
 1862:     if (bs->copy_on_read_in_flight) {
 1863:         wait_for_overlapping_requests(bs, sector_num, nb_sectors);
 1864:     }
 1865: 
 1866:     tracked_request_begin(&req, bs, sector_num, nb_sectors, false);
 1867: 
 1868:     if (flags & BDRV_REQ_COPY_ON_READ) {
 1869:         int pnum;
 1870: 
 1871:         ret = bdrv_co_is_allocated(bs, sector_num, nb_sectors, &pnum);
 1872:         if (ret < 0) {
 1873:             goto out;
 1874:         }
 1875: 
 1876:         if (!ret || pnum != nb_sectors) {
 1877:             ret = bdrv_co_do_copy_on_readv(bs, sector_num, nb_sectors, qiov);
 1878:             goto out;
 1879:         }
 1880:     }
 1881: 
 1882:     ret = drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov);
 1883: 
 1884: out:
 1885:     tracked_request_end(&req);
 1886: 
 1887:     if (flags & BDRV_REQ_COPY_ON_READ) {
 1888:         bs->copy_on_read_in_flight--;
 1889:     }
 1890: 
 1891:     return ret;
 1892: }
 1893: 
 1894: int coroutine_fn bdrv_co_readv(BlockDriverState *bs, int64_t sector_num,
 1895:     int nb_sectors, QEMUIOVector *qiov)
 1896: {
 1897:     trace_bdrv_co_readv(bs, sector_num, nb_sectors);
 1898: 
 1899:     return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov, 0);
 1900: }
 1901: 
 1902: int coroutine_fn bdrv_co_copy_on_readv(BlockDriverState *bs,
 1903:     int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
 1904: {
 1905:     trace_bdrv_co_copy_on_readv(bs, sector_num, nb_sectors);
 1906: 
 1907:     return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov,
 1908:                             BDRV_REQ_COPY_ON_READ);
 1909: }
 1910: 
 1911: static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
 1912:     int64_t sector_num, int nb_sectors)
 1913: {
 1914:     BlockDriver *drv = bs->drv;
 1915:     QEMUIOVector qiov;
 1916:     struct iovec iov;
 1917:     int ret;
 1918: 
 1919:     /* TODO Emulate only part of misaligned requests instead of letting block
 1920:      * drivers return -ENOTSUP and emulate everything */
 1921: 
 1922:     /* First try the efficient write zeroes operation */
 1923:     if (drv->bdrv_co_write_zeroes) {
 1924:         ret = drv->bdrv_co_write_zeroes(bs, sector_num, nb_sectors);
 1925:         if (ret != -ENOTSUP) {
 1926:             return ret;
 1927:         }
 1928:     }
 1929: 
 1930:     /* Fall back to bounce buffer if write zeroes is unsupported */
 1931:     iov.iov_len  = nb_sectors * BDRV_SECTOR_SIZE;
 1932:     iov.iov_base = qemu_blockalign(bs, iov.iov_len);
 1933:     memset(iov.iov_base, 0, iov.iov_len);
 1934:     qemu_iovec_init_external(&qiov, &iov, 1);
 1935: 
 1936:     ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, &qiov);
 1937: 
 1938:     qemu_vfree(iov.iov_base);
 1939:     return ret;
 1940: }
 1941: 
 1942: /*
 1943:  * Handle a write request in coroutine context
 1944:  */
 1945: static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
 1946:     int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
 1947:     BdrvRequestFlags flags)
 1948: {
 1949:     BlockDriver *drv = bs->drv;
 1950:     BdrvTrackedRequest req;
 1951:     int ret;
 1952: 
 1953:     if (!bs->drv) {
 1954:         return -ENOMEDIUM;
 1955:     }
 1956:     if (bs->read_only) {
 1957:         return -EACCES;
 1958:     }
 1959:     if (bdrv_check_request(bs, sector_num, nb_sectors)) {
 1960:         return -EIO;
 1961:     }
 1962: 
 1963:     /* throttling disk write I/O */
 1964:     if (bs->io_limits_enabled) {
 1965:         bdrv_io_limits_intercept(bs, true, nb_sectors);
 1966:     }
 1967: 
 1968:     if (bs->copy_on_read_in_flight) {
 1969:         wait_for_overlapping_requests(bs, sector_num, nb_sectors);
 1970:     }
 1971: 
 1972:     tracked_request_begin(&req, bs, sector_num, nb_sectors, true);
 1973: 
 1974:     if (flags & BDRV_REQ_ZERO_WRITE) {
 1975:         ret = bdrv_co_do_write_zeroes(bs, sector_num, nb_sectors);
 1976:     } else {
 1977:         ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, qiov);
 1978:     }
 1979: 
 1980:     if (bs->dirty_bitmap) {
 1981:         set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
 1982:     }
 1983: 
 1984:     if (bs->wr_highest_sector < sector_num + nb_sectors - 1) {
 1985:         bs->wr_highest_sector = sector_num + nb_sectors - 1;
 1986:     }
 1987: 
 1988:     tracked_request_end(&req);
 1989: 
 1990:     return ret;
 1991: }
 1992: 
 1993: int coroutine_fn bdrv_co_writev(BlockDriverState *bs, int64_t sector_num,
 1994:     int nb_sectors, QEMUIOVector *qiov)
 1995: {
 1996:     trace_bdrv_co_writev(bs, sector_num, nb_sectors);
 1997: 
 1998:     return bdrv_co_do_writev(bs, sector_num, nb_sectors, qiov, 0);
 1999: }
 2000: 
 2001: int coroutine_fn bdrv_co_write_zeroes(BlockDriverState *bs,
 2002:                                       int64_t sector_num, int nb_sectors)
 2003: {
 2004:     trace_bdrv_co_write_zeroes(bs, sector_num, nb_sectors);
 2005: 
 2006:     return bdrv_co_do_writev(bs, sector_num, nb_sectors, NULL,
 2007:                              BDRV_REQ_ZERO_WRITE);
 2008: }
 2009: 
 2010: /**
 2011:  * Truncate file to 'offset' bytes (needed only for file protocols)
 2012:  */
 2013: int bdrv_truncate(BlockDriverState *bs, int64_t offset)
 2014: {
 2015:     BlockDriver *drv = bs->drv;
 2016:     int ret;
 2017:     if (!drv)
 2018:         return -ENOMEDIUM;
 2019:     if (!drv->bdrv_truncate)
 2020:         return -ENOTSUP;
 2021:     if (bs->read_only)
 2022:         return -EACCES;
 2023:     if (bdrv_in_use(bs))
 2024:         return -EBUSY;
 2025:     ret = drv->bdrv_truncate(bs, offset);
 2026:     if (ret == 0) {
 2027:         ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
 2028:         bdrv_dev_resize_cb(bs);
 2029:     }
 2030:     return ret;
 2031: }
 2032: 
 2033: /**
 2034:  * Length of a allocated file in bytes. Sparse files are counted by actual
 2035:  * allocated space. Return < 0 if error or unknown.
 2036:  */
 2037: int64_t bdrv_get_allocated_file_size(BlockDriverState *bs)
 2038: {
 2039:     BlockDriver *drv = bs->drv;
 2040:     if (!drv) {
 2041:         return -ENOMEDIUM;
 2042:     }
 2043:     if (drv->bdrv_get_allocated_file_size) {
 2044:         return drv->bdrv_get_allocated_file_size(bs);
 2045:     }
 2046:     if (bs->file) {
 2047:         return bdrv_get_allocated_file_size(bs->file);
 2048:     }
 2049:     return -ENOTSUP;
 2050: }
 2051: 
 2052: /**
 2053:  * Length of a file in bytes. Return < 0 if error or unknown.
 2054:  */
 2055: int64_t bdrv_getlength(BlockDriverState *bs)
 2056: {
 2057:     BlockDriver *drv = bs->drv;
 2058:     if (!drv)
 2059:         return -ENOMEDIUM;
 2060: 
 2061:     if (bs->growable || bdrv_dev_has_removable_media(bs)) {
 2062:         if (drv->bdrv_getlength) {
 2063:             return drv->bdrv_getlength(bs);
 2064:         }
 2065:     }
 2066:     return bs->total_sectors * BDRV_SECTOR_SIZE;
 2067: }
 2068: 
 2069: /* return 0 as number of sectors if no device present or error */
 2070: void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr)
 2071: {
 2072:     int64_t length;
 2073:     length = bdrv_getlength(bs);
 2074:     if (length < 0)
 2075:         length = 0;
 2076:     else
 2077:         length = length >> BDRV_SECTOR_BITS;
 2078:     *nb_sectors_ptr = length;
 2079: }
 2080: 
 2081: struct partition {
 2082:         uint8_t boot_ind;           /* 0x80 - active */
 2083:         uint8_t head;               /* starting head */
 2084:         uint8_t sector;             /* starting sector */
 2085:         uint8_t cyl;                /* starting cylinder */
 2086:         uint8_t sys_ind;            /* What partition type */
 2087:         uint8_t end_head;           /* end head */
 2088:         uint8_t end_sector;         /* end sector */
 2089:         uint8_t end_cyl;            /* end cylinder */
 2090:         uint32_t start_sect;        /* starting sector counting from 0 */
 2091:         uint32_t nr_sects;          /* nr of sectors in partition */
 2092: } QEMU_PACKED;
 2093: 
 2094: /* try to guess the disk logical geometry from the MSDOS partition table. Return 0 if OK, -1 if could not guess */
 2095: static int guess_disk_lchs(BlockDriverState *bs,
 2096:                            int *pcylinders, int *pheads, int *psectors)
 2097: {
 2098:     uint8_t buf[BDRV_SECTOR_SIZE];
 2099:     int ret, i, heads, sectors, cylinders;
 2100:     struct partition *p;
 2101:     uint32_t nr_sects;
 2102:     uint64_t nb_sectors;
 2103:     bool enabled;
 2104: 
 2105:     bdrv_get_geometry(bs, &nb_sectors);
 2106: 
 2107:     /**
 2108:      * The function will be invoked during startup not only in sync I/O mode,
 2109:      * but also in async I/O mode. So the I/O throttling function has to
 2110:      * be disabled temporarily here, not permanently.
 2111:      */
 2112:     enabled = bs->io_limits_enabled;
 2113:     bs->io_limits_enabled = false;
 2114:     ret = bdrv_read(bs, 0, buf, 1);
 2115:     bs->io_limits_enabled = enabled;
 2116:     if (ret < 0)
 2117:         return -1;
 2118:     /* test msdos magic */
 2119:     if (buf[510] != 0x55 || buf[511] != 0xaa)
 2120:         return -1;
 2121:     for(i = 0; i < 4; i++) {
 2122:         p = ((struct partition *)(buf + 0x1be)) + i;
 2123:         nr_sects = le32_to_cpu(p->nr_sects);
 2124:         if (nr_sects && p->end_head) {
 2125:             /* We make the assumption that the partition terminates on
 2126:                a cylinder boundary */
 2127:             heads = p->end_head + 1;
 2128:             sectors = p->end_sector & 63;
 2129:             if (sectors == 0)
 2130:                 continue;
 2131:             cylinders = nb_sectors / (heads * sectors);
 2132:             if (cylinders < 1 || cylinders > 16383)
 2133:                 continue;
 2134:             *pheads = heads;
 2135:             *psectors = sectors;
 2136:             *pcylinders = cylinders;
 2137: #if 0
 2138:             printf("guessed geometry: LCHS=%d %d %d\n",
 2139:                    cylinders, heads, sectors);
 2140: #endif
 2141:             return 0;
 2142:         }
 2143:     }
 2144:     return -1;
 2145: }
 2146: 
 2147: void bdrv_guess_geometry(BlockDriverState *bs, int *pcyls, int *pheads, int *psecs)
 2148: {
 2149:     int translation, lba_detected = 0;
 2150:     int cylinders, heads, secs;
 2151:     uint64_t nb_sectors;
 2152: 
 2153:     /* if a geometry hint is available, use it */
 2154:     bdrv_get_geometry(bs, &nb_sectors);
 2155:     bdrv_get_geometry_hint(bs, &cylinders, &heads, &secs);
 2156:     translation = bdrv_get_translation_hint(bs);
 2157:     if (cylinders != 0) {
 2158:         *pcyls = cylinders;
 2159:         *pheads = heads;
 2160:         *psecs = secs;
 2161:     } else {
 2162:         if (guess_disk_lchs(bs, &cylinders, &heads, &secs) == 0) {
 2163:             if (heads > 16) {
 2164:                 /* if heads > 16, it means that a BIOS LBA
 2165:                    translation was active, so the default
 2166:                    hardware geometry is OK */
 2167:                 lba_detected = 1;
 2168:                 goto default_geometry;
 2169:             } else {
 2170:                 *pcyls = cylinders;
 2171:                 *pheads = heads;
 2172:                 *psecs = secs;
 2173:                 /* disable any translation to be in sync with
 2174:                    the logical geometry */
 2175:                 if (translation == BIOS_ATA_TRANSLATION_AUTO) {
 2176:                     bdrv_set_translation_hint(bs,
 2177:                                               BIOS_ATA_TRANSLATION_NONE);
 2178:                 }
 2179:             }
 2180:         } else {
 2181:         default_geometry:
 2182:             /* if no geometry, use a standard physical disk geometry */
 2183:             cylinders = nb_sectors / (16 * 63);
 2184: 
 2185:             if (cylinders > 16383)
 2186:                 cylinders = 16383;
 2187:             else if (cylinders < 2)
 2188:                 cylinders = 2;
 2189:             *pcyls = cylinders;
 2190:             *pheads = 16;
 2191:             *psecs = 63;
 2192:             if ((lba_detected == 1) && (translation == BIOS_ATA_TRANSLATION_AUTO)) {
 2193:                 if ((*pcyls * *pheads) <= 131072) {
 2194:                     bdrv_set_translation_hint(bs,
 2195:                                               BIOS_ATA_TRANSLATION_LARGE);
 2196:                 } else {
 2197:                     bdrv_set_translation_hint(bs,
 2198:                                               BIOS_ATA_TRANSLATION_LBA);
 2199:                 }
 2200:             }
 2201:         }
 2202:         bdrv_set_geometry_hint(bs, *pcyls, *pheads, *psecs);
 2203:     }
 2204: }
 2205: 
 2206: void bdrv_set_geometry_hint(BlockDriverState *bs,
 2207:                             int cyls, int heads, int secs)
 2208: {
 2209:     bs->cyls = cyls;
 2210:     bs->heads = heads;
 2211:     bs->secs = secs;
 2212: }
 2213: 
 2214: void bdrv_set_translation_hint(BlockDriverState *bs, int translation)
 2215: {
 2216:     bs->translation = translation;
 2217: }
 2218: 
 2219: void bdrv_get_geometry_hint(BlockDriverState *bs,
 2220:                             int *pcyls, int *pheads, int *psecs)
 2221: {
 2222:     *pcyls = bs->cyls;
 2223:     *pheads = bs->heads;
 2224:     *psecs = bs->secs;
 2225: }
 2226: 
 2227: /* throttling disk io limits */
 2228: void bdrv_set_io_limits(BlockDriverState *bs,
 2229:                         BlockIOLimit *io_limits)
 2230: {
 2231:     bs->io_limits = *io_limits;
 2232:     bs->io_limits_enabled = bdrv_io_limits_enabled(bs);
 2233: }
 2234: 
 2235: /* Recognize floppy formats */
 2236: typedef struct FDFormat {
 2237:     FDriveType drive;
 2238:     uint8_t last_sect;
 2239:     uint8_t max_track;
 2240:     uint8_t max_head;
 2241:     FDriveRate rate;
 2242: } FDFormat;
 2243: 
 2244: static const FDFormat fd_formats[] = {
 2245:     /* First entry is default format */
 2246:     /* 1.44 MB 3"1/2 floppy disks */
 2247:     { FDRIVE_DRV_144, 18, 80, 1, FDRIVE_RATE_500K, },
 2248:     { FDRIVE_DRV_144, 20, 80, 1, FDRIVE_RATE_500K, },
 2249:     { FDRIVE_DRV_144, 21, 80, 1, FDRIVE_RATE_500K, },
 2250:     { FDRIVE_DRV_144, 21, 82, 1, FDRIVE_RATE_500K, },
 2251:     { FDRIVE_DRV_144, 21, 83, 1, FDRIVE_RATE_500K, },
 2252:     { FDRIVE_DRV_144, 22, 80, 1, FDRIVE_RATE_500K, },
 2253:     { FDRIVE_DRV_144, 23, 80, 1, FDRIVE_RATE_500K, },
 2254:     { FDRIVE_DRV_144, 24, 80, 1, FDRIVE_RATE_500K, },
 2255:     /* 2.88 MB 3"1/2 floppy disks */
 2256:     { FDRIVE_DRV_288, 36, 80, 1, FDRIVE_RATE_1M, },
 2257:     { FDRIVE_DRV_288, 39, 80, 1, FDRIVE_RATE_1M, },
 2258:     { FDRIVE_DRV_288, 40, 80, 1, FDRIVE_RATE_1M, },
 2259:     { FDRIVE_DRV_288, 44, 80, 1, FDRIVE_RATE_1M, },
 2260:     { FDRIVE_DRV_288, 48, 80, 1, FDRIVE_RATE_1M, },
 2261:     /* 720 kB 3"1/2 floppy disks */
 2262:     { FDRIVE_DRV_144,  9, 80, 1, FDRIVE_RATE_250K, },
 2263:     { FDRIVE_DRV_144, 10, 80, 1, FDRIVE_RATE_250K, },
 2264:     { FDRIVE_DRV_144, 10, 82, 1, FDRIVE_RATE_250K, },
 2265:     { FDRIVE_DRV_144, 10, 83, 1, FDRIVE_RATE_250K, },
 2266:     { FDRIVE_DRV_144, 13, 80, 1, FDRIVE_RATE_250K, },
 2267:     { FDRIVE_DRV_144, 14, 80, 1, FDRIVE_RATE_250K, },
 2268:     /* 1.2 MB 5"1/4 floppy disks */
 2269:     { FDRIVE_DRV_120, 15, 80, 1, FDRIVE_RATE_500K, },
 2270:     { FDRIVE_DRV_120, 18, 80, 1, FDRIVE_RATE_500K, },
 2271:     { FDRIVE_DRV_120, 18, 82, 1, FDRIVE_RATE_500K, },
 2272:     { FDRIVE_DRV_120, 18, 83, 1, FDRIVE_RATE_500K, },
 2273:     { FDRIVE_DRV_120, 20, 80, 1, FDRIVE_RATE_500K, },
 2274:     /* 720 kB 5"1/4 floppy disks */
 2275:     { FDRIVE_DRV_120,  9, 80, 1, FDRIVE_RATE_250K, },
 2276:     { FDRIVE_DRV_120, 11, 80, 1, FDRIVE_RATE_250K, },
 2277:     /* 360 kB 5"1/4 floppy disks */
 2278:     { FDRIVE_DRV_120,  9, 40, 1, FDRIVE_RATE_300K, },
 2279:     { FDRIVE_DRV_120,  9, 40, 0, FDRIVE_RATE_300K, },
 2280:     { FDRIVE_DRV_120, 10, 41, 1, FDRIVE_RATE_300K, },
 2281:     { FDRIVE_DRV_120, 10, 42, 1, FDRIVE_RATE_300K, },
 2282:     /* 320 kB 5"1/4 floppy disks */
 2283:     { FDRIVE_DRV_120,  8, 40, 1, FDRIVE_RATE_250K, },
 2284:     { FDRIVE_DRV_120,  8, 40, 0, FDRIVE_RATE_250K, },
 2285:     /* 360 kB must match 5"1/4 better than 3"1/2... */
 2286:     { FDRIVE_DRV_144,  9, 80, 0, FDRIVE_RATE_250K, },
 2287:     /* end */
 2288:     { FDRIVE_DRV_NONE, -1, -1, 0, 0, },
 2289: };
 2290: 
 2291: void bdrv_get_floppy_geometry_hint(BlockDriverState *bs, int *nb_heads,
 2292:                                    int *max_track, int *last_sect,
 2293:                                    FDriveType drive_in, FDriveType *drive,
 2294:                                    FDriveRate *rate)
 2295: {
 2296:     const FDFormat *parse;
 2297:     uint64_t nb_sectors, size;
 2298:     int i, first_match, match;
 2299: 
 2300:     bdrv_get_geometry_hint(bs, nb_heads, max_track, last_sect);
 2301:     if (*nb_heads != 0 && *max_track != 0 && *last_sect != 0) {
 2302:         /* User defined disk */
 2303:         *rate = FDRIVE_RATE_500K;
 2304:     } else {
 2305:         bdrv_get_geometry(bs, &nb_sectors);
 2306:         match = -1;
 2307:         first_match = -1;
 2308:         for (i = 0; ; i++) {
 2309:             parse = &fd_formats[i];
 2310:             if (parse->drive == FDRIVE_DRV_NONE) {
 2311:                 break;
 2312:             }
 2313:             if (drive_in == parse->drive ||
 2314:                 drive_in == FDRIVE_DRV_NONE) {
 2315:                 size = (parse->max_head + 1) * parse->max_track *
 2316:                     parse->last_sect;
 2317:                 if (nb_sectors == size) {
 2318:                     match = i;
 2319:                     break;
 2320:                 }
 2321:                 if (first_match == -1) {
 2322:                     first_match = i;
 2323:                 }
 2324:             }
 2325:         }
 2326:         if (match == -1) {
 2327:             if (first_match == -1) {
 2328:                 match = 1;
 2329:             } else {
 2330:                 match = first_match;
 2331:             }
 2332:             parse = &fd_formats[match];
 2333:         }
 2334:         *nb_heads = parse->max_head + 1;
 2335:         *max_track = parse->max_track;
 2336:         *last_sect = parse->last_sect;
 2337:         *drive = parse->drive;
 2338:         *rate = parse->rate;
 2339:     }
 2340: }
 2341: 
 2342: int bdrv_get_translation_hint(BlockDriverState *bs)
 2343: {
 2344:     return bs->translation;
 2345: }
 2346: 
 2347: void bdrv_set_on_error(BlockDriverState *bs, BlockErrorAction on_read_error,
 2348:                        BlockErrorAction on_write_error)
 2349: {
 2350:     bs->on_read_error = on_read_error;
 2351:     bs->on_write_error = on_write_error;
 2352: }
 2353: 
 2354: BlockErrorAction bdrv_get_on_error(BlockDriverState *bs, int is_read)
 2355: {
 2356:     return is_read ? bs->on_read_error : bs->on_write_error;
 2357: }
 2358: 
 2359: int bdrv_is_read_only(BlockDriverState *bs)
 2360: {
 2361:     return bs->read_only;
 2362: }
 2363: 
 2364: int bdrv_is_sg(BlockDriverState *bs)
 2365: {
 2366:     return bs->sg;
 2367: }
 2368: 
 2369: int bdrv_enable_write_cache(BlockDriverState *bs)
 2370: {
 2371:     return bs->enable_write_cache;
 2372: }
 2373: 
 2374: int bdrv_is_encrypted(BlockDriverState *bs)
 2375: {
 2376:     if (bs->backing_hd && bs->backing_hd->encrypted)
 2377:         return 1;
 2378:     return bs->encrypted;
 2379: }
 2380: 
 2381: int bdrv_key_required(BlockDriverState *bs)
 2382: {
 2383:     BlockDriverState *backing_hd = bs->backing_hd;
 2384: 
 2385:     if (backing_hd && backing_hd->encrypted && !backing_hd->valid_key)
 2386:         return 1;
 2387:     return (bs->encrypted && !bs->valid_key);
 2388: }
 2389: 
 2390: int bdrv_set_key(BlockDriverState *bs, const char *key)
 2391: {
 2392:     int ret;
 2393:     if (bs->backing_hd && bs->backing_hd->encrypted) {
 2394:         ret = bdrv_set_key(bs->backing_hd, key);
 2395:         if (ret < 0)
 2396:             return ret;
 2397:         if (!bs->encrypted)
 2398:             return 0;
 2399:     }
 2400:     if (!bs->encrypted) {
 2401:         return -EINVAL;
 2402:     } else if (!bs->drv || !bs->drv->bdrv_set_key) {
 2403:         return -ENOMEDIUM;
 2404:     }
 2405:     ret = bs->drv->bdrv_set_key(bs, key);
 2406:     if (ret < 0) {
 2407:         bs->valid_key = 0;
 2408:     } else if (!bs->valid_key) {
 2409:         bs->valid_key = 1;
 2410:         /* call the change callback now, we skipped it on open */
 2411:         bdrv_dev_change_media_cb(bs, true);
 2412:     }
 2413:     return ret;
 2414: }
 2415: 
 2416: void bdrv_get_format(BlockDriverState *bs, char *buf, int buf_size)
 2417: {
 2418:     if (!bs->drv) {
 2419:         buf[0] = '\0';
 2420:     } else {
 2421:         pstrcpy(buf, buf_size, bs->drv->format_name);
 2422:     }
 2423: }
 2424: 
 2425: void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
 2426:                          void *opaque)
 2427: {
 2428:     BlockDriver *drv;
 2429: 
 2430:     QLIST_FOREACH(drv, &bdrv_drivers, list) {
 2431:         it(opaque, drv->format_name);
 2432:     }
 2433: }
 2434: 
 2435: BlockDriverState *bdrv_find(const char *name)
 2436: {
 2437:     BlockDriverState *bs;
 2438: 
 2439:     QTAILQ_FOREACH(bs, &bdrv_states, list) {
 2440:         if (!strcmp(name, bs->device_name)) {
 2441:             return bs;
 2442:         }
 2443:     }
 2444:     return NULL;
 2445: }
 2446: 
 2447: BlockDriverState *bdrv_next(BlockDriverState *bs)
 2448: {
 2449:     if (!bs) {
 2450:         return QTAILQ_FIRST(&bdrv_states);
 2451:     }
 2452:     return QTAILQ_NEXT(bs, list);
 2453: }
 2454: 
 2455: void bdrv_iterate(void (*it)(void *opaque, BlockDriverState *bs), void *opaque)
 2456: {
 2457:     BlockDriverState *bs;
 2458: 
 2459:     QTAILQ_FOREACH(bs, &bdrv_states, list) {
 2460:         it(opaque, bs);
 2461:     }
 2462: }
 2463: 
 2464: const char *bdrv_get_device_name(BlockDriverState *bs)
 2465: {
 2466:     return bs->device_name;
 2467: }
 2468: 
 2469: void bdrv_flush_all(void)
 2470: {
 2471:     BlockDriverState *bs;
 2472: 
 2473:     QTAILQ_FOREACH(bs, &bdrv_states, list) {
 2474:         bdrv_flush(bs);
 2475:     }
 2476: }
 2477: 
 2478: int bdrv_has_zero_init(BlockDriverState *bs)
 2479: {
 2480:     assert(bs->drv);
 2481: 
 2482:     if (bs->drv->bdrv_has_zero_init) {
 2483:         return bs->drv->bdrv_has_zero_init(bs);
 2484:     }
 2485: 
 2486:     return 1;
 2487: }
 2488: 
 2489: typedef struct BdrvCoIsAllocatedData {
 2490:     BlockDriverState *bs;
 2491:     int64_t sector_num;
 2492:     int nb_sectors;
 2493:     int *pnum;
 2494:     int ret;
 2495:     bool done;
 2496: } BdrvCoIsAllocatedData;
 2497: 
 2498: /*
 2499:  * Returns true iff the specified sector is present in the disk image. Drivers
 2500:  * not implementing the functionality are assumed to not support backing files,
 2501:  * hence all their sectors are reported as allocated.
 2502:  *
 2503:  * If 'sector_num' is beyond the end of the disk image the return value is 0
 2504:  * and 'pnum' is set to 0.
 2505:  *
 2506:  * 'pnum' is set to the number of sectors (including and immediately following
 2507:  * the specified sector) that are known to be in the same
 2508:  * allocated/unallocated state.
 2509:  *
 2510:  * 'nb_sectors' is the max value 'pnum' should be set to.  If nb_sectors goes
 2511:  * beyond the end of the disk image it will be clamped.
 2512:  */
 2513: int coroutine_fn bdrv_co_is_allocated(BlockDriverState *bs, int64_t sector_num,
 2514:                                       int nb_sectors, int *pnum)
 2515: {
 2516:     int64_t n;
 2517: 
 2518:     if (sector_num >= bs->total_sectors) {
 2519:         *pnum = 0;
 2520:         return 0;
 2521:     }
 2522: 
 2523:     n = bs->total_sectors - sector_num;
 2524:     if (n < nb_sectors) {
 2525:         nb_sectors = n;
 2526:     }
 2527: 
 2528:     if (!bs->drv->bdrv_co_is_allocated) {
 2529:         *pnum = nb_sectors;
 2530:         return 1;
 2531:     }
 2532: 
 2533:     return bs->drv->bdrv_co_is_allocated(bs, sector_num, nb_sectors, pnum);
 2534: }
 2535: 
 2536: /* Coroutine wrapper for bdrv_is_allocated() */
 2537: static void coroutine_fn bdrv_is_allocated_co_entry(void *opaque)
 2538: {
 2539:     BdrvCoIsAllocatedData *data = opaque;
 2540:     BlockDriverState *bs = data->bs;
 2541: 
 2542:     data->ret = bdrv_co_is_allocated(bs, data->sector_num, data->nb_sectors,
 2543:                                      data->pnum);
 2544:     data->done = true;
 2545: }
 2546: 
 2547: /*
 2548:  * Synchronous wrapper around bdrv_co_is_allocated().
 2549:  *
 2550:  * See bdrv_co_is_allocated() for details.
 2551:  */
 2552: int bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
 2553:                       int *pnum)
 2554: {
 2555:     Coroutine *co;
 2556:     BdrvCoIsAllocatedData data = {
 2557:         .bs = bs,
 2558:         .sector_num = sector_num,
 2559:         .nb_sectors = nb_sectors,
 2560:         .pnum = pnum,
 2561:         .done = false,
 2562:     };
 2563: 
 2564:     co = qemu_coroutine_create(bdrv_is_allocated_co_entry);
 2565:     qemu_coroutine_enter(co, &data);
 2566:     while (!data.done) {
 2567:         qemu_aio_wait();
 2568:     }
 2569:     return data.ret;
 2570: }
 2571: 
 2572: BlockInfoList *qmp_query_block(Error **errp)
 2573: {
 2574:     BlockInfoList *head = NULL, *cur_item = NULL;
 2575:     BlockDriverState *bs;
 2576: 
 2577:     QTAILQ_FOREACH(bs, &bdrv_states, list) {
 2578:         BlockInfoList *info = g_malloc0(sizeof(*info));
 2579: 
 2580:         info->value = g_malloc0(sizeof(*info->value));
 2581:         info->value->device = g_strdup(bs->device_name);
 2582:         info->value->type = g_strdup("unknown");
 2583:         info->value->locked = bdrv_dev_is_medium_locked(bs);
 2584:         info->value->removable = bdrv_dev_has_removable_media(bs);
 2585: 
 2586:         if (bdrv_dev_has_removable_media(bs)) {
 2587:             info->value->has_tray_open = true;
 2588:             info->value->tray_open = bdrv_dev_is_tray_open(bs);
 2589:         }
 2590: 
 2591:         if (bdrv_iostatus_is_enabled(bs)) {
 2592:             info->value->has_io_status = true;
 2593:             info->value->io_status = bs->iostatus;
 2594:         }
 2595: 
 2596:         if (bs->drv) {
 2597:             info->value->has_inserted = true;
 2598:             info->value->inserted = g_malloc0(sizeof(*info->value->inserted));
 2599:             info->value->inserted->file = g_strdup(bs->filename);
 2600:             info->value->inserted->ro = bs->read_only;
 2601:             info->value->inserted->drv = g_strdup(bs->drv->format_name);
 2602:             info->value->inserted->encrypted = bs->encrypted;
 2603:             if (bs->backing_file[0]) {
 2604:                 info->value->inserted->has_backing_file = true;
 2605:                 info->value->inserted->backing_file = g_strdup(bs->backing_file);
 2606:             }
 2607: 
 2608:             if (bs->io_limits_enabled) {
 2609:                 info->value->inserted->bps =
 2610:                                bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL];
 2611:                 info->value->inserted->bps_rd =
 2612:                                bs->io_limits.bps[BLOCK_IO_LIMIT_READ];
 2613:                 info->value->inserted->bps_wr =
 2614:                                bs->io_limits.bps[BLOCK_IO_LIMIT_WRITE];
 2615:                 info->value->inserted->iops =
 2616:                                bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL];
 2617:                 info->value->inserted->iops_rd =
 2618:                                bs->io_limits.iops[BLOCK_IO_LIMIT_READ];
 2619:                 info->value->inserted->iops_wr =
 2620:                                bs->io_limits.iops[BLOCK_IO_LIMIT_WRITE];
 2621:             }
 2622:         }
 2623: 
 2624:         /* XXX: waiting for the qapi to support GSList */
 2625:         if (!cur_item) {
 2626:             head = cur_item = info;
 2627:         } else {
 2628:             cur_item->next = info;
 2629:             cur_item = info;
 2630:         }
 2631:     }
 2632: 
 2633:     return head;
 2634: }
 2635: 
 2636: /* Consider exposing this as a full fledged QMP command */
 2637: static BlockStats *qmp_query_blockstat(const BlockDriverState *bs, Error **errp)
 2638: {
 2639:     BlockStats *s;
 2640: 
 2641:     s = g_malloc0(sizeof(*s));
 2642: 
 2643:     if (bs->device_name[0]) {
 2644:         s->has_device = true;
 2645:         s->device = g_strdup(bs->device_name);
 2646:     }
 2647: 
 2648:     s->stats = g_malloc0(sizeof(*s->stats));
 2649:     s->stats->rd_bytes = bs->nr_bytes[BDRV_ACCT_READ];
 2650:     s->stats->wr_bytes = bs->nr_bytes[BDRV_ACCT_WRITE];
 2651:     s->stats->rd_operations = bs->nr_ops[BDRV_ACCT_READ];
 2652:     s->stats->wr_operations = bs->nr_ops[BDRV_ACCT_WRITE];
 2653:     s->stats->wr_highest_offset = bs->wr_highest_sector * BDRV_SECTOR_SIZE;
 2654:     s->stats->flush_operations = bs->nr_ops[BDRV_ACCT_FLUSH];
 2655:     s->stats->wr_total_time_ns = bs->total_time_ns[BDRV_ACCT_WRITE];
 2656:     s->stats->rd_total_time_ns = bs->total_time_ns[BDRV_ACCT_READ];
 2657:     s->stats->flush_total_time_ns = bs->total_time_ns[BDRV_ACCT_FLUSH];
 2658: 
 2659:     if (bs->file) {
 2660:         s->has_parent = true;
 2661:         s->parent = qmp_query_blockstat(bs->file, NULL);
 2662:     }
 2663: 
 2664:     return s;
 2665: }
 2666: 
 2667: BlockStatsList *qmp_query_blockstats(Error **errp)
 2668: {
 2669:     BlockStatsList *head = NULL, *cur_item = NULL;
 2670:     BlockDriverState *bs;
 2671: 
 2672:     QTAILQ_FOREACH(bs, &bdrv_states, list) {
 2673:         BlockStatsList *info = g_malloc0(sizeof(*info));
 2674:         info->value = qmp_query_blockstat(bs, NULL);
 2675: 
 2676:         /* XXX: waiting for the qapi to support GSList */
 2677:         if (!cur_item) {
 2678:             head = cur_item = info;
 2679:         } else {
 2680:             cur_item->next = info;
 2681:             cur_item = info;
 2682:         }
 2683:     }
 2684: 
 2685:     return head;
 2686: }
 2687: 
 2688: const char *bdrv_get_encrypted_filename(BlockDriverState *bs)
 2689: {
 2690:     if (bs->backing_hd && bs->backing_hd->encrypted)
 2691:         return bs->backing_file;
 2692:     else if (bs->encrypted)
 2693:         return bs->filename;
 2694:     else
 2695:         return NULL;
 2696: }
 2697: 
 2698: void bdrv_get_backing_filename(BlockDriverState *bs,
 2699:                                char *filename, int filename_size)
 2700: {
 2701:     pstrcpy(filename, filename_size, bs->backing_file);
 2702: }
 2703: 
 2704: int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num,
 2705:                           const uint8_t *buf, int nb_sectors)
 2706: {
 2707:     BlockDriver *drv = bs->drv;
 2708:     if (!drv)
 2709:         return -ENOMEDIUM;
 2710:     if (!drv->bdrv_write_compressed)
 2711:         return -ENOTSUP;
 2712:     if (bdrv_check_request(bs, sector_num, nb_sectors))
 2713:         return -EIO;
 2714: 
 2715:     if (bs->dirty_bitmap) {
 2716:         set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
 2717:     }
 2718: 
 2719:     return drv->bdrv_write_compressed(bs, sector_num, buf, nb_sectors);
 2720: }
 2721: 
 2722: int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
 2723: {
 2724:     BlockDriver *drv = bs->drv;
 2725:     if (!drv)
 2726:         return -ENOMEDIUM;
 2727:     if (!drv->bdrv_get_info)
 2728:         return -ENOTSUP;
 2729:     memset(bdi, 0, sizeof(*bdi));
 2730:     return drv->bdrv_get_info(bs, bdi);
 2731: }
 2732: 
 2733: int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf,
 2734:                       int64_t pos, int size)
 2735: {
 2736:     BlockDriver *drv = bs->drv;
 2737:     if (!drv)
 2738:         return -ENOMEDIUM;
 2739:     if (drv->bdrv_save_vmstate)
 2740:         return drv->bdrv_save_vmstate(bs, buf, pos, size);
 2741:     if (bs->file)
 2742:         return bdrv_save_vmstate(bs->file, buf, pos, size);
 2743:     return -ENOTSUP;
 2744: }
 2745: 
 2746: int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf,
 2747:                       int64_t pos, int size)
 2748: {
 2749:     BlockDriver *drv = bs->drv;
 2750:     if (!drv)
 2751:         return -ENOMEDIUM;
 2752:     if (drv->bdrv_load_vmstate)
 2753:         return drv->bdrv_load_vmstate(bs, buf, pos, size);
 2754:     if (bs->file)
 2755:         return bdrv_load_vmstate(bs->file, buf, pos, size);
 2756:     return -ENOTSUP;
 2757: }
 2758: 
 2759: void bdrv_debug_event(BlockDriverState *bs, BlkDebugEvent event)
 2760: {
 2761:     BlockDriver *drv = bs->drv;
 2762: 
 2763:     if (!drv || !drv->bdrv_debug_event) {
 2764:         return;
 2765:     }
 2766: 
 2767:     return drv->bdrv_debug_event(bs, event);
 2768: 
 2769: }
 2770: 
 2771: /**************************************************************/
 2772: /* handling of snapshots */
 2773: 
 2774: int bdrv_can_snapshot(BlockDriverState *bs)
 2775: {
 2776:     BlockDriver *drv = bs->drv;
 2777:     if (!drv || !bdrv_is_inserted(bs) || bdrv_is_read_only(bs)) {
 2778:         return 0;
 2779:     }
 2780: 
 2781:     if (!drv->bdrv_snapshot_create) {
 2782:         if (bs->file != NULL) {
 2783:             return bdrv_can_snapshot(bs->file);
 2784:         }
 2785:         return 0;
 2786:     }
 2787: 
 2788:     return 1;
 2789: }
 2790: 
 2791: int bdrv_is_snapshot(BlockDriverState *bs)
 2792: {
 2793:     return !!(bs->open_flags & BDRV_O_SNAPSHOT);
 2794: }
 2795: 
 2796: BlockDriverState *bdrv_snapshots(void)
 2797: {
 2798:     BlockDriverState *bs;
 2799: 
 2800:     if (bs_snapshots) {
 2801:         return bs_snapshots;
 2802:     }
 2803: 
 2804:     bs = NULL;
 2805:     while ((bs = bdrv_next(bs))) {
 2806:         if (bdrv_can_snapshot(bs)) {
 2807:             bs_snapshots = bs;
 2808:             return bs;
 2809:         }
 2810:     }
 2811:     return NULL;
 2812: }
 2813: 
 2814: int bdrv_snapshot_create(BlockDriverState *bs,
 2815:                          QEMUSnapshotInfo *sn_info)
 2816: {
 2817:     BlockDriver *drv = bs->drv;
 2818:     if (!drv)
 2819:         return -ENOMEDIUM;
 2820:     if (drv->bdrv_snapshot_create)
 2821:         return drv->bdrv_snapshot_create(bs, sn_info);
 2822:     if (bs->file)
 2823:         return bdrv_snapshot_create(bs->file, sn_info);
 2824:     return -ENOTSUP;
 2825: }
 2826: 
 2827: int bdrv_snapshot_goto(BlockDriverState *bs,
 2828:                        const char *snapshot_id)
 2829: {
 2830:     BlockDriver *drv = bs->drv;
 2831:     int ret, open_ret;
 2832: 
 2833:     if (!drv)
 2834:         return -ENOMEDIUM;
 2835:     if (drv->bdrv_snapshot_goto)
 2836:         return drv->bdrv_snapshot_goto(bs, snapshot_id);
 2837: 
 2838:     if (bs->file) {
 2839:         drv->bdrv_close(bs);
 2840:         ret = bdrv_snapshot_goto(bs->file, snapshot_id);
 2841:         open_ret = drv->bdrv_open(bs, bs->open_flags);
 2842:         if (open_ret < 0) {
 2843:             bdrv_delete(bs->file);
 2844:             bs->drv = NULL;
 2845:             return open_ret;
 2846:         }
 2847:         return ret;
 2848:     }
 2849: 
 2850:     return -ENOTSUP;
 2851: }
 2852: 
 2853: int bdrv_snapshot_delete(BlockDriverState *bs, const char *snapshot_id)
 2854: {
 2855:     BlockDriver *drv = bs->drv;
 2856:     if (!drv)
 2857:         return -ENOMEDIUM;
 2858:     if (drv->bdrv_snapshot_delete)
 2859:         return drv->bdrv_snapshot_delete(bs, snapshot_id);
 2860:     if (bs->file)
 2861:         return bdrv_snapshot_delete(bs->file, snapshot_id);
 2862:     return -ENOTSUP;
 2863: }
 2864: 
 2865: int bdrv_snapshot_list(BlockDriverState *bs,
 2866:                        QEMUSnapshotInfo **psn_info)
 2867: {
 2868:     BlockDriver *drv = bs->drv;
 2869:     if (!drv)
 2870:         return -ENOMEDIUM;
 2871:     if (drv->bdrv_snapshot_list)
 2872:         return drv->bdrv_snapshot_list(bs, psn_info);
 2873:     if (bs->file)
 2874:         return bdrv_snapshot_list(bs->file, psn_info);
 2875:     return -ENOTSUP;
 2876: }
 2877: 
 2878: int bdrv_snapshot_load_tmp(BlockDriverState *bs,
 2879:         const char *snapshot_name)
 2880: {
 2881:     BlockDriver *drv = bs->drv;
 2882:     if (!drv) {
 2883:         return -ENOMEDIUM;
 2884:     }
 2885:     if (!bs->read_only) {
 2886:         return -EINVAL;
 2887:     }
 2888:     if (drv->bdrv_snapshot_load_tmp) {
 2889:         return drv->bdrv_snapshot_load_tmp(bs, snapshot_name);
 2890:     }
 2891:     return -ENOTSUP;
 2892: }
 2893: 
 2894: BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs,
 2895:         const char *backing_file)
 2896: {
 2897:     if (!bs->drv) {
 2898:         return NULL;
 2899:     }
 2900: 
 2901:     if (bs->backing_hd) {
 2902:         if (strcmp(bs->backing_file, backing_file) == 0) {
 2903:             return bs->backing_hd;
 2904:         } else {
 2905:             return bdrv_find_backing_image(bs->backing_hd, backing_file);
 2906:         }
 2907:     }
 2908: 
 2909:     return NULL;
 2910: }
 2911: 
 2912: #define NB_SUFFIXES 4
 2913: 
 2914: char *get_human_readable_size(char *buf, int buf_size, int64_t size)
 2915: {
 2916:     static const char suffixes[NB_SUFFIXES] = "KMGT";
 2917:     int64_t base;
 2918:     int i;
 2919: 
 2920:     if (size <= 999) {
 2921:         snprintf(buf, buf_size, "%" PRId64, size);
 2922:     } else {
 2923:         base = 1024;
 2924:         for(i = 0; i < NB_SUFFIXES; i++) {
 2925:             if (size < (10 * base)) {
 2926:                 snprintf(buf, buf_size, "%0.1f%c",
 2927:                          (double)size / base,
 2928:                          suffixes[i]);
 2929:                 break;
 2930:             } else if (size < (1000 * base) || i == (NB_SUFFIXES - 1)) {
 2931:                 snprintf(buf, buf_size, "%" PRId64 "%c",
 2932:                          ((size + (base >> 1)) / base),
 2933:                          suffixes[i]);
 2934:                 break;
 2935:             }
 2936:             base = base * 1024;
 2937:         }
 2938:     }
 2939:     return buf;
 2940: }
 2941: 
 2942: char *bdrv_snapshot_dump(char *buf, int buf_size, QEMUSnapshotInfo *sn)
 2943: {
 2944:     char buf1[128], date_buf[128], clock_buf[128];
 2945: #ifdef _WIN32
 2946:     struct tm *ptm;
 2947: #else
 2948:     struct tm tm;
 2949: #endif
 2950:     time_t ti;
 2951:     int64_t secs;
 2952: 
 2953:     if (!sn) {
 2954:         snprintf(buf, buf_size,
 2955:                  "%-10s%-20s%7s%20s%15s",
 2956:                  "ID", "TAG", "VM SIZE", "DATE", "VM CLOCK");
 2957:     } else {
 2958:         ti = sn->date_sec;
 2959: #ifdef _WIN32
 2960:         ptm = localtime(&ti);
 2961:         strftime(date_buf, sizeof(date_buf),
 2962:                  "%Y-%m-%d %H:%M:%S", ptm);
 2963: #else
 2964:         localtime_r(&ti, &tm);
 2965:         strftime(date_buf, sizeof(date_buf),
 2966:                  "%Y-%m-%d %H:%M:%S", &tm);
 2967: #endif
 2968:         secs = sn->vm_clock_nsec / 1000000000;
 2969:         snprintf(clock_buf, sizeof(clock_buf),
 2970:                  "%02d:%02d:%02d.%03d",
 2971:                  (int)(secs / 3600),
 2972:                  (int)((secs / 60) % 60),
 2973:                  (int)(secs % 60),
 2974:                  (int)((sn->vm_clock_nsec / 1000000) % 1000));
 2975:         snprintf(buf, buf_size,
 2976:                  "%-10s%-20s%7s%20s%15s",
 2977:                  sn->id_str, sn->name,
 2978:                  get_human_readable_size(buf1, sizeof(buf1), sn->vm_state_size),
 2979:                  date_buf,
 2980:                  clock_buf);
 2981:     }
 2982:     return buf;
 2983: }
 2984: 
 2985: /**************************************************************/
 2986: /* async I/Os */
 2987: 
 2988: BlockDriverAIOCB *bdrv_aio_readv(BlockDriverState *bs, int64_t sector_num,
 2989:                                  QEMUIOVector *qiov, int nb_sectors,
 2990:                                  BlockDriverCompletionFunc *cb, void *opaque)
 2991: {
 2992:     trace_bdrv_aio_readv(bs, sector_num, nb_sectors, opaque);
 2993: 
 2994:     return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors,
 2995:                                  cb, opaque, false);
 2996: }
 2997: 
 2998: BlockDriverAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num,
 2999:                                   QEMUIOVector *qiov, int nb_sectors,
 3000:                                   BlockDriverCompletionFunc *cb, void *opaque)
 3001: {
 3002:     trace_bdrv_aio_writev(bs, sector_num, nb_sectors, opaque);
 3003: 
 3004:     return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors,
 3005:                                  cb, opaque, true);
 3006: }
 3007: 
 3008: 
/*
 * Shared completion state for one bdrv_aio_multiwrite() call.
 */
typedef struct MultiwriteCB {
    /* First negative completion code seen by multiwrite_cb(), else 0 */
    int error;
    /* Submitted (possibly merged) requests that have not completed yet */
    int num_requests;
    /* Number of valid entries in callbacks[] */
    int num_callbacks;
    struct {
        /* Caller's completion function and its argument */
        BlockDriverCompletionFunc *cb;
        void *opaque;
        /* Merged qiov to destroy and free after cb has run, or NULL */
        QEMUIOVector *free_qiov;
    } callbacks[];
} MultiwriteCB;
 3019: 
 3020: static void multiwrite_user_cb(MultiwriteCB *mcb)
 3021: {
 3022:     int i;
 3023: 
 3024:     for (i = 0; i < mcb->num_callbacks; i++) {
 3025:         mcb->callbacks[i].cb(mcb->callbacks[i].opaque, mcb->error);
 3026:         if (mcb->callbacks[i].free_qiov) {
 3027:             qemu_iovec_destroy(mcb->callbacks[i].free_qiov);
 3028:         }
 3029:         g_free(mcb->callbacks[i].free_qiov);
 3030:     }
 3031: }
 3032: 
 3033: static void multiwrite_cb(void *opaque, int ret)
 3034: {
 3035:     MultiwriteCB *mcb = opaque;
 3036: 
 3037:     trace_multiwrite_cb(mcb, ret);
 3038: 
 3039:     if (ret < 0 && !mcb->error) {
 3040:         mcb->error = ret;
 3041:     }
 3042: 
 3043:     mcb->num_requests--;
 3044:     if (mcb->num_requests == 0) {
 3045:         multiwrite_user_cb(mcb);
 3046:         g_free(mcb);
 3047:     }
 3048: }
 3049: 
 3050: static int multiwrite_req_compare(const void *a, const void *b)
 3051: {
 3052:     const BlockRequest *req1 = a, *req2 = b;
 3053: 
 3054:     /*
 3055:      * Note that we can't simply subtract req2->sector from req1->sector
 3056:      * here as that could overflow the return value.
 3057:      */
 3058:     if (req1->sector > req2->sector) {
 3059:         return 1;
 3060:     } else if (req1->sector < req2->sector) {
 3061:         return -1;
 3062:     } else {
 3063:         return 0;
 3064:     }
 3065: }
 3066: 
 3067: /*
 3068:  * Takes a bunch of requests and tries to merge them. Returns the number of
 3069:  * requests that remain after merging.
 3070:  */
 3071: static int multiwrite_merge(BlockDriverState *bs, BlockRequest *reqs,
 3072:     int num_reqs, MultiwriteCB *mcb)
 3073: {
 3074:     int i, outidx;
 3075: 
 3076:     // Sort requests by start sector
 3077:     qsort(reqs, num_reqs, sizeof(*reqs), &multiwrite_req_compare);
 3078: 
 3079:     // Check if adjacent requests touch the same clusters. If so, combine them,
 3080:     // filling up gaps with zero sectors.
 3081:     outidx = 0;
 3082:     for (i = 1; i < num_reqs; i++) {
 3083:         int merge = 0;
 3084:         int64_t oldreq_last = reqs[outidx].sector + reqs[outidx].nb_sectors;
 3085: 
 3086:         // Handle exactly sequential writes and overlapping writes.
 3087:         if (reqs[i].sector <= oldreq_last) {
 3088:             merge = 1;
 3089:         }
 3090: 
 3091:         if (reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1 > IOV_MAX) {
 3092:             merge = 0;
 3093:         }
 3094: 
 3095:         if (merge) {
 3096:             size_t size;
 3097:             QEMUIOVector *qiov = g_malloc0(sizeof(*qiov));
 3098:             qemu_iovec_init(qiov,
 3099:                 reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1);
 3100: 
 3101:             // Add the first request to the merged one. If the requests are
 3102:             // overlapping, drop the last sectors of the first request.
 3103:             size = (reqs[i].sector - reqs[outidx].sector) << 9;
 3104:             qemu_iovec_concat(qiov, reqs[outidx].qiov, size);
 3105: 
 3106:             // We should need to add any zeros between the two requests
 3107:             assert (reqs[i].sector <= oldreq_last);
 3108: 
 3109:             // Add the second request
 3110:             qemu_iovec_concat(qiov, reqs[i].qiov, reqs[i].qiov->size);
 3111: 
 3112:             reqs[outidx].nb_sectors = qiov->size >> 9;
 3113:             reqs[outidx].qiov = qiov;
 3114: 
 3115:             mcb->callbacks[i].free_qiov = reqs[outidx].qiov;
 3116:         } else {
 3117:             outidx++;
 3118:             reqs[outidx].sector     = reqs[i].sector;
 3119:             reqs[outidx].nb_sectors = reqs[i].nb_sectors;
 3120:             reqs[outidx].qiov       = reqs[i].qiov;
 3121:         }
 3122:     }
 3123: 
 3124:     return outidx + 1;
 3125: }
 3126: 
 3127: /*
 3128:  * Submit multiple AIO write requests at once.
 3129:  *
 3130:  * On success, the function returns 0 and all requests in the reqs array have
 3131:  * been submitted. In error case this function returns -1, and any of the
 3132:  * requests may or may not be submitted yet. In particular, this means that the
 3133:  * callback will be called for some of the requests, for others it won't. The
 3134:  * caller must check the error field of the BlockRequest to wait for the right
 3135:  * callbacks (if error != 0, no callback will be called).
 3136:  *
 3137:  * The implementation may modify the contents of the reqs array, e.g. to merge
 3138:  * requests. However, the fields opaque and error are left unmodified as they
 3139:  * are used to signal failure for a single request to the caller.
 3140:  */
 3141: int bdrv_aio_multiwrite(BlockDriverState *bs, BlockRequest *reqs, int num_reqs)
 3142: {
 3143:     MultiwriteCB *mcb;
 3144:     int i;
 3145: 
 3146:     /* don't submit writes if we don't have a medium */
 3147:     if (bs->drv == NULL) {
 3148:         for (i = 0; i < num_reqs; i++) {
 3149:             reqs[i].error = -ENOMEDIUM;
 3150:         }
 3151:         return -1;
 3152:     }
 3153: 
 3154:     if (num_reqs == 0) {
 3155:         return 0;
 3156:     }
 3157: 
 3158:     // Create MultiwriteCB structure
 3159:     mcb = g_malloc0(sizeof(*mcb) + num_reqs * sizeof(*mcb->callbacks));
 3160:     mcb->num_requests = 0;
 3161:     mcb->num_callbacks = num_reqs;
 3162: 
 3163:     for (i = 0; i < num_reqs; i++) {
 3164:         mcb->callbacks[i].cb = reqs[i].cb;
 3165:         mcb->callbacks[i].opaque = reqs[i].opaque;
 3166:     }
 3167: 
 3168:     // Check for mergable requests
 3169:     num_reqs = multiwrite_merge(bs, reqs, num_reqs, mcb);
 3170: 
 3171:     trace_bdrv_aio_multiwrite(mcb, mcb->num_callbacks, num_reqs);
 3172: 
 3173:     /* Run the aio requests. */
 3174:     mcb->num_requests = num_reqs;
 3175:     for (i = 0; i < num_reqs; i++) {
 3176:         bdrv_aio_writev(bs, reqs[i].sector, reqs[i].qiov,
 3177:             reqs[i].nb_sectors, multiwrite_cb, mcb);
 3178:     }
 3179: 
 3180:     return 0;
 3181: }
 3182: 
 3183: void bdrv_aio_cancel(BlockDriverAIOCB *acb)
 3184: {
 3185:     acb->pool->cancel(acb);
 3186: }
 3187: 
 3188: /* block I/O throttling */
 3189: static bool bdrv_exceed_bps_limits(BlockDriverState *bs, int nb_sectors,
 3190:                  bool is_write, double elapsed_time, uint64_t *wait)
 3191: {
 3192:     uint64_t bps_limit = 0;
 3193:     double   bytes_limit, bytes_base, bytes_res;
 3194:     double   slice_time, wait_time;
 3195: 
 3196:     if (bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL]) {
 3197:         bps_limit = bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL];
 3198:     } else if (bs->io_limits.bps[is_write]) {
 3199:         bps_limit = bs->io_limits.bps[is_write];
 3200:     } else {
 3201:         if (wait) {
 3202:             *wait = 0;
 3203:         }
 3204: 
 3205:         return false;
 3206:     }
 3207: 
 3208:     slice_time = bs->slice_end - bs->slice_start;
 3209:     slice_time /= (NANOSECONDS_PER_SECOND);
 3210:     bytes_limit = bps_limit * slice_time;
 3211:     bytes_base  = bs->nr_bytes[is_write] - bs->io_base.bytes[is_write];
 3212:     if (bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL]) {
 3213:         bytes_base += bs->nr_bytes[!is_write] - bs->io_base.bytes[!is_write];
 3214:     }
 3215: 
 3216:     /* bytes_base: the bytes of data which have been read/written; and
 3217:      *             it is obtained from the history statistic info.
 3218:      * bytes_res: the remaining bytes of data which need to be read/written.
 3219:      * (bytes_base + bytes_res) / bps_limit: used to calcuate
 3220:      *             the total time for completing reading/writting all data.
 3221:      */
 3222:     bytes_res   = (unsigned) nb_sectors * BDRV_SECTOR_SIZE;
 3223: 
 3224:     if (bytes_base + bytes_res <= bytes_limit) {
 3225:         if (wait) {
 3226:             *wait = 0;
 3227:         }
 3228: 
 3229:         return false;
 3230:     }
 3231: 
 3232:     /* Calc approx time to dispatch */
 3233:     wait_time = (bytes_base + bytes_res) / bps_limit - elapsed_time;
 3234: 
 3235:     /* When the I/O rate at runtime exceeds the limits,
 3236:      * bs->slice_end need to be extended in order that the current statistic
 3237:      * info can be kept until the timer fire, so it is increased and tuned
 3238:      * based on the result of experiment.
 3239:      */
 3240:     bs->slice_time = wait_time * BLOCK_IO_SLICE_TIME * 10;
 3241:     bs->slice_end += bs->slice_time - 3 * BLOCK_IO_SLICE_TIME;
 3242:     if (wait) {
 3243:         *wait = wait_time * BLOCK_IO_SLICE_TIME * 10;
 3244:     }
 3245: 
 3246:     return true;
 3247: }
 3248: 
 3249: static bool bdrv_exceed_iops_limits(BlockDriverState *bs, bool is_write,
 3250:                              double elapsed_time, uint64_t *wait)
 3251: {
 3252:     uint64_t iops_limit = 0;
 3253:     double   ios_limit, ios_base;
 3254:     double   slice_time, wait_time;
 3255: 
 3256:     if (bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL]) {
 3257:         iops_limit = bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL];
 3258:     } else if (bs->io_limits.iops[is_write]) {
 3259:         iops_limit = bs->io_limits.iops[is_write];
 3260:     } else {
 3261:         if (wait) {
 3262:             *wait = 0;
 3263:         }
 3264: 
 3265:         return false;
 3266:     }
 3267: 
 3268:     slice_time = bs->slice_end - bs->slice_start;
 3269:     slice_time /= (NANOSECONDS_PER_SECOND);
 3270:     ios_limit  = iops_limit * slice_time;
 3271:     ios_base   = bs->nr_ops[is_write] - bs->io_base.ios[is_write];
 3272:     if (bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL]) {
 3273:         ios_base += bs->nr_ops[!is_write] - bs->io_base.ios[!is_write];
 3274:     }
 3275: 
 3276:     if (ios_base + 1 <= ios_limit) {
 3277:         if (wait) {
 3278:             *wait = 0;
 3279:         }
 3280: 
 3281:         return false;
 3282:     }
 3283: 
 3284:     /* Calc approx time to dispatch */
 3285:     wait_time = (ios_base + 1) / iops_limit;
 3286:     if (wait_time > elapsed_time) {
 3287:         wait_time = wait_time - elapsed_time;
 3288:     } else {
 3289:         wait_time = 0;
 3290:     }
 3291: 
 3292:     bs->slice_time = wait_time * BLOCK_IO_SLICE_TIME * 10;
 3293:     bs->slice_end += bs->slice_time - 3 * BLOCK_IO_SLICE_TIME;
 3294:     if (wait) {
 3295:         *wait = wait_time * BLOCK_IO_SLICE_TIME * 10;
 3296:     }
 3297: 
 3298:     return true;
 3299: }
 3300: 
 3301: static bool bdrv_exceed_io_limits(BlockDriverState *bs, int nb_sectors,
 3302:                            bool is_write, int64_t *wait)
 3303: {
 3304:     int64_t  now, max_wait;
 3305:     uint64_t bps_wait = 0, iops_wait = 0;
 3306:     double   elapsed_time;
 3307:     int      bps_ret, iops_ret;
 3308: 
 3309:     now = qemu_get_clock_ns(vm_clock);
 3310:     if ((bs->slice_start < now)
 3311:         && (bs->slice_end > now)) {
 3312:         bs->slice_end = now + bs->slice_time;
 3313:     } else {
 3314:         bs->slice_time  =  5 * BLOCK_IO_SLICE_TIME;
 3315:         bs->slice_start = now;
 3316:         bs->slice_end   = now + bs->slice_time;
 3317: 
 3318:         bs->io_base.bytes[is_write]  = bs->nr_bytes[is_write];
 3319:         bs->io_base.bytes[!is_write] = bs->nr_bytes[!is_write];
 3320: 
 3321:         bs->io_base.ios[is_write]    = bs->nr_ops[is_write];
 3322:         bs->io_base.ios[!is_write]   = bs->nr_ops[!is_write];
 3323:     }
 3324: 
 3325:     elapsed_time  = now - bs->slice_start;
 3326:     elapsed_time  /= (NANOSECONDS_PER_SECOND);
 3327: 
 3328:     bps_ret  = bdrv_exceed_bps_limits(bs, nb_sectors,
 3329:                                       is_write, elapsed_time, &bps_wait);
 3330:     iops_ret = bdrv_exceed_iops_limits(bs, is_write,
 3331:                                       elapsed_time, &iops_wait);
 3332:     if (bps_ret || iops_ret) {
 3333:         max_wait = bps_wait > iops_wait ? bps_wait : iops_wait;
 3334:         if (wait) {
 3335:             *wait = max_wait;
 3336:         }
 3337: 
 3338:         now = qemu_get_clock_ns(vm_clock);
 3339:         if (bs->slice_end < now + max_wait) {
 3340:             bs->slice_end = now + max_wait;
 3341:         }
 3342: 
 3343:         return true;
 3344:     }
 3345: 
 3346:     if (wait) {
 3347:         *wait = 0;
 3348:     }
 3349: 
 3350:     return false;
 3351: }
 3352: 
 3353: /**************************************************************/
 3354: /* async block device emulation */
 3355: 
/*
 * AIOCB used to emulate asynchronous I/O on top of a driver's synchronous
 * bdrv_read/bdrv_write (see bdrv_aio_rw_vector()).
 */
typedef struct BlockDriverAIOCBSync {
    BlockDriverAIOCB common;
    /* Bottom half that runs bdrv_aio_bh_cb() to report completion */
    QEMUBH *bh;
    /* Return value of the synchronous read/write */
    int ret;
    /* vector translation state */
    QEMUIOVector *qiov;
    /* Linear buffer the synchronous call operates on */
    uint8_t *bounce;
    /* Non-zero for writes, zero for reads */
    int is_write;
} BlockDriverAIOCBSync;
 3365: 
 3366: static void bdrv_aio_cancel_em(BlockDriverAIOCB *blockacb)
 3367: {
 3368:     BlockDriverAIOCBSync *acb =
 3369:         container_of(blockacb, BlockDriverAIOCBSync, common);
 3370:     qemu_bh_delete(acb->bh);
 3371:     acb->bh = NULL;
 3372:     qemu_aio_release(acb);
 3373: }
 3374: 
/* AIOCB pool for the synchronous-driver emulation (BlockDriverAIOCBSync) */
static AIOPool bdrv_em_aio_pool = {
    .aiocb_size         = sizeof(BlockDriverAIOCBSync),
    .cancel             = bdrv_aio_cancel_em,
};
 3379: 
 3380: static void bdrv_aio_bh_cb(void *opaque)
 3381: {
 3382:     BlockDriverAIOCBSync *acb = opaque;
 3383: 
 3384:     if (!acb->is_write)
 3385:         qemu_iovec_from_buffer(acb->qiov, acb->bounce, acb->qiov->size);
 3386:     qemu_vfree(acb->bounce);
 3387:     acb->common.cb(acb->common.opaque, acb->ret);
 3388:     qemu_bh_delete(acb->bh);
 3389:     acb->bh = NULL;
 3390:     qemu_aio_release(acb);
 3391: }
 3392: 
 3393: static BlockDriverAIOCB *bdrv_aio_rw_vector(BlockDriverState *bs,
 3394:                                             int64_t sector_num,
 3395:                                             QEMUIOVector *qiov,
 3396:                                             int nb_sectors,
 3397:                                             BlockDriverCompletionFunc *cb,
 3398:                                             void *opaque,
 3399:                                             int is_write)
 3400: 
 3401: {
 3402:     BlockDriverAIOCBSync *acb;
 3403: 
 3404:     acb = qemu_aio_get(&bdrv_em_aio_pool, bs, cb, opaque);
 3405:     acb->is_write = is_write;
 3406:     acb->qiov = qiov;
 3407:     acb->bounce = qemu_blockalign(bs, qiov->size);
 3408:     acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb);
 3409: 
 3410:     if (is_write) {
 3411:         qemu_iovec_to_buffer(acb->qiov, acb->bounce);
 3412:         acb->ret = bs->drv->bdrv_write(bs, sector_num, acb->bounce, nb_sectors);
 3413:     } else {
 3414:         acb->ret = bs->drv->bdrv_read(bs, sector_num, acb->bounce, nb_sectors);
 3415:     }
 3416: 
 3417:     qemu_bh_schedule(acb->bh);
 3418: 
 3419:     return &acb->common;
 3420: }
 3421: 
 3422: static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
 3423:         int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
 3424:         BlockDriverCompletionFunc *cb, void *opaque)
 3425: {
 3426:     return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 0);
 3427: }
 3428: 
 3429: static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
 3430:         int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
 3431:         BlockDriverCompletionFunc *cb, void *opaque)
 3432: {
 3433:     return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 1);
 3434: }
 3435: 
 3436: 
/*
 * AIOCB for requests that are carried out inside a coroutine
 * (read/write, flush and discard below).
 */
typedef struct BlockDriverAIOCBCoroutine {
    BlockDriverAIOCB common;
    /* Request parameters; req.error receives the coroutine's result */
    BlockRequest req;
    /* Selects bdrv_co_do_writev (true) or bdrv_co_do_readv (false) */
    bool is_write;
    /* Bottom half that runs bdrv_co_em_bh() to report completion */
    QEMUBH* bh;
} BlockDriverAIOCBCoroutine;
 3443: 
 3444: static void bdrv_aio_co_cancel_em(BlockDriverAIOCB *blockacb)
 3445: {
 3446:     qemu_aio_flush();
 3447: }
 3448: 
/* AIOCB pool for coroutine-backed requests (BlockDriverAIOCBCoroutine) */
static AIOPool bdrv_em_co_aio_pool = {
    .aiocb_size         = sizeof(BlockDriverAIOCBCoroutine),
    .cancel             = bdrv_aio_co_cancel_em,
};
 3453: 
 3454: static void bdrv_co_em_bh(void *opaque)
 3455: {
 3456:     BlockDriverAIOCBCoroutine *acb = opaque;
 3457: 
 3458:     acb->common.cb(acb->common.opaque, acb->req.error);
 3459:     qemu_bh_delete(acb->bh);
 3460:     qemu_aio_release(acb);
 3461: }
 3462: 
 3463: /* Invoke bdrv_co_do_readv/bdrv_co_do_writev */
 3464: static void coroutine_fn bdrv_co_do_rw(void *opaque)
 3465: {
 3466:     BlockDriverAIOCBCoroutine *acb = opaque;
 3467:     BlockDriverState *bs = acb->common.bs;
 3468: 
 3469:     if (!acb->is_write) {
 3470:         acb->req.error = bdrv_co_do_readv(bs, acb->req.sector,
 3471:             acb->req.nb_sectors, acb->req.qiov, 0);
 3472:     } else {
 3473:         acb->req.error = bdrv_co_do_writev(bs, acb->req.sector,
 3474:             acb->req.nb_sectors, acb->req.qiov, 0);
 3475:     }
 3476: 
 3477:     acb->bh = qemu_bh_new(bdrv_co_em_bh, acb);
 3478:     qemu_bh_schedule(acb->bh);
 3479: }
 3480: 
 3481: static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
 3482:                                                int64_t sector_num,
 3483:                                                QEMUIOVector *qiov,
 3484:                                                int nb_sectors,
 3485:                                                BlockDriverCompletionFunc *cb,
 3486:                                                void *opaque,
 3487:                                                bool is_write)
 3488: {
 3489:     Coroutine *co;
 3490:     BlockDriverAIOCBCoroutine *acb;
 3491: 
 3492:     acb = qemu_aio_get(&bdrv_em_co_aio_pool, bs, cb, opaque);
 3493:     acb->req.sector = sector_num;
 3494:     acb->req.nb_sectors = nb_sectors;
 3495:     acb->req.qiov = qiov;
 3496:     acb->is_write = is_write;
 3497: 
 3498:     co = qemu_coroutine_create(bdrv_co_do_rw);
 3499:     qemu_coroutine_enter(co, acb);
 3500: 
 3501:     return &acb->common;
 3502: }
 3503: 
 3504: static void coroutine_fn bdrv_aio_flush_co_entry(void *opaque)
 3505: {
 3506:     BlockDriverAIOCBCoroutine *acb = opaque;
 3507:     BlockDriverState *bs = acb->common.bs;
 3508: 
 3509:     acb->req.error = bdrv_co_flush(bs);
 3510:     acb->bh = qemu_bh_new(bdrv_co_em_bh, acb);
 3511:     qemu_bh_schedule(acb->bh);
 3512: }
 3513: 
 3514: BlockDriverAIOCB *bdrv_aio_flush(BlockDriverState *bs,
 3515:         BlockDriverCompletionFunc *cb, void *opaque)
 3516: {
 3517:     trace_bdrv_aio_flush(bs, opaque);
 3518: 
 3519:     Coroutine *co;
 3520:     BlockDriverAIOCBCoroutine *acb;
 3521: 
 3522:     acb = qemu_aio_get(&bdrv_em_co_aio_pool, bs, cb, opaque);
 3523:     co = qemu_coroutine_create(bdrv_aio_flush_co_entry);
 3524:     qemu_coroutine_enter(co, acb);
 3525: 
 3526:     return &acb->common;
 3527: }
 3528: 
 3529: static void coroutine_fn bdrv_aio_discard_co_entry(void *opaque)
 3530: {
 3531:     BlockDriverAIOCBCoroutine *acb = opaque;
 3532:     BlockDriverState *bs = acb->common.bs;
 3533: 
 3534:     acb->req.error = bdrv_co_discard(bs, acb->req.sector, acb->req.nb_sectors);
 3535:     acb->bh = qemu_bh_new(bdrv_co_em_bh, acb);
 3536:     qemu_bh_schedule(acb->bh);
 3537: }
 3538: 
 3539: BlockDriverAIOCB *bdrv_aio_discard(BlockDriverState *bs,
 3540:         int64_t sector_num, int nb_sectors,
 3541:         BlockDriverCompletionFunc *cb, void *opaque)
 3542: {
 3543:     Coroutine *co;
 3544:     BlockDriverAIOCBCoroutine *acb;
 3545: 
 3546:     trace_bdrv_aio_discard(bs, sector_num, nb_sectors, opaque);
 3547: 
 3548:     acb = qemu_aio_get(&bdrv_em_co_aio_pool, bs, cb, opaque);
 3549:     acb->req.sector = sector_num;
 3550:     acb->req.nb_sectors = nb_sectors;
 3551:     co = qemu_coroutine_create(bdrv_aio_discard_co_entry);
 3552:     qemu_coroutine_enter(co, acb);
 3553: 
 3554:     return &acb->common;
 3555: }
 3556: 
 3557: void bdrv_init(void)
 3558: {
 3559:     module_call_init(MODULE_INIT_BLOCK);
 3560: }
 3561: 
 3562: void bdrv_init_with_whitelist(void)
 3563: {
 3564:     use_bdrv_whitelist = 1;
 3565:     bdrv_init();
 3566: }
 3567: 
 3568: void *qemu_aio_get(AIOPool *pool, BlockDriverState *bs,
 3569:                    BlockDriverCompletionFunc *cb, void *opaque)
 3570: {
 3571:     BlockDriverAIOCB *acb;
 3572: 
 3573:     if (pool->free_aiocb) {
 3574:         acb = pool->free_aiocb;
 3575:         pool->free_aiocb = acb->next;
 3576:     } else {
 3577:         acb = g_malloc0(pool->aiocb_size);
 3578:         acb->pool = pool;
 3579:     }
 3580:     acb->bs = bs;
 3581:     acb->cb = cb;
 3582:     acb->opaque = opaque;
 3583:     return acb;
 3584: }
 3585: 
 3586: void qemu_aio_release(void *p)
 3587: {
 3588:     BlockDriverAIOCB *acb = (BlockDriverAIOCB *)p;
 3589:     AIOPool *pool = acb->pool;
 3590:     acb->next = pool->free_aiocb;
 3591:     pool->free_aiocb = acb;
 3592: }
 3593: 
 3594: /**************************************************************/
 3595: /* Coroutine block device emulation */
 3596: 
/* Completion state shared between bdrv_co_io_em() and its AIO callback */
typedef struct CoroutineIOCompletion {
    /* Coroutine to re-enter when the AIO request completes */
    Coroutine *coroutine;
    /* Result passed to the completion callback */
    int ret;
} CoroutineIOCompletion;
 3601: 
 3602: static void bdrv_co_io_em_complete(void *opaque, int ret)
 3603: {
 3604:     CoroutineIOCompletion *co = opaque;
 3605: 
 3606:     co->ret = ret;
 3607:     qemu_coroutine_enter(co->coroutine, NULL);
 3608: }
 3609: 
 3610: static int coroutine_fn bdrv_co_io_em(BlockDriverState *bs, int64_t sector_num,
 3611:                                       int nb_sectors, QEMUIOVector *iov,
 3612:                                       bool is_write)
 3613: {
 3614:     CoroutineIOCompletion co = {
 3615:         .coroutine = qemu_coroutine_self(),
 3616:     };
 3617:     BlockDriverAIOCB *acb;
 3618: 
 3619:     if (is_write) {
 3620:         acb = bs->drv->bdrv_aio_writev(bs, sector_num, iov, nb_sectors,
 3621:                                        bdrv_co_io_em_complete, &co);
 3622:     } else {
 3623:         acb = bs->drv->bdrv_aio_readv(bs, sector_num, iov, nb_sectors,
 3624:                                       bdrv_co_io_em_complete, &co);
 3625:     }
 3626: 
 3627:     trace_bdrv_co_io_em(bs, sector_num, nb_sectors, is_write, acb);
 3628:     if (!acb) {
 3629:         return -EIO;
 3630:     }
 3631:     qemu_coroutine_yield();
 3632: 
 3633:     return co.ret;
 3634: }
 3635: 
 3636: static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
 3637:                                          int64_t sector_num, int nb_sectors,
 3638:                                          QEMUIOVector *iov)
 3639: {
 3640:     return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, false);
 3641: }
 3642: 
 3643: static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
 3644:                                          int64_t sector_num, int nb_sectors,
 3645:                                          QEMUIOVector *iov)
 3646: {
 3647:     return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, true);
 3648: }
 3649: 
 3650: static void coroutine_fn bdrv_flush_co_entry(void *opaque)
 3651: {
 3652:     RwCo *rwco = opaque;
 3653: 
 3654:     rwco->ret = bdrv_co_flush(rwco->bs);
 3655: }
 3656: 
 3657: int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
 3658: {
 3659:     int ret;
 3660: 
 3661:     if (!bs || !bdrv_is_inserted(bs) || bdrv_is_read_only(bs)) {
 3662:         return 0;
 3663:     }
 3664: 
 3665:     /* Write back cached data to the OS even with cache=unsafe */
 3666:     if (bs->drv->bdrv_co_flush_to_os) {
 3667:         ret = bs->drv->bdrv_co_flush_to_os(bs);
 3668:         if (ret < 0) {
 3669:             return ret;
 3670:         }
 3671:     }
 3672: 
 3673:     /* But don't actually force it to the disk with cache=unsafe */
 3674:     if (bs->open_flags & BDRV_O_NO_FLUSH) {
 3675:         return 0;
 3676:     }
 3677: 
 3678:     if (bs->drv->bdrv_co_flush_to_disk) {
 3679:         ret = bs->drv->bdrv_co_flush_to_disk(bs);
 3680:     } else if (bs->drv->bdrv_aio_flush) {
 3681:         BlockDriverAIOCB *acb;
 3682:         CoroutineIOCompletion co = {
 3683:             .coroutine = qemu_coroutine_self(),
 3684:         };
 3685: 
 3686:         acb = bs->drv->bdrv_aio_flush(bs, bdrv_co_io_em_complete, &co);
 3687:         if (acb == NULL) {
 3688:             ret = -EIO;
 3689:         } else {
 3690:             qemu_coroutine_yield();
 3691:             ret = co.ret;
 3692:         }
 3693:     } else {
 3694:         /*
 3695:          * Some block drivers always operate in either writethrough or unsafe
 3696:          * mode and don't support bdrv_flush therefore. Usually qemu doesn't
 3697:          * know how the server works (because the behaviour is hardcoded or
 3698:          * depends on server-side configuration), so we can't ensure that
 3699:          * everything is safe on disk. Returning an error doesn't work because
 3700:          * that would break guests even if the server operates in writethrough
 3701:          * mode.
 3702:          *
 3703:          * Let's hope the user knows what he's doing.
 3704:          */
 3705:         ret = 0;
 3706:     }
 3707:     if (ret < 0) {
 3708:         return ret;
 3709:     }
 3710: 
 3711:     /* Now flush the underlying protocol.  It will also have BDRV_O_NO_FLUSH
 3712:      * in the case of cache=unsafe, so there are no useless flushes.
 3713:      */
 3714:     return bdrv_co_flush(bs->file);
 3715: }
 3716: 
 3717: void bdrv_invalidate_cache(BlockDriverState *bs)
 3718: {
 3719:     if (bs->drv && bs->drv->bdrv_invalidate_cache) {
 3720:         bs->drv->bdrv_invalidate_cache(bs);
 3721:     }
 3722: }
 3723: 
 3724: void bdrv_invalidate_cache_all(void)
 3725: {
 3726:     BlockDriverState *bs;
 3727: 
 3728:     QTAILQ_FOREACH(bs, &bdrv_states, list) {
 3729:         bdrv_invalidate_cache(bs);
 3730:     }
 3731: }
 3732: 
 3733: void bdrv_clear_incoming_migration_all(void)
 3734: {
 3735:     BlockDriverState *bs;
 3736: 
 3737:     QTAILQ_FOREACH(bs, &bdrv_states, list) {
 3738:         bs->open_flags = bs->open_flags & ~(BDRV_O_INCOMING);
 3739:     }
 3740: }
 3741: 
 3742: int bdrv_flush(BlockDriverState *bs)
 3743: {
 3744:     Coroutine *co;
 3745:     RwCo rwco = {
 3746:         .bs = bs,
 3747:         .ret = NOT_DONE,
 3748:     };
 3749: 
 3750:     if (qemu_in_coroutine()) {
 3751:         /* Fast-path if already in coroutine context */
 3752:         bdrv_flush_co_entry(&rwco);
 3753:     } else {
 3754:         co = qemu_coroutine_create(bdrv_flush_co_entry);
 3755:         qemu_coroutine_enter(co, &rwco);
 3756:         while (rwco.ret == NOT_DONE) {
 3757:             qemu_aio_wait();
 3758:         }
 3759:     }
 3760: 
 3761:     return rwco.ret;
 3762: }
 3763: 
 3764: static void coroutine_fn bdrv_discard_co_entry(void *opaque)
 3765: {
 3766:     RwCo *rwco = opaque;
 3767: 
 3768:     rwco->ret = bdrv_co_discard(rwco->bs, rwco->sector_num, rwco->nb_sectors);
 3769: }
 3770: 
 3771: int coroutine_fn bdrv_co_discard(BlockDriverState *bs, int64_t sector_num,
 3772:                                  int nb_sectors)
 3773: {
 3774:     if (!bs->drv) {
 3775:         return -ENOMEDIUM;
 3776:     } else if (bdrv_check_request(bs, sector_num, nb_sectors)) {
 3777:         return -EIO;
 3778:     } else if (bs->read_only) {
 3779:         return -EROFS;
 3780:     } else if (bs->drv->bdrv_co_discard) {
 3781:         return bs->drv->bdrv_co_discard(bs, sector_num, nb_sectors);
 3782:     } else if (bs->drv->bdrv_aio_discard) {
 3783:         BlockDriverAIOCB *acb;
 3784:         CoroutineIOCompletion co = {
 3785:             .coroutine = qemu_coroutine_self(),
 3786:         };
 3787: 
 3788:         acb = bs->drv->bdrv_aio_discard(bs, sector_num, nb_sectors,
 3789:                                         bdrv_co_io_em_complete, &co);
 3790:         if (acb == NULL) {
 3791:             return -EIO;
 3792:         } else {
 3793:             qemu_coroutine_yield();
 3794:             return co.ret;
 3795:         }
 3796:     } else {
 3797:         return 0;
 3798:     }
 3799: }
 3800: 
 3801: int bdrv_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors)
 3802: {
 3803:     Coroutine *co;
 3804:     RwCo rwco = {
 3805:         .bs = bs,
 3806:         .sector_num = sector_num,
 3807:         .nb_sectors = nb_sectors,
 3808:         .ret = NOT_DONE,
 3809:     };
 3810: 
 3811:     if (qemu_in_coroutine()) {
 3812:         /* Fast-path if already in coroutine context */
 3813:         bdrv_discard_co_entry(&rwco);
 3814:     } else {
 3815:         co = qemu_coroutine_create(bdrv_discard_co_entry);
 3816:         qemu_coroutine_enter(co, &rwco);
 3817:         while (rwco.ret == NOT_DONE) {
 3818:             qemu_aio_wait();
 3819:         }
 3820:     }
 3821: 
 3822:     return rwco.ret;
 3823: }
 3824: 
 3825: /**************************************************************/
 3826: /* removable device support */
 3827: 
 3828: /**
 3829:  * Return TRUE if the media is present
 3830:  */
 3831: int bdrv_is_inserted(BlockDriverState *bs)
 3832: {
 3833:     BlockDriver *drv = bs->drv;
 3834: 
 3835:     if (!drv)
 3836:         return 0;
 3837:     if (!drv->bdrv_is_inserted)
 3838:         return 1;
 3839:     return drv->bdrv_is_inserted(bs);
 3840: }
 3841: 
 3842: /**
 3843:  * Return whether the media changed since the last call to this
 3844:  * function, or -ENOTSUP if we don't know.  Most drivers don't know.
 3845:  */
 3846: int bdrv_media_changed(BlockDriverState *bs)
 3847: {
 3848:     BlockDriver *drv = bs->drv;
 3849: 
 3850:     if (drv && drv->bdrv_media_changed) {
 3851:         return drv->bdrv_media_changed(bs);
 3852:     }
 3853:     return -ENOTSUP;
 3854: }
 3855: 
 3856: /**
 3857:  * If eject_flag is TRUE, eject the media. Otherwise, close the tray
 3858:  */
 3859: void bdrv_eject(BlockDriverState *bs, bool eject_flag)
 3860: {
 3861:     BlockDriver *drv = bs->drv;
 3862: 
 3863:     if (drv && drv->bdrv_eject) {
 3864:         drv->bdrv_eject(bs, eject_flag);
 3865:     }
 3866: 
 3867:     if (bs->device_name[0] != '\0') {
 3868:         bdrv_emit_qmp_eject_event(bs, eject_flag);
 3869:     }
 3870: }
 3871: 
 3872: /**
 3873:  * Lock or unlock the media (if it is locked, the user won't be able
 3874:  * to eject it manually).
 3875:  */
 3876: void bdrv_lock_medium(BlockDriverState *bs, bool locked)
 3877: {
 3878:     BlockDriver *drv = bs->drv;
 3879: 
 3880:     trace_bdrv_lock_medium(bs, locked);
 3881: 
 3882:     if (drv && drv->bdrv_lock_medium) {
 3883:         drv->bdrv_lock_medium(bs, locked);
 3884:     }
 3885: }
 3886: 
 3887: /* needed for generic scsi interface */
 3888: 
 3889: int bdrv_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
 3890: {
 3891:     BlockDriver *drv = bs->drv;
 3892: 
 3893:     if (drv && drv->bdrv_ioctl)
 3894:         return drv->bdrv_ioctl(bs, req, buf);
 3895:     return -ENOTSUP;
 3896: }
 3897: 
 3898: BlockDriverAIOCB *bdrv_aio_ioctl(BlockDriverState *bs,
 3899:         unsigned long int req, void *buf,
 3900:         BlockDriverCompletionFunc *cb, void *opaque)
 3901: {
 3902:     BlockDriver *drv = bs->drv;
 3903: 
 3904:     if (drv && drv->bdrv_aio_ioctl)
 3905:         return drv->bdrv_aio_ioctl(bs, req, buf, cb, opaque);
 3906:     return NULL;
 3907: }
 3908: 
 3909: void bdrv_set_buffer_alignment(BlockDriverState *bs, int align)
 3910: {
 3911:     bs->buffer_alignment = align;
 3912: }
 3913: 
 3914: void *qemu_blockalign(BlockDriverState *bs, size_t size)
 3915: {
 3916:     return qemu_memalign((bs && bs->buffer_alignment) ? bs->buffer_alignment : 512, size);
 3917: }
 3918: 
 3919: void bdrv_set_dirty_tracking(BlockDriverState *bs, int enable)
 3920: {
 3921:     int64_t bitmap_size;
 3922: 
 3923:     bs->dirty_count = 0;
 3924:     if (enable) {
 3925:         if (!bs->dirty_bitmap) {
 3926:             bitmap_size = (bdrv_getlength(bs) >> BDRV_SECTOR_BITS) +
 3927:                     BDRV_SECTORS_PER_DIRTY_CHUNK * BITS_PER_LONG - 1;
 3928:             bitmap_size /= BDRV_SECTORS_PER_DIRTY_CHUNK * BITS_PER_LONG;
 3929: 
 3930:             bs->dirty_bitmap = g_new0(unsigned long, bitmap_size);
 3931:         }
 3932:     } else {
 3933:         if (bs->dirty_bitmap) {
 3934:             g_free(bs->dirty_bitmap);
 3935:             bs->dirty_bitmap = NULL;
 3936:         }
 3937:     }
 3938: }
 3939: 
 3940: int bdrv_get_dirty(BlockDriverState *bs, int64_t sector)
 3941: {
 3942:     int64_t chunk = sector / (int64_t)BDRV_SECTORS_PER_DIRTY_CHUNK;
 3943: 
 3944:     if (bs->dirty_bitmap &&
 3945:         (sector << BDRV_SECTOR_BITS) < bdrv_getlength(bs)) {
 3946:         return !!(bs->dirty_bitmap[chunk / (sizeof(unsigned long) * 8)] &
 3947:             (1UL << (chunk % (sizeof(unsigned long) * 8))));
 3948:     } else {
 3949:         return 0;
 3950:     }
 3951: }
 3952: 
 3953: void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector,
 3954:                       int nr_sectors)
 3955: {
 3956:     set_dirty_bitmap(bs, cur_sector, nr_sectors, 0);
 3957: }
 3958: 
 3959: int64_t bdrv_get_dirty_count(BlockDriverState *bs)
 3960: {
 3961:     return bs->dirty_count;
 3962: }
 3963: 
 3964: void bdrv_set_in_use(BlockDriverState *bs, int in_use)
 3965: {
 3966:     assert(bs->in_use != in_use);
 3967:     bs->in_use = in_use;
 3968: }
 3969: 
 3970: int bdrv_in_use(BlockDriverState *bs)
 3971: {
 3972:     return bs->in_use;
 3973: }
 3974: 
 3975: void bdrv_iostatus_enable(BlockDriverState *bs)
 3976: {
 3977:     bs->iostatus_enabled = true;
 3978:     bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
 3979: }
 3980: 
 3981: /* The I/O status is only enabled if the drive explicitly
 3982:  * enables it _and_ the VM is configured to stop on errors */
 3983: bool bdrv_iostatus_is_enabled(const BlockDriverState *bs)
 3984: {
 3985:     return (bs->iostatus_enabled &&
 3986:            (bs->on_write_error == BLOCK_ERR_STOP_ENOSPC ||
 3987:             bs->on_write_error == BLOCK_ERR_STOP_ANY    ||
 3988:             bs->on_read_error == BLOCK_ERR_STOP_ANY));
 3989: }
 3990: 
 3991: void bdrv_iostatus_disable(BlockDriverState *bs)
 3992: {
 3993:     bs->iostatus_enabled = false;
 3994: }
 3995: 
 3996: void bdrv_iostatus_reset(BlockDriverState *bs)
 3997: {
 3998:     if (bdrv_iostatus_is_enabled(bs)) {
 3999:         bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
 4000:     }
 4001: }
 4002: 
 4003: /* XXX: Today this is set by device models because it makes the implementation
 4004:    quite simple. However, the block layer knows about the error, so it's
 4005:    possible to implement this without device models being involved */
 4006: void bdrv_iostatus_set_err(BlockDriverState *bs, int error)
 4007: {
 4008:     if (bdrv_iostatus_is_enabled(bs) &&
 4009:         bs->iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
 4010:         assert(error >= 0);
 4011:         bs->iostatus = error == ENOSPC ? BLOCK_DEVICE_IO_STATUS_NOSPACE :
 4012:                                          BLOCK_DEVICE_IO_STATUS_FAILED;
 4013:     }
 4014: }
 4015: 
 4016: void
 4017: bdrv_acct_start(BlockDriverState *bs, BlockAcctCookie *cookie, int64_t bytes,
 4018:         enum BlockAcctType type)
 4019: {
 4020:     assert(type < BDRV_MAX_IOTYPE);
 4021: 
 4022:     cookie->bytes = bytes;
 4023:     cookie->start_time_ns = get_clock();
 4024:     cookie->type = type;
 4025: }
 4026: 
 4027: void
 4028: bdrv_acct_done(BlockDriverState *bs, BlockAcctCookie *cookie)
 4029: {
 4030:     assert(cookie->type < BDRV_MAX_IOTYPE);
 4031: 
 4032:     bs->nr_bytes[cookie->type] += cookie->bytes;
 4033:     bs->nr_ops[cookie->type]++;
 4034:     bs->total_time_ns[cookie->type] += get_clock() - cookie->start_time_ns;
 4035: }
 4036: 
 4037: int bdrv_img_create(const char *filename, const char *fmt,
 4038:                     const char *base_filename, const char *base_fmt,
 4039:                     char *options, uint64_t img_size, int flags)
 4040: {
 4041:     QEMUOptionParameter *param = NULL, *create_options = NULL;
 4042:     QEMUOptionParameter *backing_fmt, *backing_file, *size;
 4043:     BlockDriverState *bs = NULL;
 4044:     BlockDriver *drv, *proto_drv;
 4045:     BlockDriver *backing_drv = NULL;
 4046:     int ret = 0;
 4047: 
 4048:     /* Find driver and parse its options */
 4049:     drv = bdrv_find_format(fmt);
 4050:     if (!drv) {
 4051:         error_report("Unknown file format '%s'", fmt);
 4052:         ret = -EINVAL;
 4053:         goto out;
 4054:     }
 4055: 
 4056:     proto_drv = bdrv_find_protocol(filename);
 4057:     if (!proto_drv) {
 4058:         error_report("Unknown protocol '%s'", filename);
 4059:         ret = -EINVAL;
 4060:         goto out;
 4061:     }
 4062: 
 4063:     create_options = append_option_parameters(create_options,
 4064:                                               drv->create_options);
 4065:     create_options = append_option_parameters(create_options,
 4066:                                               proto_drv->create_options);
 4067: 
 4068:     /* Create parameter list with default values */
 4069:     param = parse_option_parameters("", create_options, param);
 4070: 
 4071:     set_option_parameter_int(param, BLOCK_OPT_SIZE, img_size);
 4072: 
 4073:     /* Parse -o options */
 4074:     if (options) {
 4075:         param = parse_option_parameters(options, create_options, param);
 4076:         if (param == NULL) {
 4077:             error_report("Invalid options for file format '%s'.", fmt);
 4078:             ret = -EINVAL;
 4079:             goto out;
 4080:         }
 4081:     }
 4082: 
 4083:     if (base_filename) {
 4084:         if (set_option_parameter(param, BLOCK_OPT_BACKING_FILE,
 4085:                                  base_filename)) {
 4086:             error_report("Backing file not supported for file format '%s'",
 4087:                          fmt);
 4088:             ret = -EINVAL;
 4089:             goto out;
 4090:         }
 4091:     }
 4092: 
 4093:     if (base_fmt) {
 4094:         if (set_option_parameter(param, BLOCK_OPT_BACKING_FMT, base_fmt)) {
 4095:             error_report("Backing file format not supported for file "
 4096:                          "format '%s'", fmt);
 4097:             ret = -EINVAL;
 4098:             goto out;
 4099:         }
 4100:     }
 4101: 
 4102:     backing_file = get_option_parameter(param, BLOCK_OPT_BACKING_FILE);
 4103:     if (backing_file && backing_file->value.s) {
 4104:         if (!strcmp(filename, backing_file->value.s)) {
 4105:             error_report("Error: Trying to create an image with the "
 4106:                          "same filename as the backing file");
 4107:             ret = -EINVAL;
 4108:             goto out;
 4109:         }
 4110:     }
 4111: 
 4112:     backing_fmt = get_option_parameter(param, BLOCK_OPT_BACKING_FMT);
 4113:     if (backing_fmt && backing_fmt->value.s) {
 4114:         backing_drv = bdrv_find_format(backing_fmt->value.s);
 4115:         if (!backing_drv) {
 4116:             error_report("Unknown backing file format '%s'",
 4117:                          backing_fmt->value.s);
 4118:             ret = -EINVAL;
 4119:             goto out;
 4120:         }
 4121:     }
 4122: 
 4123:     // The size for the image must always be specified, with one exception:
 4124:     // If we are using a backing file, we can obtain the size from there
 4125:     size = get_option_parameter(param, BLOCK_OPT_SIZE);
 4126:     if (size && size->value.n == -1) {
 4127:         if (backing_file && backing_file->value.s) {
 4128:             uint64_t size;
 4129:             char buf[32];
 4130:             int back_flags;
 4131: 
 4132:             /* backing files always opened read-only */
 4133:             back_flags =
 4134:                 flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
 4135: 
 4136:             bs = bdrv_new("");
 4137: 
 4138:             ret = bdrv_open(bs, backing_file->value.s, back_flags, backing_drv);
 4139:             if (ret < 0) {
 4140:                 error_report("Could not open '%s'", backing_file->value.s);
 4141:                 goto out;
 4142:             }
 4143:             bdrv_get_geometry(bs, &size);
 4144:             size *= 512;
 4145: 
 4146:             snprintf(buf, sizeof(buf), "%" PRId64, size);
 4147:             set_option_parameter(param, BLOCK_OPT_SIZE, buf);
 4148:         } else {
 4149:             error_report("Image creation needs a size parameter");
 4150:             ret = -EINVAL;
 4151:             goto out;
 4152:         }
 4153:     }
 4154: 
 4155:     printf("Formatting '%s', fmt=%s ", filename, fmt);
 4156:     print_option_parameters(param);
 4157:     puts("");
 4158: 
 4159:     ret = bdrv_create(drv, filename, param);
 4160: 
 4161:     if (ret < 0) {
 4162:         if (ret == -ENOTSUP) {
 4163:             error_report("Formatting or formatting option not supported for "
 4164:                          "file format '%s'", fmt);
 4165:         } else if (ret == -EFBIG) {
 4166:             error_report("The image size is too large for file format '%s'",
 4167:                          fmt);
 4168:         } else {
 4169:             error_report("%s: error while creating %s: %s", filename, fmt,
 4170:                          strerror(-ret));
 4171:         }
 4172:     }
 4173: 
 4174: out:
 4175:     free_option_parameters(create_options);
 4176:     free_option_parameters(param);
 4177: 
 4178:     if (bs) {
 4179:         bdrv_delete(bs);
 4180:     }
 4181: 
 4182:     return ret;
 4183: }
 4184: 
 4185: void *block_job_create(const BlockJobType *job_type, BlockDriverState *bs,
 4186:                        int64_t speed, BlockDriverCompletionFunc *cb,
 4187:                        void *opaque, Error **errp)
 4188: {
 4189:     BlockJob *job;
 4190: 
 4191:     if (bs->job || bdrv_in_use(bs)) {
 4192:         error_set(errp, QERR_DEVICE_IN_USE, bdrv_get_device_name(bs));
 4193:         return NULL;
 4194:     }
 4195:     bdrv_set_in_use(bs, 1);
 4196: 
 4197:     job = g_malloc0(job_type->instance_size);
 4198:     job->job_type      = job_type;
 4199:     job->bs            = bs;
 4200:     job->cb            = cb;
 4201:     job->opaque        = opaque;
 4202:     job->busy          = true;
 4203:     bs->job = job;
 4204: 
 4205:     /* Only set speed when necessary to avoid NotSupported error */
 4206:     if (speed != 0) {
 4207:         Error *local_err = NULL;
 4208: 
 4209:         block_job_set_speed(job, speed, &local_err);
 4210:         if (error_is_set(&local_err)) {
 4211:             bs->job = NULL;
 4212:             g_free(job);
 4213:             bdrv_set_in_use(bs, 0);
 4214:             error_propagate(errp, local_err);
 4215:             return NULL;
 4216:         }
 4217:     }
 4218:     return job;
 4219: }
 4220: 
 4221: void block_job_complete(BlockJob *job, int ret)
 4222: {
 4223:     BlockDriverState *bs = job->bs;
 4224: 
 4225:     assert(bs->job == job);
 4226:     job->cb(job->opaque, ret);
 4227:     bs->job = NULL;
 4228:     g_free(job);
 4229:     bdrv_set_in_use(bs, 0);
 4230: }
 4231: 
 4232: void block_job_set_speed(BlockJob *job, int64_t speed, Error **errp)
 4233: {
 4234:     Error *local_err = NULL;
 4235: 
 4236:     if (!job->job_type->set_speed) {
 4237:         error_set(errp, QERR_NOT_SUPPORTED);
 4238:         return;
 4239:     }
 4240:     job->job_type->set_speed(job, speed, &local_err);
 4241:     if (error_is_set(&local_err)) {
 4242:         error_propagate(errp, local_err);
 4243:         return;
 4244:     }
 4245: 
 4246:     job->speed = speed;
 4247: }
 4248: 
 4249: void block_job_cancel(BlockJob *job)
 4250: {
 4251:     job->cancelled = true;
 4252:     if (job->co && !job->busy) {
 4253:         qemu_coroutine_enter(job->co, NULL);
 4254:     }
 4255: }
 4256: 
 4257: bool block_job_is_cancelled(BlockJob *job)
 4258: {
 4259:     return job->cancelled;
 4260: }
 4261: 
/*
 * State shared between block_job_cancel_sync() and block_job_cancel_cb()
 * while a synchronous cancel is waiting for the job to finish.
 */
struct BlockCancelData {
    BlockJob *job;                  /* job being cancelled */
    BlockDriverCompletionFunc *cb;  /* job's original completion callback */
    void *opaque;                   /* argument for the original callback */
    bool cancelled;                 /* job's cancelled flag at completion */
    int ret;                        /* completion result; -EINPROGRESS until then */
};
 4269: 
 4270: static void block_job_cancel_cb(void *opaque, int ret)
 4271: {
 4272:     struct BlockCancelData *data = opaque;
 4273: 
 4274:     data->cancelled = block_job_is_cancelled(data->job);
 4275:     data->ret = ret;
 4276:     data->cb(data->opaque, ret);
 4277: }
 4278: 
 4279: int block_job_cancel_sync(BlockJob *job)
 4280: {
 4281:     struct BlockCancelData data;
 4282:     BlockDriverState *bs = job->bs;
 4283: 
 4284:     assert(bs->job == job);
 4285: 
 4286:     /* Set up our own callback to store the result and chain to
 4287:      * the original callback.
 4288:      */
 4289:     data.job = job;
 4290:     data.cb = job->cb;
 4291:     data.opaque = job->opaque;
 4292:     data.ret = -EINPROGRESS;
 4293:     job->cb = block_job_cancel_cb;
 4294:     job->opaque = &data;
 4295:     block_job_cancel(job);
 4296:     while (data.ret == -EINPROGRESS) {
 4297:         qemu_aio_wait();
 4298:     }
 4299:     return (data.cancelled && data.ret == 0) ? -ECANCELED : data.ret;
 4300: }
 4301: 
 4302: void block_job_sleep_ns(BlockJob *job, QEMUClock *clock, int64_t ns)
 4303: {
 4304:     /* Check cancellation *before* setting busy = false, too!  */
 4305:     if (!block_job_is_cancelled(job)) {
 4306:         job->busy = false;
 4307:         co_sleep_ns(clock, ns);
 4308:         job->busy = true;
 4309:     }
 4310: }

unix.superglobalmegacorp.com