Annotation of qemu/block.c, revision 1.1.1.23

1.1       root        1: /*
                      2:  * QEMU System Emulator block driver
1.1.1.6   root        3:  *
1.1       root        4:  * Copyright (c) 2003 Fabrice Bellard
1.1.1.6   root        5:  *
1.1       root        6:  * Permission is hereby granted, free of charge, to any person obtaining a copy
                      7:  * of this software and associated documentation files (the "Software"), to deal
                      8:  * in the Software without restriction, including without limitation the rights
                      9:  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
                     10:  * copies of the Software, and to permit persons to whom the Software is
                     11:  * furnished to do so, subject to the following conditions:
                     12:  *
                     13:  * The above copyright notice and this permission notice shall be included in
                     14:  * all copies or substantial portions of the Software.
                     15:  *
                     16:  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
                     17:  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
                     18:  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
                     19:  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
                     20:  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
                     21:  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
                     22:  * THE SOFTWARE.
                     23:  */
1.1.1.7   root       24: #include "config-host.h"
1.1.1.6   root       25: #include "qemu-common.h"
1.1.1.19  root       26: #include "trace.h"
1.1.1.13  root       27: #include "monitor.h"
1.1       root       28: #include "block_int.h"
1.1.1.13  root       29: #include "module.h"
1.1.1.22  root       30: #include "qjson.h"
                     31: #include "qemu-coroutine.h"
                     32: #include "qmp-commands.h"
1.1.1.23! root       33: #include "qemu-timer.h"
1.1       root       34: 
1.1.1.14  root       35: #ifdef CONFIG_BSD
1.1       root       36: #include <sys/types.h>
                     37: #include <sys/stat.h>
                     38: #include <sys/ioctl.h>
1.1.1.14  root       39: #include <sys/queue.h>
1.1.1.13  root       40: #ifndef __DragonFly__
1.1       root       41: #include <sys/disk.h>
                     42: #endif
1.1.1.13  root       43: #endif
                     44: 
                     45: #ifdef _WIN32
                     46: #include <windows.h>
                     47: #endif
1.1       root       48: 
1.1.1.22  root       49: #define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */
                     50: 
/*
 * Per-request flag bits.  A value of this type is the trailing argument of
 * bdrv_co_do_readv() and bdrv_co_do_writev(); each constant occupies its own
 * distinct bit.
 */
typedef enum {
    BDRV_REQ_COPY_ON_READ = 1 << 0,
    BDRV_REQ_ZERO_WRITE   = 1 << 1,
} BdrvRequestFlags;
        !            55: 
/*
 * Forward declarations of file-local helpers that are referenced before
 * their definitions.
 */
static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load);

/* AIO emulation; bdrv_register() installs these for drivers without AIO */
static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
        BlockDriverCompletionFunc *cb, void *opaque);
static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
        BlockDriverCompletionFunc *cb, void *opaque);

/* Coroutine emulation; bdrv_register() installs these for drivers that do
 * not provide bdrv_co_readv */
static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
                                         int64_t sector_num, int nb_sectors,
                                         QEMUIOVector *iov);
static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
                                         int64_t sector_num, int nb_sectors,
                                         QEMUIOVector *iov);

/* Coroutine read/write helpers that additionally take BdrvRequestFlags */
static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
    int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
    BdrvRequestFlags flags);
static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
    int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
    BdrvRequestFlags flags);
static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
                                               int64_t sector_num,
                                               QEMUIOVector *qiov,
                                               int nb_sectors,
                                               BlockDriverCompletionFunc *cb,
                                               void *opaque,
                                               bool is_write);
static void coroutine_fn bdrv_co_do_rw(void *opaque);
static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
    int64_t sector_num, int nb_sectors);

/*
 * I/O throttling predicates; bdrv_exceed_io_limits() is the one called by
 * bdrv_io_limits_intercept().
 * NOTE(review): the first two take 'uint64_t *wait' while
 * bdrv_exceed_io_limits() takes 'int64_t *wait' -- confirm the mismatch is
 * intentional.
 */
static bool bdrv_exceed_bps_limits(BlockDriverState *bs, int nb_sectors,
        bool is_write, double elapsed_time, uint64_t *wait);
static bool bdrv_exceed_iops_limits(BlockDriverState *bs, bool is_write,
        double elapsed_time, uint64_t *wait);
static bool bdrv_exceed_io_limits(BlockDriverState *bs, int nb_sectors,
        bool is_write, int64_t *wait);
1.1.1.3   root       92: 
/* List of block device states; bdrv_new() appends every state that was given
 * a non-empty device name to the tail of this list. */
static QTAILQ_HEAD(, BlockDriverState) bdrv_states =
    QTAILQ_HEAD_INITIALIZER(bdrv_states);

/* List of registered block drivers; bdrv_register() inserts at the head, and
 * the lookup/probe helpers in this file iterate over it. */
static QLIST_HEAD(, BlockDriver) bdrv_drivers =
    QLIST_HEAD_INITIALIZER(bdrv_drivers);

/* The device to use for VM snapshots */
static BlockDriverState *bs_snapshots;

/* If non-zero, use only whitelisted block drivers */
static int use_bdrv_whitelist;
                    104: 
1.1.1.19  root      105: #ifdef _WIN32
                    106: static int is_windows_drive_prefix(const char *filename)
                    107: {
                    108:     return (((filename[0] >= 'a' && filename[0] <= 'z') ||
                    109:              (filename[0] >= 'A' && filename[0] <= 'Z')) &&
                    110:             filename[1] == ':');
                    111: }
                    112: 
                    113: int is_windows_drive(const char *filename)
                    114: {
                    115:     if (is_windows_drive_prefix(filename) &&
                    116:         filename[2] == '\0')
                    117:         return 1;
                    118:     if (strstart(filename, "\\\\.\\", NULL) ||
                    119:         strstart(filename, "//./", NULL))
                    120:         return 1;
                    121:     return 0;
                    122: }
                    123: #endif
                    124: 
1.1.1.23! root      125: /* throttling disk I/O limits */
        !           126: void bdrv_io_limits_disable(BlockDriverState *bs)
        !           127: {
        !           128:     bs->io_limits_enabled = false;
        !           129: 
        !           130:     while (qemu_co_queue_next(&bs->throttled_reqs));
        !           131: 
        !           132:     if (bs->block_timer) {
        !           133:         qemu_del_timer(bs->block_timer);
        !           134:         qemu_free_timer(bs->block_timer);
        !           135:         bs->block_timer = NULL;
        !           136:     }
        !           137: 
        !           138:     bs->slice_start = 0;
        !           139:     bs->slice_end   = 0;
        !           140:     bs->slice_time  = 0;
        !           141:     memset(&bs->io_base, 0, sizeof(bs->io_base));
        !           142: }
        !           143: 
        !           144: static void bdrv_block_timer(void *opaque)
        !           145: {
        !           146:     BlockDriverState *bs = opaque;
        !           147: 
        !           148:     qemu_co_queue_next(&bs->throttled_reqs);
        !           149: }
        !           150: 
        !           151: void bdrv_io_limits_enable(BlockDriverState *bs)
        !           152: {
        !           153:     qemu_co_queue_init(&bs->throttled_reqs);
        !           154:     bs->block_timer = qemu_new_timer_ns(vm_clock, bdrv_block_timer, bs);
        !           155:     bs->slice_time  = 5 * BLOCK_IO_SLICE_TIME;
        !           156:     bs->slice_start = qemu_get_clock_ns(vm_clock);
        !           157:     bs->slice_end   = bs->slice_start + bs->slice_time;
        !           158:     memset(&bs->io_base, 0, sizeof(bs->io_base));
        !           159:     bs->io_limits_enabled = true;
        !           160: }
        !           161: 
        !           162: bool bdrv_io_limits_enabled(BlockDriverState *bs)
        !           163: {
        !           164:     BlockIOLimit *io_limits = &bs->io_limits;
        !           165:     return io_limits->bps[BLOCK_IO_LIMIT_READ]
        !           166:          || io_limits->bps[BLOCK_IO_LIMIT_WRITE]
        !           167:          || io_limits->bps[BLOCK_IO_LIMIT_TOTAL]
        !           168:          || io_limits->iops[BLOCK_IO_LIMIT_READ]
        !           169:          || io_limits->iops[BLOCK_IO_LIMIT_WRITE]
        !           170:          || io_limits->iops[BLOCK_IO_LIMIT_TOTAL];
        !           171: }
        !           172: 
        !           173: static void bdrv_io_limits_intercept(BlockDriverState *bs,
        !           174:                                      bool is_write, int nb_sectors)
        !           175: {
        !           176:     int64_t wait_time = -1;
        !           177: 
        !           178:     if (!qemu_co_queue_empty(&bs->throttled_reqs)) {
        !           179:         qemu_co_queue_wait(&bs->throttled_reqs);
        !           180:     }
        !           181: 
        !           182:     /* In fact, we hope to keep each request's timing, in FIFO mode. The next
        !           183:      * throttled requests will not be dequeued until the current request is
        !           184:      * allowed to be serviced. So if the current request still exceeds the
        !           185:      * limits, it will be inserted to the head. All requests followed it will
        !           186:      * be still in throttled_reqs queue.
        !           187:      */
        !           188: 
        !           189:     while (bdrv_exceed_io_limits(bs, nb_sectors, is_write, &wait_time)) {
        !           190:         qemu_mod_timer(bs->block_timer,
        !           191:                        wait_time + qemu_get_clock_ns(vm_clock));
        !           192:         qemu_co_queue_wait_insert_head(&bs->throttled_reqs);
        !           193:     }
        !           194: 
        !           195:     qemu_co_queue_next(&bs->throttled_reqs);
        !           196: }
        !           197: 
/*
 * Return 1 if path starts with "<protocol>:", i.e. a ':' appears before any
 * path separator; return 0 otherwise.  On Windows a drive specification is
 * never treated as a protocol and '\' also counts as a separator.
 */
static int path_has_protocol(const char *path)
{
#ifdef _WIN32
    static const char stop_chars[] = ":/\\";

    if (is_windows_drive(path) ||
        is_windows_drive_prefix(path)) {
        return 0;
    }
#else
    static const char stop_chars[] = ":/";
#endif

    /* index of the first stop character, or of the terminating NUL */
    return path[strcspn(path, stop_chars)] == ':';
}
                    215: 
/*
 * Return 1 if path is absolute, 0 otherwise.  A path is absolute when it
 * begins with '/'; on Windows a leading '\' or anything accepted by
 * is_windows_drive() / is_windows_drive_prefix() also qualifies.
 */
int path_is_absolute(const char *path)
{
    const char first = path[0];

#ifdef _WIN32
    /* specific case for names like: "\\.\d:" */
    if (is_windows_drive(path) || is_windows_drive_prefix(path)) {
        return 1;
    }
    if (first == '\\') {
        return 1;
    }
#endif
    return first == '/';
}
                    228: 
/*
 * Build in dest (at most dest_size bytes, always NUL-terminated when
 * dest_size > 0) the location of filename relative to base_path.
 *
 * If filename is absolute it is simply copied.  Otherwise everything in
 * base_path up to and including its last path separator -- or, when that
 * lies further right, up to and including its first ':' -- is kept and
 * filename is appended to it.  This way URL-like base paths are supported.
 * Nothing is written when dest_size <= 0.
 */
void path_combine(char *dest, int dest_size,
                  const char *base_path,
                  const char *filename)
{
    const char *after_colon, *after_sep, *prefix_end;
    int prefix_len;

    if (dest_size <= 0) {
        return;
    }
    if (path_is_absolute(filename)) {
        pstrcpy(dest, dest_size, filename);
        return;
    }

    /* position just past the first ':' (or the start if there is none) */
    after_colon = strchr(base_path, ':');
    after_colon = after_colon ? after_colon + 1 : base_path;

    /* position just past the last separator (or the start if none) */
    after_sep = strrchr(base_path, '/');
#ifdef _WIN32
    {
        const char *backslash = strrchr(base_path, '\\');
        if (!after_sep || backslash > after_sep) {
            after_sep = backslash;
        }
    }
#endif
    after_sep = after_sep ? after_sep + 1 : base_path;

    /* keep whichever prefix is longer, clamped to the room available */
    prefix_end = (after_sep > after_colon) ? after_sep : after_colon;
    prefix_len = prefix_end - base_path;
    if (prefix_len > dest_size - 1) {
        prefix_len = dest_size - 1;
    }
    memcpy(dest, base_path, prefix_len);
    dest[prefix_len] = '\0';
    pstrcat(dest, dest_size, filename);
}
                    272: 
1.1.1.23! root      273: void bdrv_get_full_backing_filename(BlockDriverState *bs, char *dest, size_t sz)
        !           274: {
        !           275:     if (bs->backing_file[0] == '\0' || path_has_protocol(bs->backing_file)) {
        !           276:         pstrcpy(dest, sz, bs->backing_file);
        !           277:     } else {
        !           278:         path_combine(dest, sz, bs->filename, bs->backing_file);
        !           279:     }
        !           280: }
        !           281: 
1.1.1.13  root      282: void bdrv_register(BlockDriver *bdrv)
1.1       root      283: {
1.1.1.22  root      284:     /* Block drivers without coroutine functions need emulation */
                    285:     if (!bdrv->bdrv_co_readv) {
                    286:         bdrv->bdrv_co_readv = bdrv_co_readv_em;
                    287:         bdrv->bdrv_co_writev = bdrv_co_writev_em;
                    288: 
                    289:         /* bdrv_co_readv_em()/brdv_co_writev_em() work in terms of aio, so if
                    290:          * the block driver lacks aio we need to emulate that too.
                    291:          */
                    292:         if (!bdrv->bdrv_aio_readv) {
                    293:             /* add AIO emulation layer */
                    294:             bdrv->bdrv_aio_readv = bdrv_aio_readv_em;
                    295:             bdrv->bdrv_aio_writev = bdrv_aio_writev_em;
                    296:         }
1.1.1.5   root      297:     }
1.1.1.14  root      298: 
1.1.1.18  root      299:     QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list);
1.1       root      300: }
                    301: 
                    302: /* create a new block device (by default it is empty) */
                    303: BlockDriverState *bdrv_new(const char *device_name)
                    304: {
1.1.1.18  root      305:     BlockDriverState *bs;
1.1       root      306: 
1.1.1.22  root      307:     bs = g_malloc0(sizeof(BlockDriverState));
1.1       root      308:     pstrcpy(bs->device_name, sizeof(bs->device_name), device_name);
                    309:     if (device_name[0] != '\0') {
1.1.1.18  root      310:         QTAILQ_INSERT_TAIL(&bdrv_states, bs, list);
1.1       root      311:     }
1.1.1.22  root      312:     bdrv_iostatus_disable(bs);
1.1       root      313:     return bs;
                    314: }
                    315: 
                    316: BlockDriver *bdrv_find_format(const char *format_name)
                    317: {
                    318:     BlockDriver *drv1;
1.1.1.18  root      319:     QLIST_FOREACH(drv1, &bdrv_drivers, list) {
                    320:         if (!strcmp(drv1->format_name, format_name)) {
1.1       root      321:             return drv1;
1.1.1.18  root      322:         }
1.1       root      323:     }
                    324:     return NULL;
                    325: }
                    326: 
1.1.1.14  root      327: static int bdrv_is_whitelisted(BlockDriver *drv)
                    328: {
                    329:     static const char *whitelist[] = {
                    330:         CONFIG_BDRV_WHITELIST
                    331:     };
                    332:     const char **p;
                    333: 
                    334:     if (!whitelist[0])
                    335:         return 1;               /* no whitelist, anything goes */
                    336: 
                    337:     for (p = whitelist; *p; p++) {
                    338:         if (!strcmp(drv->format_name, *p)) {
                    339:             return 1;
                    340:         }
                    341:     }
                    342:     return 0;
                    343: }
                    344: 
                    345: BlockDriver *bdrv_find_whitelisted_format(const char *format_name)
                    346: {
                    347:     BlockDriver *drv = bdrv_find_format(format_name);
                    348:     return drv && bdrv_is_whitelisted(drv) ? drv : NULL;
                    349: }
                    350: 
/* Argument/result bundle handed to bdrv_create_co_entry() by bdrv_create() */
typedef struct CreateCo {
    BlockDriver *drv;               /* driver whose bdrv_create is invoked */
    char *filename;                 /* first argument to drv->bdrv_create */
    QEMUOptionParameter *options;   /* second argument to drv->bdrv_create */
    int ret;                        /* result of drv->bdrv_create; NOT_DONE
                                       until the call has returned */
} CreateCo;
        !           357: 
        !           358: static void coroutine_fn bdrv_create_co_entry(void *opaque)
        !           359: {
        !           360:     CreateCo *cco = opaque;
        !           361:     assert(cco->drv);
        !           362: 
        !           363:     cco->ret = cco->drv->bdrv_create(cco->filename, cco->options);
        !           364: }
        !           365: 
1.1.1.13  root      366: int bdrv_create(BlockDriver *drv, const char* filename,
                    367:     QEMUOptionParameter *options)
1.1       root      368: {
1.1.1.23! root      369:     int ret;
        !           370: 
        !           371:     Coroutine *co;
        !           372:     CreateCo cco = {
        !           373:         .drv = drv,
        !           374:         .filename = g_strdup(filename),
        !           375:         .options = options,
        !           376:         .ret = NOT_DONE,
        !           377:     };
        !           378: 
        !           379:     if (!drv->bdrv_create) {
1.1       root      380:         return -ENOTSUP;
1.1.1.23! root      381:     }
1.1.1.13  root      382: 
1.1.1.23! root      383:     if (qemu_in_coroutine()) {
        !           384:         /* Fast-path if already in coroutine context */
        !           385:         bdrv_create_co_entry(&cco);
        !           386:     } else {
        !           387:         co = qemu_coroutine_create(bdrv_create_co_entry);
        !           388:         qemu_coroutine_enter(co, &cco);
        !           389:         while (cco.ret == NOT_DONE) {
        !           390:             qemu_aio_wait();
        !           391:         }
        !           392:     }
        !           393: 
        !           394:     ret = cco.ret;
        !           395:     g_free(cco.filename);
        !           396: 
        !           397:     return ret;
1.1       root      398: }
                    399: 
1.1.1.18  root      400: int bdrv_create_file(const char* filename, QEMUOptionParameter *options)
                    401: {
                    402:     BlockDriver *drv;
                    403: 
                    404:     drv = bdrv_find_protocol(filename);
                    405:     if (drv == NULL) {
1.1.1.19  root      406:         return -ENOENT;
1.1.1.18  root      407:     }
                    408: 
                    409:     return bdrv_create(drv, filename, options);
                    410: }
                    411: 
/*
 * Create a uniquely-named empty temporary file.
 *
 * filename: output buffer receiving the name of the created file
 * size:     size of the filename buffer in bytes
 *
 * Return 0 upon success, otherwise a negative errno value (on Windows, the
 * negated GetLastError() code).  On POSIX hosts the file is placed in
 * $TMPDIR, or in /tmp when TMPDIR is unset; -EOVERFLOW is returned when the
 * buffer is too small for the name template.  No file is left behind when
 * an error is returned.
 */
int get_tmp_filename(char *filename, int size)
{
#ifdef _WIN32
    char temp_dir[MAX_PATH];
    /* GetTempFileName requires that its output buffer (4th param)
       have length MAX_PATH or greater.  */
    assert(size >= MAX_PATH);
    return (GetTempPath(MAX_PATH, temp_dir)
            && GetTempFileName(temp_dir, "qem", 0, filename)
            ? 0 : -GetLastError());
#else
    int fd;
    int ret;
    const char *tmpdir;

    tmpdir = getenv("TMPDIR");
    if (!tmpdir) {
        tmpdir = "/tmp";
    }
    if (snprintf(filename, size, "%s/vl.XXXXXX", tmpdir) >= size) {
        return -EOVERFLOW;
    }
    fd = mkstemp(filename);
    if (fd < 0) {
        return -errno;
    }
    if (close(fd) != 0) {
        /* Save errno first: unlink() may overwrite it.  Then remove the
         * file so a failed call does not leave a stray temporary behind. */
        ret = -errno;
        unlink(filename);
        return ret;
    }
    return 0;
#endif
}
1.1       root      442: 
1.1.1.13  root      443: /*
                    444:  * Detect host devices. By convention, /dev/cdrom[N] is always
                    445:  * recognized as a host CDROM.
                    446:  */
                    447: static BlockDriver *find_hdev_driver(const char *filename)
                    448: {
                    449:     int score_max = 0, score;
                    450:     BlockDriver *drv = NULL, *d;
                    451: 
1.1.1.18  root      452:     QLIST_FOREACH(d, &bdrv_drivers, list) {
1.1.1.13  root      453:         if (d->bdrv_probe_device) {
                    454:             score = d->bdrv_probe_device(filename);
                    455:             if (score > score_max) {
                    456:                 score_max = score;
                    457:                 drv = d;
                    458:             }
                    459:         }
                    460:     }
                    461: 
                    462:     return drv;
                    463: }
                    464: 
1.1.1.18  root      465: BlockDriver *bdrv_find_protocol(const char *filename)
                    466: {
                    467:     BlockDriver *drv1;
                    468:     char protocol[128];
                    469:     int len;
                    470:     const char *p;
                    471: 
                    472:     /* TODO Drivers without bdrv_file_open must be specified explicitly */
                    473: 
                    474:     /*
                    475:      * XXX(hch): we really should not let host device detection
                    476:      * override an explicit protocol specification, but moving this
                    477:      * later breaks access to device names with colons in them.
                    478:      * Thanks to the brain-dead persistent naming schemes on udev-
                    479:      * based Linux systems those actually are quite common.
                    480:      */
                    481:     drv1 = find_hdev_driver(filename);
                    482:     if (drv1) {
                    483:         return drv1;
                    484:     }
                    485: 
1.1.1.19  root      486:     if (!path_has_protocol(filename)) {
1.1.1.18  root      487:         return bdrv_find_format("file");
                    488:     }
1.1.1.19  root      489:     p = strchr(filename, ':');
                    490:     assert(p != NULL);
1.1.1.18  root      491:     len = p - filename;
                    492:     if (len > sizeof(protocol) - 1)
                    493:         len = sizeof(protocol) - 1;
                    494:     memcpy(protocol, filename, len);
                    495:     protocol[len] = '\0';
                    496:     QLIST_FOREACH(drv1, &bdrv_drivers, list) {
                    497:         if (drv1->protocol_name &&
                    498:             !strcmp(drv1->protocol_name, protocol)) {
                    499:             return drv1;
                    500:         }
                    501:     }
                    502:     return NULL;
                    503: }
                    504: 
                    505: static int find_image_format(const char *filename, BlockDriver **pdrv)
1.1       root      506: {
1.1.1.5   root      507:     int ret, score, score_max;
1.1       root      508:     BlockDriver *drv1, *drv;
1.1.1.5   root      509:     uint8_t buf[2048];
                    510:     BlockDriverState *bs;
1.1.1.6   root      511: 
1.1.1.18  root      512:     ret = bdrv_file_open(&bs, filename, 0);
                    513:     if (ret < 0) {
                    514:         *pdrv = NULL;
                    515:         return ret;
                    516:     }
                    517: 
                    518:     /* Return the raw BlockDriver * to scsi-generic devices or empty drives */
                    519:     if (bs->sg || !bdrv_is_inserted(bs)) {
                    520:         bdrv_delete(bs);
                    521:         drv = bdrv_find_format("raw");
                    522:         if (!drv) {
                    523:             ret = -ENOENT;
                    524:         }
                    525:         *pdrv = drv;
                    526:         return ret;
                    527:     }
1.1.1.5   root      528: 
                    529:     ret = bdrv_pread(bs, 0, buf, sizeof(buf));
                    530:     bdrv_delete(bs);
                    531:     if (ret < 0) {
1.1.1.18  root      532:         *pdrv = NULL;
                    533:         return ret;
1.1.1.5   root      534:     }
                    535: 
1.1       root      536:     score_max = 0;
1.1.1.18  root      537:     drv = NULL;
                    538:     QLIST_FOREACH(drv1, &bdrv_drivers, list) {
1.1.1.5   root      539:         if (drv1->bdrv_probe) {
                    540:             score = drv1->bdrv_probe(buf, ret, filename);
                    541:             if (score > score_max) {
                    542:                 score_max = score;
                    543:                 drv = drv1;
                    544:             }
1.1       root      545:         }
                    546:     }
1.1.1.18  root      547:     if (!drv) {
                    548:         ret = -ENOENT;
                    549:     }
                    550:     *pdrv = drv;
                    551:     return ret;
                    552: }
                    553: 
                    554: /**
                    555:  * Set the current 'total_sectors' value
                    556:  */
                    557: static int refresh_total_sectors(BlockDriverState *bs, int64_t hint)
                    558: {
                    559:     BlockDriver *drv = bs->drv;
                    560: 
                    561:     /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */
                    562:     if (bs->sg)
                    563:         return 0;
                    564: 
                    565:     /* query actual device if possible, otherwise just trust the hint */
                    566:     if (drv->bdrv_getlength) {
                    567:         int64_t length = drv->bdrv_getlength(bs);
                    568:         if (length < 0) {
                    569:             return length;
                    570:         }
                    571:         hint = length >> BDRV_SECTOR_BITS;
                    572:     }
                    573: 
                    574:     bs->total_sectors = hint;
                    575:     return 0;
                    576: }
                    577: 
1.1.1.22  root      578: /**
                    579:  * Set open flags for a given cache mode
                    580:  *
                    581:  * Return 0 on success, -1 if the cache mode was invalid.
                    582:  */
                    583: int bdrv_parse_cache_flags(const char *mode, int *flags)
                    584: {
                    585:     *flags &= ~BDRV_O_CACHE_MASK;
                    586: 
                    587:     if (!strcmp(mode, "off") || !strcmp(mode, "none")) {
                    588:         *flags |= BDRV_O_NOCACHE | BDRV_O_CACHE_WB;
                    589:     } else if (!strcmp(mode, "directsync")) {
                    590:         *flags |= BDRV_O_NOCACHE;
                    591:     } else if (!strcmp(mode, "writeback")) {
                    592:         *flags |= BDRV_O_CACHE_WB;
                    593:     } else if (!strcmp(mode, "unsafe")) {
                    594:         *flags |= BDRV_O_CACHE_WB;
                    595:         *flags |= BDRV_O_NO_FLUSH;
                    596:     } else if (!strcmp(mode, "writethrough")) {
                    597:         /* this is the default */
                    598:     } else {
                    599:         return -1;
                    600:     }
                    601: 
                    602:     return 0;
                    603: }
                    604: 
1.1.1.23! root      605: /**
        !           606:  * The copy-on-read flag is actually a reference count so multiple users may
        !           607:  * use the feature without worrying about clobbering its previous state.
        !           608:  * Copy-on-read stays enabled until all users have called to disable it.
        !           609:  */
        !           610: void bdrv_enable_copy_on_read(BlockDriverState *bs)
        !           611: {
        !           612:     bs->copy_on_read++;
        !           613: }
        !           614: 
        !           615: void bdrv_disable_copy_on_read(BlockDriverState *bs)
        !           616: {
        !           617:     assert(bs->copy_on_read > 0);
        !           618:     bs->copy_on_read--;
        !           619: }
        !           620: 
1.1.1.18  root      621: /*
                    622:  * Common part for opening disk images and files
                    623:  */
                    624: static int bdrv_open_common(BlockDriverState *bs, const char *filename,
                    625:     int flags, BlockDriver *drv)
                    626: {
                    627:     int ret, open_flags;
                    628: 
                    629:     assert(drv != NULL);
1.1.1.23! root      630:     assert(bs->file == NULL);
1.1.1.18  root      631: 
1.1.1.22  root      632:     trace_bdrv_open_common(bs, filename, flags, drv->format_name);
                    633: 
1.1.1.18  root      634:     bs->open_flags = flags;
                    635:     bs->buffer_alignment = 512;
                    636: 
1.1.1.23! root      637:     assert(bs->copy_on_read == 0); /* bdrv_new() and bdrv_close() make it so */
        !           638:     if ((flags & BDRV_O_RDWR) && (flags & BDRV_O_COPY_ON_READ)) {
        !           639:         bdrv_enable_copy_on_read(bs);
        !           640:     }
        !           641: 
1.1.1.18  root      642:     pstrcpy(bs->filename, sizeof(bs->filename), filename);
                    643: 
                    644:     if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv)) {
                    645:         return -ENOTSUP;
                    646:     }
                    647: 
                    648:     bs->drv = drv;
1.1.1.22  root      649:     bs->opaque = g_malloc0(drv->instance_size);
1.1.1.18  root      650: 
1.1.1.22  root      651:     bs->enable_write_cache = !!(flags & BDRV_O_CACHE_WB);
1.1.1.18  root      652: 
                    653:     /*
                    654:      * Clear flags that are internal to the block layer before opening the
                    655:      * image.
                    656:      */
                    657:     open_flags = flags & ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
                    658: 
                    659:     /*
1.1.1.21  root      660:      * Snapshots should be writable.
1.1.1.18  root      661:      */
                    662:     if (bs->is_temporary) {
                    663:         open_flags |= BDRV_O_RDWR;
                    664:     }
                    665: 
1.1.1.22  root      666:     bs->keep_read_only = bs->read_only = !(open_flags & BDRV_O_RDWR);
                    667: 
1.1.1.18  root      668:     /* Open the image, either directly or using a protocol */
                    669:     if (drv->bdrv_file_open) {
                    670:         ret = drv->bdrv_file_open(bs, filename, open_flags);
                    671:     } else {
                    672:         ret = bdrv_file_open(&bs->file, filename, open_flags);
                    673:         if (ret >= 0) {
                    674:             ret = drv->bdrv_open(bs, open_flags);
                    675:         }
                    676:     }
                    677: 
                    678:     if (ret < 0) {
                    679:         goto free_and_fail;
                    680:     }
                    681: 
                    682:     ret = refresh_total_sectors(bs, bs->total_sectors);
                    683:     if (ret < 0) {
                    684:         goto free_and_fail;
                    685:     }
                    686: 
                    687: #ifndef _WIN32
                    688:     if (bs->is_temporary) {
                    689:         unlink(filename);
                    690:     }
                    691: #endif
                    692:     return 0;
                    693: 
                    694: free_and_fail:
                    695:     if (bs->file) {
                    696:         bdrv_delete(bs->file);
                    697:         bs->file = NULL;
                    698:     }
1.1.1.22  root      699:     g_free(bs->opaque);
1.1.1.18  root      700:     bs->opaque = NULL;
                    701:     bs->drv = NULL;
                    702:     return ret;
1.1       root      703: }
                    704: 
1.1.1.18  root      705: /*
                    706:  * Opens a file using a protocol (file, host_device, nbd, ...)
                    707:  */
1.1.1.5   root      708: int bdrv_file_open(BlockDriverState **pbs, const char *filename, int flags)
1.1       root      709: {
1.1.1.5   root      710:     BlockDriverState *bs;
1.1.1.18  root      711:     BlockDriver *drv;
1.1.1.5   root      712:     int ret;
                    713: 
1.1.1.18  root      714:     drv = bdrv_find_protocol(filename);
                    715:     if (!drv) {
                    716:         return -ENOENT;
                    717:     }
                    718: 
1.1.1.5   root      719:     bs = bdrv_new("");
1.1.1.18  root      720:     ret = bdrv_open_common(bs, filename, flags, drv);
1.1.1.5   root      721:     if (ret < 0) {
                    722:         bdrv_delete(bs);
                    723:         return ret;
1.1.1.2   root      724:     }
1.1.1.7   root      725:     bs->growable = 1;
1.1.1.5   root      726:     *pbs = bs;
                    727:     return 0;
                    728: }
                    729: 
1.1.1.18  root      730: /*
                    731:  * Opens a disk image (raw, qcow2, vmdk, ...)
                    732:  */
                    733: int bdrv_open(BlockDriverState *bs, const char *filename, int flags,
                    734:               BlockDriver *drv)
1.1       root      735: {
1.1.1.18  root      736:     int ret;
1.1.1.22  root      737:     char tmp_filename[PATH_MAX];
1.1       root      738: 
1.1.1.5   root      739:     if (flags & BDRV_O_SNAPSHOT) {
1.1       root      740:         BlockDriverState *bs1;
                    741:         int64_t total_size;
1.1.1.7   root      742:         int is_protocol = 0;
1.1.1.13  root      743:         BlockDriver *bdrv_qcow2;
                    744:         QEMUOptionParameter *options;
1.1.1.18  root      745:         char backing_filename[PATH_MAX];
1.1.1.6   root      746: 
1.1       root      747:         /* if snapshot, we create a temporary backing file and open it
                    748:            instead of opening 'filename' directly */
                    749: 
                    750:         /* if there is a backing file, use it */
                    751:         bs1 = bdrv_new("");
1.1.1.18  root      752:         ret = bdrv_open(bs1, filename, 0, drv);
1.1.1.8   root      753:         if (ret < 0) {
1.1       root      754:             bdrv_delete(bs1);
1.1.1.8   root      755:             return ret;
1.1       root      756:         }
1.1.1.18  root      757:         total_size = bdrv_getlength(bs1) & BDRV_SECTOR_MASK;
1.1.1.7   root      758: 
                    759:         if (bs1->drv && bs1->drv->protocol_name)
                    760:             is_protocol = 1;
                    761: 
1.1       root      762:         bdrv_delete(bs1);
1.1.1.6   root      763: 
1.1.1.23! root      764:         ret = get_tmp_filename(tmp_filename, sizeof(tmp_filename));
        !           765:         if (ret < 0) {
        !           766:             return ret;
        !           767:         }
1.1.1.7   root      768: 
                    769:         /* Real path is meaningless for protocols */
                    770:         if (is_protocol)
                    771:             snprintf(backing_filename, sizeof(backing_filename),
                    772:                      "%s", filename);
1.1.1.18  root      773:         else if (!realpath(filename, backing_filename))
                    774:             return -errno;
1.1.1.7   root      775: 
1.1.1.13  root      776:         bdrv_qcow2 = bdrv_find_format("qcow2");
                    777:         options = parse_option_parameters("", bdrv_qcow2->create_options, NULL);
                    778: 
1.1.1.18  root      779:         set_option_parameter_int(options, BLOCK_OPT_SIZE, total_size);
1.1.1.13  root      780:         set_option_parameter(options, BLOCK_OPT_BACKING_FILE, backing_filename);
                    781:         if (drv) {
                    782:             set_option_parameter(options, BLOCK_OPT_BACKING_FMT,
                    783:                 drv->format_name);
                    784:         }
                    785: 
                    786:         ret = bdrv_create(bdrv_qcow2, tmp_filename, options);
1.1.1.18  root      787:         free_option_parameters(options);
1.1.1.8   root      788:         if (ret < 0) {
                    789:             return ret;
1.1       root      790:         }
1.1.1.13  root      791: 
1.1       root      792:         filename = tmp_filename;
1.1.1.13  root      793:         drv = bdrv_qcow2;
1.1       root      794:         bs->is_temporary = 1;
                    795:     }
                    796: 
1.1.1.18  root      797:     /* Find the right image format driver */
                    798:     if (!drv) {
                    799:         ret = find_image_format(filename, &drv);
1.1.1.8   root      800:     }
1.1.1.18  root      801: 
1.1.1.8   root      802:     if (!drv) {
                    803:         goto unlink_and_fail;
1.1       root      804:     }
1.1.1.14  root      805: 
1.1.1.18  root      806:     /* Open the image */
                    807:     ret = bdrv_open_common(bs, filename, flags, drv);
1.1       root      808:     if (ret < 0) {
1.1.1.18  root      809:         goto unlink_and_fail;
1.1       root      810:     }
1.1.1.18  root      811: 
                    812:     /* If there is a backing file, use it */
                    813:     if ((flags & BDRV_O_NO_BACKING) == 0 && bs->backing_file[0] != '\0') {
                    814:         char backing_filename[PATH_MAX];
                    815:         int back_flags;
1.1.1.13  root      816:         BlockDriver *back_drv = NULL;
1.1.1.18  root      817: 
1.1       root      818:         bs->backing_hd = bdrv_new("");
1.1.1.23! root      819:         bdrv_get_full_backing_filename(bs, backing_filename,
        !           820:                                        sizeof(backing_filename));
1.1.1.19  root      821: 
                    822:         if (bs->backing_format[0] != '\0') {
1.1.1.13  root      823:             back_drv = bdrv_find_format(bs->backing_format);
1.1.1.19  root      824:         }
1.1.1.18  root      825: 
                    826:         /* backing files always opened read-only */
                    827:         back_flags =
                    828:             flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
                    829: 
                    830:         ret = bdrv_open(bs->backing_hd, backing_filename, back_flags, back_drv);
1.1.1.8   root      831:         if (ret < 0) {
                    832:             bdrv_close(bs);
                    833:             return ret;
                    834:         }
1.1.1.18  root      835:         if (bs->is_temporary) {
                    836:             bs->backing_hd->keep_read_only = !(flags & BDRV_O_RDWR);
                    837:         } else {
                    838:             /* base image inherits from "parent" */
                    839:             bs->backing_hd->keep_read_only = bs->keep_read_only;
                    840:         }
1.1       root      841:     }
                    842: 
1.1.1.13  root      843:     if (!bdrv_key_required(bs)) {
1.1.1.22  root      844:         bdrv_dev_change_media_cb(bs, true);
1.1.1.13  root      845:     }
1.1.1.18  root      846: 
1.1.1.23! root      847:     /* throttling disk I/O limits */
        !           848:     if (bs->io_limits_enabled) {
        !           849:         bdrv_io_limits_enable(bs);
        !           850:     }
        !           851: 
1.1       root      852:     return 0;
1.1.1.18  root      853: 
                    854: unlink_and_fail:
                    855:     if (bs->is_temporary) {
                    856:         unlink(filename);
                    857:     }
                    858:     return ret;
1.1       root      859: }
                    860: 
                    861: void bdrv_close(BlockDriverState *bs)
                    862: {
1.1.1.23! root      863:     bdrv_flush(bs);
1.1.1.5   root      864:     if (bs->drv) {
1.1.1.23! root      865:         if (bs->job) {
        !           866:             block_job_cancel_sync(bs->job);
        !           867:         }
        !           868:         bdrv_drain_all();
        !           869: 
1.1.1.18  root      870:         if (bs == bs_snapshots) {
                    871:             bs_snapshots = NULL;
                    872:         }
                    873:         if (bs->backing_hd) {
1.1       root      874:             bdrv_delete(bs->backing_hd);
1.1.1.18  root      875:             bs->backing_hd = NULL;
                    876:         }
1.1       root      877:         bs->drv->bdrv_close(bs);
1.1.1.22  root      878:         g_free(bs->opaque);
1.1       root      879: #ifdef _WIN32
                    880:         if (bs->is_temporary) {
                    881:             unlink(bs->filename);
                    882:         }
                    883: #endif
                    884:         bs->opaque = NULL;
                    885:         bs->drv = NULL;
1.1.1.23! root      886:         bs->copy_on_read = 0;
        !           887:         bs->backing_file[0] = '\0';
        !           888:         bs->backing_format[0] = '\0';
        !           889:         bs->total_sectors = 0;
        !           890:         bs->encrypted = 0;
        !           891:         bs->valid_key = 0;
        !           892:         bs->sg = 0;
        !           893:         bs->growable = 0;
1.1       root      894: 
1.1.1.18  root      895:         if (bs->file != NULL) {
1.1.1.23! root      896:             bdrv_delete(bs->file);
        !           897:             bs->file = NULL;
1.1.1.18  root      898:         }
                    899: 
1.1.1.22  root      900:         bdrv_dev_change_media_cb(bs, false);
1.1       root      901:     }
1.1.1.23! root      902: 
        !           903:     /*throttling disk I/O limits*/
        !           904:     if (bs->io_limits_enabled) {
        !           905:         bdrv_io_limits_disable(bs);
        !           906:     }
1.1       root      907: }
                    908: 
1.1.1.18  root      909: void bdrv_close_all(void)
                    910: {
                    911:     BlockDriverState *bs;
                    912: 
                    913:     QTAILQ_FOREACH(bs, &bdrv_states, list) {
                    914:         bdrv_close(bs);
                    915:     }
                    916: }
                    917: 
1.1.1.23! root      918: /*
        !           919:  * Wait for pending requests to complete across all BlockDriverStates
        !           920:  *
        !           921:  * This function does not flush data to disk, use bdrv_flush_all() for that
        !           922:  * after calling this function.
        !           923:  *
        !           924:  * Note that completion of an asynchronous I/O operation can trigger any
        !           925:  * number of other I/O operations on other devices---for example a coroutine
        !           926:  * can be arbitrarily complex and a constant flow of I/O can come until the
        !           927:  * coroutine is complete.  Because of this, it is not possible to have a
        !           928:  * function to drain a single device's I/O queue.
        !           929:  */
        !           930: void bdrv_drain_all(void)
        !           931: {
        !           932:     BlockDriverState *bs;
        !           933:     bool busy;
        !           934: 
        !           935:     do {
        !           936:         busy = qemu_aio_wait();
        !           937: 
        !           938:         /* FIXME: We do not have timer support here, so this is effectively
        !           939:          * a busy wait.
        !           940:          */
        !           941:         QTAILQ_FOREACH(bs, &bdrv_states, list) {
        !           942:             if (!qemu_co_queue_empty(&bs->throttled_reqs)) {
        !           943:                 qemu_co_queue_restart_all(&bs->throttled_reqs);
        !           944:                 busy = true;
        !           945:             }
        !           946:         }
        !           947:     } while (busy);
        !           948: 
        !           949:     /* If requests are still pending there is a bug somewhere */
        !           950:     QTAILQ_FOREACH(bs, &bdrv_states, list) {
        !           951:         assert(QLIST_EMPTY(&bs->tracked_requests));
        !           952:         assert(qemu_co_queue_empty(&bs->throttled_reqs));
        !           953:     }
        !           954: }
        !           955: 
1.1.1.20  root      956: /* make a BlockDriverState anonymous by removing from bdrv_state list.
                    957:    Also, NULL terminate the device_name to prevent double remove */
                    958: void bdrv_make_anon(BlockDriverState *bs)
                    959: {
                    960:     if (bs->device_name[0] != '\0') {
                    961:         QTAILQ_REMOVE(&bdrv_states, bs, list);
                    962:     }
                    963:     bs->device_name[0] = '\0';
                    964: }
                    965: 
1.1.1.23! root      966: static void bdrv_rebind(BlockDriverState *bs)
        !           967: {
        !           968:     if (bs->drv && bs->drv->bdrv_rebind) {
        !           969:         bs->drv->bdrv_rebind(bs);
        !           970:     }
        !           971: }
        !           972: 
        !           973: /*
        !           974:  * Add new bs contents at the top of an image chain while the chain is
        !           975:  * live, while keeping required fields on the top layer.
        !           976:  *
        !           977:  * This will modify the BlockDriverState fields, and swap contents
        !           978:  * between bs_new and bs_top. Both bs_new and bs_top are modified.
        !           979:  *
        !           980:  * bs_new is required to be anonymous.
        !           981:  *
        !           982:  * This function does not create any image files.
        !           983:  */
        !           984: void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top)
        !           985: {
        !           986:     BlockDriverState tmp;
        !           987: 
        !           988:     /* bs_new must be anonymous */
        !           989:     assert(bs_new->device_name[0] == '\0');
        !           990: 
        !           991:     tmp = *bs_new;
        !           992: 
        !           993:     /* there are some fields that need to stay on the top layer: */
        !           994:     tmp.open_flags        = bs_top->open_flags;
        !           995: 
        !           996:     /* dev info */
        !           997:     tmp.dev_ops           = bs_top->dev_ops;
        !           998:     tmp.dev_opaque        = bs_top->dev_opaque;
        !           999:     tmp.dev               = bs_top->dev;
        !          1000:     tmp.buffer_alignment  = bs_top->buffer_alignment;
        !          1001:     tmp.copy_on_read      = bs_top->copy_on_read;
        !          1002: 
        !          1003:     /* i/o timing parameters */
        !          1004:     tmp.slice_time        = bs_top->slice_time;
        !          1005:     tmp.slice_start       = bs_top->slice_start;
        !          1006:     tmp.slice_end         = bs_top->slice_end;
        !          1007:     tmp.io_limits         = bs_top->io_limits;
        !          1008:     tmp.io_base           = bs_top->io_base;
        !          1009:     tmp.throttled_reqs    = bs_top->throttled_reqs;
        !          1010:     tmp.block_timer       = bs_top->block_timer;
        !          1011:     tmp.io_limits_enabled = bs_top->io_limits_enabled;
        !          1012: 
        !          1013:     /* geometry */
        !          1014:     tmp.cyls              = bs_top->cyls;
        !          1015:     tmp.heads             = bs_top->heads;
        !          1016:     tmp.secs              = bs_top->secs;
        !          1017:     tmp.translation       = bs_top->translation;
        !          1018: 
        !          1019:     /* r/w error */
        !          1020:     tmp.on_read_error     = bs_top->on_read_error;
        !          1021:     tmp.on_write_error    = bs_top->on_write_error;
        !          1022: 
        !          1023:     /* i/o status */
        !          1024:     tmp.iostatus_enabled  = bs_top->iostatus_enabled;
        !          1025:     tmp.iostatus          = bs_top->iostatus;
        !          1026: 
        !          1027:     /* keep the same entry in bdrv_states */
        !          1028:     pstrcpy(tmp.device_name, sizeof(tmp.device_name), bs_top->device_name);
        !          1029:     tmp.list = bs_top->list;
        !          1030: 
        !          1031:     /* The contents of 'tmp' will become bs_top, as we are
        !          1032:      * swapping bs_new and bs_top contents. */
        !          1033:     tmp.backing_hd = bs_new;
        !          1034:     pstrcpy(tmp.backing_file, sizeof(tmp.backing_file), bs_top->filename);
        !          1035:     bdrv_get_format(bs_top, tmp.backing_format, sizeof(tmp.backing_format));
        !          1036: 
        !          1037:     /* swap contents of the fixed new bs and the current top */
        !          1038:     *bs_new = *bs_top;
        !          1039:     *bs_top = tmp;
        !          1040: 
        !          1041:     /* device_name[] was carried over from the old bs_top.  bs_new
        !          1042:      * shouldn't be in bdrv_states, so we need to make device_name[]
        !          1043:      * reflect the anonymity of bs_new
        !          1044:      */
        !          1045:     bs_new->device_name[0] = '\0';
        !          1046: 
        !          1047:     /* clear the copied fields in the new backing file */
        !          1048:     bdrv_detach_dev(bs_new, bs_new->dev);
        !          1049: 
        !          1050:     qemu_co_queue_init(&bs_new->throttled_reqs);
        !          1051:     memset(&bs_new->io_base,   0, sizeof(bs_new->io_base));
        !          1052:     memset(&bs_new->io_limits, 0, sizeof(bs_new->io_limits));
        !          1053:     bdrv_iostatus_disable(bs_new);
        !          1054: 
        !          1055:     /* we don't use bdrv_io_limits_disable() for this, because we don't want
        !          1056:      * to affect or delete the block_timer, as it has been moved to bs_top */
        !          1057:     bs_new->io_limits_enabled = false;
        !          1058:     bs_new->block_timer       = NULL;
        !          1059:     bs_new->slice_time        = 0;
        !          1060:     bs_new->slice_start       = 0;
        !          1061:     bs_new->slice_end         = 0;
        !          1062: 
        !          1063:     bdrv_rebind(bs_new);
        !          1064:     bdrv_rebind(bs_top);
        !          1065: }
        !          1066: 
1.1       root     1067: void bdrv_delete(BlockDriverState *bs)
                   1068: {
1.1.1.22  root     1069:     assert(!bs->dev);
1.1.1.23! root     1070:     assert(!bs->job);
        !          1071:     assert(!bs->in_use);
1.1.1.7   root     1072: 
1.1.1.18  root     1073:     /* remove from list, if necessary */
1.1.1.20  root     1074:     bdrv_make_anon(bs);
1.1.1.7   root     1075: 
1.1       root     1076:     bdrv_close(bs);
1.1.1.18  root     1077: 
                   1078:     assert(bs != bs_snapshots);
1.1.1.22  root     1079:     g_free(bs);
1.1       root     1080: }
                   1081: 
1.1.1.22  root     1082: int bdrv_attach_dev(BlockDriverState *bs, void *dev)
                   1083: /* TODO change to DeviceState *dev when all users are qdevified */
1.1.1.18  root     1084: {
1.1.1.22  root     1085:     if (bs->dev) {
1.1.1.18  root     1086:         return -EBUSY;
                   1087:     }
1.1.1.22  root     1088:     bs->dev = dev;
                   1089:     bdrv_iostatus_reset(bs);
1.1.1.18  root     1090:     return 0;
                   1091: }
                   1092: 
1.1.1.22  root     1093: /* TODO qdevified devices don't use this, remove when devices are qdevified */
                   1094: void bdrv_attach_dev_nofail(BlockDriverState *bs, void *dev)
1.1.1.18  root     1095: {
1.1.1.22  root     1096:     if (bdrv_attach_dev(bs, dev) < 0) {
                   1097:         abort();
                   1098:     }
1.1.1.18  root     1099: }
                   1100: 
1.1.1.22  root     1101: void bdrv_detach_dev(BlockDriverState *bs, void *dev)
                   1102: /* TODO change to DeviceState *dev when all users are qdevified */
1.1.1.18  root     1103: {
1.1.1.22  root     1104:     assert(bs->dev == dev);
                   1105:     bs->dev = NULL;
                   1106:     bs->dev_ops = NULL;
                   1107:     bs->dev_opaque = NULL;
                   1108:     bs->buffer_alignment = 512;
                   1109: }
                   1110: 
                   1111: /* TODO change to return DeviceState * when all users are qdevified */
                   1112: void *bdrv_get_attached_dev(BlockDriverState *bs)
                   1113: {
                   1114:     return bs->dev;
                   1115: }
                   1116: 
                   1117: void bdrv_set_dev_ops(BlockDriverState *bs, const BlockDevOps *ops,
                   1118:                       void *opaque)
                   1119: {
                   1120:     bs->dev_ops = ops;
                   1121:     bs->dev_opaque = opaque;
                   1122:     if (bdrv_dev_has_removable_media(bs) && bs == bs_snapshots) {
                   1123:         bs_snapshots = NULL;
                   1124:     }
                   1125: }
                   1126: 
1.1.1.23! root     1127: void bdrv_emit_qmp_error_event(const BlockDriverState *bdrv,
        !          1128:                                BlockQMPEventAction action, int is_read)
        !          1129: {
        !          1130:     QObject *data;
        !          1131:     const char *action_str;
        !          1132: 
        !          1133:     switch (action) {
        !          1134:     case BDRV_ACTION_REPORT:
        !          1135:         action_str = "report";
        !          1136:         break;
        !          1137:     case BDRV_ACTION_IGNORE:
        !          1138:         action_str = "ignore";
        !          1139:         break;
        !          1140:     case BDRV_ACTION_STOP:
        !          1141:         action_str = "stop";
        !          1142:         break;
        !          1143:     default:
        !          1144:         abort();
        !          1145:     }
        !          1146: 
        !          1147:     data = qobject_from_jsonf("{ 'device': %s, 'action': %s, 'operation': %s }",
        !          1148:                               bdrv->device_name,
        !          1149:                               action_str,
        !          1150:                               is_read ? "read" : "write");
        !          1151:     monitor_protocol_event(QEVENT_BLOCK_IO_ERROR, data);
        !          1152: 
        !          1153:     qobject_decref(data);
        !          1154: }
        !          1155: 
        !          1156: static void bdrv_emit_qmp_eject_event(BlockDriverState *bs, bool ejected)
        !          1157: {
        !          1158:     QObject *data;
        !          1159: 
        !          1160:     data = qobject_from_jsonf("{ 'device': %s, 'tray-open': %i }",
        !          1161:                               bdrv_get_device_name(bs), ejected);
        !          1162:     monitor_protocol_event(QEVENT_DEVICE_TRAY_MOVED, data);
        !          1163: 
        !          1164:     qobject_decref(data);
        !          1165: }
        !          1166: 
1.1.1.22  root     1167: static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load)
                   1168: {
                   1169:     if (bs->dev_ops && bs->dev_ops->change_media_cb) {
1.1.1.23! root     1170:         bool tray_was_closed = !bdrv_dev_is_tray_open(bs);
1.1.1.22  root     1171:         bs->dev_ops->change_media_cb(bs->dev_opaque, load);
1.1.1.23! root     1172:         if (tray_was_closed) {
        !          1173:             /* tray open */
        !          1174:             bdrv_emit_qmp_eject_event(bs, true);
        !          1175:         }
        !          1176:         if (load) {
        !          1177:             /* tray close */
        !          1178:             bdrv_emit_qmp_eject_event(bs, false);
        !          1179:         }
1.1.1.22  root     1180:     }
                   1181: }
                   1182: 
                   1183: bool bdrv_dev_has_removable_media(BlockDriverState *bs)
                   1184: {
                   1185:     return !bs->dev || (bs->dev_ops && bs->dev_ops->change_media_cb);
                   1186: }
                   1187: 
                   1188: void bdrv_dev_eject_request(BlockDriverState *bs, bool force)
                   1189: {
                   1190:     if (bs->dev_ops && bs->dev_ops->eject_request_cb) {
                   1191:         bs->dev_ops->eject_request_cb(bs->dev_opaque, force);
                   1192:     }
                   1193: }
                   1194: 
                   1195: bool bdrv_dev_is_tray_open(BlockDriverState *bs)
                   1196: {
                   1197:     if (bs->dev_ops && bs->dev_ops->is_tray_open) {
                   1198:         return bs->dev_ops->is_tray_open(bs->dev_opaque);
                   1199:     }
                   1200:     return false;
                   1201: }
                   1202: 
                   1203: static void bdrv_dev_resize_cb(BlockDriverState *bs)
                   1204: {
                   1205:     if (bs->dev_ops && bs->dev_ops->resize_cb) {
                   1206:         bs->dev_ops->resize_cb(bs->dev_opaque);
                   1207:     }
                   1208: }
                   1209: 
                   1210: bool bdrv_dev_is_medium_locked(BlockDriverState *bs)
                   1211: {
                   1212:     if (bs->dev_ops && bs->dev_ops->is_medium_locked) {
                   1213:         return bs->dev_ops->is_medium_locked(bs->dev_opaque);
                   1214:     }
                   1215:     return false;
1.1.1.18  root     1216: }
                   1217: 
1.1.1.13  root     1218: /*
                   1219:  * Run consistency checks on an image
                   1220:  *
1.1.1.18  root     1221:  * Returns 0 if the check could be completed (it doesn't mean that the image is
1.1.1.21  root     1222:  * free of errors) or -errno when an internal error occurred. The results of the
1.1.1.18  root     1223:  * check are stored in res.
1.1.1.13  root     1224:  */
1.1.1.18  root     1225: int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res)
1.1.1.13  root     1226: {
                   1227:     if (bs->drv->bdrv_check == NULL) {
                   1228:         return -ENOTSUP;
                   1229:     }
                   1230: 
1.1.1.18  root     1231:     memset(res, 0, sizeof(*res));
                   1232:     return bs->drv->bdrv_check(bs, res);
1.1.1.13  root     1233: }
                   1234: 
1.1.1.19  root     1235: #define COMMIT_BUF_SECTORS 2048
                   1236: 
1.1       root     1237: /* commit COW file into the raw image */
                   1238: int bdrv_commit(BlockDriverState *bs)
                   1239: {
1.1.1.5   root     1240:     BlockDriver *drv = bs->drv;
1.1.1.18  root     1241:     BlockDriver *backing_drv;
1.1.1.19  root     1242:     int64_t sector, total_sectors;
                   1243:     int n, ro, open_flags;
1.1.1.18  root     1244:     int ret = 0, rw_ret = 0;
1.1.1.19  root     1245:     uint8_t *buf;
1.1.1.18  root     1246:     char filename[1024];
                   1247:     BlockDriverState *bs_rw, *bs_ro;
1.1       root     1248: 
1.1.1.5   root     1249:     if (!drv)
                   1250:         return -ENOMEDIUM;
1.1.1.18  root     1251:     
                   1252:     if (!bs->backing_hd) {
                   1253:         return -ENOTSUP;
                   1254:     }
1.1       root     1255: 
1.1.1.18  root     1256:     if (bs->backing_hd->keep_read_only) {
                   1257:         return -EACCES;
1.1       root     1258:     }
                   1259: 
1.1.1.23! root     1260:     if (bdrv_in_use(bs) || bdrv_in_use(bs->backing_hd)) {
        !          1261:         return -EBUSY;
        !          1262:     }
        !          1263: 
1.1.1.18  root     1264:     backing_drv = bs->backing_hd->drv;
                   1265:     ro = bs->backing_hd->read_only;
                   1266:     strncpy(filename, bs->backing_hd->filename, sizeof(filename));
                   1267:     open_flags =  bs->backing_hd->open_flags;
                   1268: 
                   1269:     if (ro) {
                   1270:         /* re-open as RW */
                   1271:         bdrv_delete(bs->backing_hd);
                   1272:         bs->backing_hd = NULL;
                   1273:         bs_rw = bdrv_new("");
                   1274:         rw_ret = bdrv_open(bs_rw, filename, open_flags | BDRV_O_RDWR,
                   1275:             backing_drv);
                   1276:         if (rw_ret < 0) {
                   1277:             bdrv_delete(bs_rw);
                   1278:             /* try to re-open read-only */
                   1279:             bs_ro = bdrv_new("");
                   1280:             ret = bdrv_open(bs_ro, filename, open_flags & ~BDRV_O_RDWR,
                   1281:                 backing_drv);
                   1282:             if (ret < 0) {
                   1283:                 bdrv_delete(bs_ro);
                   1284:                 /* drive not functional anymore */
                   1285:                 bs->drv = NULL;
                   1286:                 return ret;
                   1287:             }
                   1288:             bs->backing_hd = bs_ro;
                   1289:             return rw_ret;
                   1290:         }
                   1291:         bs->backing_hd = bs_rw;
1.1       root     1292:     }
                   1293: 
1.1.1.14  root     1294:     total_sectors = bdrv_getlength(bs) >> BDRV_SECTOR_BITS;
1.1.1.22  root     1295:     buf = g_malloc(COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE);
1.1       root     1296: 
1.1.1.19  root     1297:     for (sector = 0; sector < total_sectors; sector += n) {
1.1.1.23! root     1298:         if (bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n)) {
1.1.1.19  root     1299: 
                   1300:             if (bdrv_read(bs, sector, buf, n) != 0) {
                   1301:                 ret = -EIO;
                   1302:                 goto ro_cleanup;
                   1303:             }
                   1304: 
                   1305:             if (bdrv_write(bs->backing_hd, sector, buf, n) != 0) {
                   1306:                 ret = -EIO;
                   1307:                 goto ro_cleanup;
                   1308:             }
1.1       root     1309:         }
                   1310:     }
1.1.1.2   root     1311: 
1.1.1.18  root     1312:     if (drv->bdrv_make_empty) {
                   1313:         ret = drv->bdrv_make_empty(bs);
                   1314:         bdrv_flush(bs);
                   1315:     }
1.1.1.2   root     1316: 
1.1.1.18  root     1317:     /*
                   1318:      * Make sure all data we wrote to the backing device is actually
                   1319:      * stable on disk.
                   1320:      */
                   1321:     if (bs->backing_hd)
                   1322:         bdrv_flush(bs->backing_hd);
                   1323: 
                   1324: ro_cleanup:
1.1.1.22  root     1325:     g_free(buf);
1.1.1.18  root     1326: 
                   1327:     if (ro) {
                   1328:         /* re-open as RO */
                   1329:         bdrv_delete(bs->backing_hd);
                   1330:         bs->backing_hd = NULL;
                   1331:         bs_ro = bdrv_new("");
                   1332:         ret = bdrv_open(bs_ro, filename, open_flags & ~BDRV_O_RDWR,
                   1333:             backing_drv);
                   1334:         if (ret < 0) {
                   1335:             bdrv_delete(bs_ro);
                   1336:             /* drive not functional anymore */
                   1337:             bs->drv = NULL;
                   1338:             return ret;
                   1339:         }
                   1340:         bs->backing_hd = bs_ro;
                   1341:         bs->backing_hd->keep_read_only = 0;
                   1342:     }
                   1343: 
                   1344:     return ret;
                   1345: }
                   1346: 
1.1.1.23! root     1347: int bdrv_commit_all(void)
1.1.1.18  root     1348: {
                   1349:     BlockDriverState *bs;
                   1350: 
                   1351:     QTAILQ_FOREACH(bs, &bdrv_states, list) {
1.1.1.23! root     1352:         int ret = bdrv_commit(bs);
        !          1353:         if (ret < 0) {
        !          1354:             return ret;
        !          1355:         }
        !          1356:     }
        !          1357:     return 0;
        !          1358: }
        !          1359: 
/*
 * Bookkeeping for one in-flight request on a BlockDriverState.
 *
 * Filled in by tracked_request_begin(), which links it into
 * bs->tracked_requests, and unlinked by tracked_request_end().
 */
struct BdrvTrackedRequest {
    BlockDriverState *bs;       /* device the request was issued on */
    int64_t sector_num;         /* first sector of the request */
    int nb_sectors;             /* length of the request in sectors */
    bool is_write;              /* as passed to tracked_request_begin() */
    QLIST_ENTRY(BdrvTrackedRequest) list; /* link in bs->tracked_requests */
    Coroutine *co; /* owner, used for deadlock detection */
    CoQueue wait_queue; /* coroutines blocked on this request */
};
        !          1369: 
        !          1370: /**
        !          1371:  * Remove an active request from the tracked requests list
        !          1372:  *
        !          1373:  * This function should be called when a tracked request is completing.
        !          1374:  */
        !          1375: static void tracked_request_end(BdrvTrackedRequest *req)
        !          1376: {
        !          1377:     QLIST_REMOVE(req, list);
        !          1378:     qemu_co_queue_restart_all(&req->wait_queue);
        !          1379: }
        !          1380: 
        !          1381: /**
        !          1382:  * Add an active request to the tracked requests list
        !          1383:  */
        !          1384: static void tracked_request_begin(BdrvTrackedRequest *req,
        !          1385:                                   BlockDriverState *bs,
        !          1386:                                   int64_t sector_num,
        !          1387:                                   int nb_sectors, bool is_write)
        !          1388: {
        !          1389:     *req = (BdrvTrackedRequest){
        !          1390:         .bs = bs,
        !          1391:         .sector_num = sector_num,
        !          1392:         .nb_sectors = nb_sectors,
        !          1393:         .is_write = is_write,
        !          1394:         .co = qemu_coroutine_self(),
        !          1395:     };
        !          1396: 
        !          1397:     qemu_co_queue_init(&req->wait_queue);
        !          1398: 
        !          1399:     QLIST_INSERT_HEAD(&bs->tracked_requests, req, list);
        !          1400: }
        !          1401: 
        !          1402: /**
        !          1403:  * Round a region to cluster boundaries
        !          1404:  */
        !          1405: static void round_to_clusters(BlockDriverState *bs,
        !          1406:                               int64_t sector_num, int nb_sectors,
        !          1407:                               int64_t *cluster_sector_num,
        !          1408:                               int *cluster_nb_sectors)
        !          1409: {
        !          1410:     BlockDriverInfo bdi;
        !          1411: 
        !          1412:     if (bdrv_get_info(bs, &bdi) < 0 || bdi.cluster_size == 0) {
        !          1413:         *cluster_sector_num = sector_num;
        !          1414:         *cluster_nb_sectors = nb_sectors;
        !          1415:     } else {
        !          1416:         int64_t c = bdi.cluster_size / BDRV_SECTOR_SIZE;
        !          1417:         *cluster_sector_num = QEMU_ALIGN_DOWN(sector_num, c);
        !          1418:         *cluster_nb_sectors = QEMU_ALIGN_UP(sector_num - *cluster_sector_num +
        !          1419:                                             nb_sectors, c);
        !          1420:     }
        !          1421: }
        !          1422: 
        !          1423: static bool tracked_request_overlaps(BdrvTrackedRequest *req,
        !          1424:                                      int64_t sector_num, int nb_sectors) {
        !          1425:     /*        aaaa   bbbb */
        !          1426:     if (sector_num >= req->sector_num + req->nb_sectors) {
        !          1427:         return false;
1.1.1.18  root     1428:     }
1.1.1.23! root     1429:     /* bbbb   aaaa        */
        !          1430:     if (req->sector_num >= sector_num + nb_sectors) {
        !          1431:         return false;
        !          1432:     }
        !          1433:     return true;
        !          1434: }
        !          1435: 
        !          1436: static void coroutine_fn wait_for_overlapping_requests(BlockDriverState *bs,
        !          1437:         int64_t sector_num, int nb_sectors)
        !          1438: {
        !          1439:     BdrvTrackedRequest *req;
        !          1440:     int64_t cluster_sector_num;
        !          1441:     int cluster_nb_sectors;
        !          1442:     bool retry;
        !          1443: 
        !          1444:     /* If we touch the same cluster it counts as an overlap.  This guarantees
        !          1445:      * that allocating writes will be serialized and not race with each other
        !          1446:      * for the same cluster.  For example, in copy-on-read it ensures that the
        !          1447:      * CoR read and write operations are atomic and guest writes cannot
        !          1448:      * interleave between them.
        !          1449:      */
        !          1450:     round_to_clusters(bs, sector_num, nb_sectors,
        !          1451:                       &cluster_sector_num, &cluster_nb_sectors);
        !          1452: 
        !          1453:     do {
        !          1454:         retry = false;
        !          1455:         QLIST_FOREACH(req, &bs->tracked_requests, list) {
        !          1456:             if (tracked_request_overlaps(req, cluster_sector_num,
        !          1457:                                          cluster_nb_sectors)) {
        !          1458:                 /* Hitting this means there was a reentrant request, for
        !          1459:                  * example, a block driver issuing nested requests.  This must
        !          1460:                  * never happen since it means deadlock.
        !          1461:                  */
        !          1462:                 assert(qemu_coroutine_self() != req->co);
        !          1463: 
        !          1464:                 qemu_co_queue_wait(&req->wait_queue);
        !          1465:                 retry = true;
        !          1466:                 break;
        !          1467:             }
        !          1468:         }
        !          1469:     } while (retry);
1.1.1.18  root     1470: }
                   1471: 
                   1472: /*
                   1473:  * Return values:
                   1474:  * 0        - success
                   1475:  * -EINVAL  - backing format specified, but no file
                   1476:  * -ENOSPC  - can't update the backing file because no space is left in the
                   1477:  *            image file header
                   1478:  * -ENOTSUP - format driver doesn't support changing the backing file
                   1479:  */
                   1480: int bdrv_change_backing_file(BlockDriverState *bs,
                   1481:     const char *backing_file, const char *backing_fmt)
                   1482: {
                   1483:     BlockDriver *drv = bs->drv;
1.1.1.23! root     1484:     int ret;
        !          1485: 
        !          1486:     /* Backing file format doesn't make sense without a backing file */
        !          1487:     if (backing_fmt && !backing_file) {
        !          1488:         return -EINVAL;
        !          1489:     }
1.1.1.18  root     1490: 
                   1491:     if (drv->bdrv_change_backing_file != NULL) {
1.1.1.23! root     1492:         ret = drv->bdrv_change_backing_file(bs, backing_file, backing_fmt);
1.1.1.18  root     1493:     } else {
1.1.1.23! root     1494:         ret = -ENOTSUP;
        !          1495:     }
        !          1496: 
        !          1497:     if (ret == 0) {
        !          1498:         pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_file ?: "");
        !          1499:         pstrcpy(bs->backing_format, sizeof(bs->backing_format), backing_fmt ?: "");
1.1.1.18  root     1500:     }
1.1.1.23! root     1501:     return ret;
1.1       root     1502: }
                   1503: 
1.1.1.7   root     1504: static int bdrv_check_byte_request(BlockDriverState *bs, int64_t offset,
                   1505:                                    size_t size)
                   1506: {
                   1507:     int64_t len;
                   1508: 
                   1509:     if (!bdrv_is_inserted(bs))
                   1510:         return -ENOMEDIUM;
                   1511: 
                   1512:     if (bs->growable)
                   1513:         return 0;
                   1514: 
                   1515:     len = bdrv_getlength(bs);
                   1516: 
1.1.1.11  root     1517:     if (offset < 0)
                   1518:         return -EIO;
                   1519: 
                   1520:     if ((offset > len) || (len - offset < size))
1.1.1.7   root     1521:         return -EIO;
                   1522: 
                   1523:     return 0;
                   1524: }
                   1525: 
                   1526: static int bdrv_check_request(BlockDriverState *bs, int64_t sector_num,
                   1527:                               int nb_sectors)
                   1528: {
1.1.1.18  root     1529:     return bdrv_check_byte_request(bs, sector_num * BDRV_SECTOR_SIZE,
                   1530:                                    nb_sectors * BDRV_SECTOR_SIZE);
1.1.1.7   root     1531: }
                   1532: 
/*
 * Arguments and result of one synchronous read/write that bdrv_rw_co()
 * hands to bdrv_rw_co_entry().
 */
typedef struct RwCo {
    BlockDriverState *bs;   /* device to operate on */
    int64_t sector_num;     /* first sector of the request */
    int nb_sectors;         /* number of sectors */
    QEMUIOVector *qiov;     /* I/O vector wrapping the caller's buffer */
    bool is_write;          /* true: bdrv_co_do_writev, false: bdrv_co_do_readv */
    int ret;                /* NOT_DONE until bdrv_rw_co_entry() stores the result */
} RwCo;
                   1541: 
                   1542: static void coroutine_fn bdrv_rw_co_entry(void *opaque)
                   1543: {
                   1544:     RwCo *rwco = opaque;
                   1545: 
                   1546:     if (!rwco->is_write) {
                   1547:         rwco->ret = bdrv_co_do_readv(rwco->bs, rwco->sector_num,
1.1.1.23! root     1548:                                      rwco->nb_sectors, rwco->qiov, 0);
1.1.1.22  root     1549:     } else {
                   1550:         rwco->ret = bdrv_co_do_writev(rwco->bs, rwco->sector_num,
1.1.1.23! root     1551:                                       rwco->nb_sectors, rwco->qiov, 0);
1.1.1.22  root     1552:     }
                   1553: }
                   1554: 
                   1555: /*
                   1556:  * Process a synchronous request using coroutines
                   1557:  */
                   1558: static int bdrv_rw_co(BlockDriverState *bs, int64_t sector_num, uint8_t *buf,
                   1559:                       int nb_sectors, bool is_write)
                   1560: {
                   1561:     QEMUIOVector qiov;
                   1562:     struct iovec iov = {
                   1563:         .iov_base = (void *)buf,
                   1564:         .iov_len = nb_sectors * BDRV_SECTOR_SIZE,
                   1565:     };
                   1566:     Coroutine *co;
                   1567:     RwCo rwco = {
                   1568:         .bs = bs,
                   1569:         .sector_num = sector_num,
                   1570:         .nb_sectors = nb_sectors,
                   1571:         .qiov = &qiov,
                   1572:         .is_write = is_write,
                   1573:         .ret = NOT_DONE,
                   1574:     };
                   1575: 
                   1576:     qemu_iovec_init_external(&qiov, &iov, 1);
                   1577: 
1.1.1.23! root     1578:     /**
        !          1579:      * In sync call context, when the vcpu is blocked, this throttling timer
        !          1580:      * will not fire; so the I/O throttling function has to be disabled here
        !          1581:      * if it has been enabled.
        !          1582:      */
        !          1583:     if (bs->io_limits_enabled) {
        !          1584:         fprintf(stderr, "Disabling I/O throttling on '%s' due "
        !          1585:                         "to synchronous I/O.\n", bdrv_get_device_name(bs));
        !          1586:         bdrv_io_limits_disable(bs);
        !          1587:     }
        !          1588: 
1.1.1.22  root     1589:     if (qemu_in_coroutine()) {
                   1590:         /* Fast-path if already in coroutine context */
                   1591:         bdrv_rw_co_entry(&rwco);
                   1592:     } else {
                   1593:         co = qemu_coroutine_create(bdrv_rw_co_entry);
                   1594:         qemu_coroutine_enter(co, &rwco);
                   1595:         while (rwco.ret == NOT_DONE) {
                   1596:             qemu_aio_wait();
                   1597:         }
                   1598:     }
                   1599:     return rwco.ret;
                   1600: }
                   1601: 
1.1.1.5   root     1602: /* return < 0 if error. See bdrv_write() for the return codes */
1.1.1.6   root     1603: int bdrv_read(BlockDriverState *bs, int64_t sector_num,
1.1       root     1604:               uint8_t *buf, int nb_sectors)
                   1605: {
1.1.1.22  root     1606:     return bdrv_rw_co(bs, sector_num, buf, nb_sectors, false);
1.1       root     1607: }
                   1608: 
1.1.1.23! root     1609: #define BITS_PER_LONG  (sizeof(unsigned long) * 8)
        !          1610: 
1.1.1.14  root     1611: static void set_dirty_bitmap(BlockDriverState *bs, int64_t sector_num,
                   1612:                              int nb_sectors, int dirty)
                   1613: {
                   1614:     int64_t start, end;
                   1615:     unsigned long val, idx, bit;
                   1616: 
                   1617:     start = sector_num / BDRV_SECTORS_PER_DIRTY_CHUNK;
                   1618:     end = (sector_num + nb_sectors - 1) / BDRV_SECTORS_PER_DIRTY_CHUNK;
                   1619: 
                   1620:     for (; start <= end; start++) {
1.1.1.23! root     1621:         idx = start / BITS_PER_LONG;
        !          1622:         bit = start % BITS_PER_LONG;
1.1.1.14  root     1623:         val = bs->dirty_bitmap[idx];
                   1624:         if (dirty) {
1.1.1.19  root     1625:             if (!(val & (1UL << bit))) {
1.1.1.18  root     1626:                 bs->dirty_count++;
1.1.1.19  root     1627:                 val |= 1UL << bit;
1.1.1.18  root     1628:             }
1.1.1.14  root     1629:         } else {
1.1.1.19  root     1630:             if (val & (1UL << bit)) {
1.1.1.18  root     1631:                 bs->dirty_count--;
1.1.1.19  root     1632:                 val &= ~(1UL << bit);
1.1.1.18  root     1633:             }
1.1.1.14  root     1634:         }
                   1635:         bs->dirty_bitmap[idx] = val;
                   1636:     }
                   1637: }
                   1638: 
1.1.1.6   root     1639: /* Return < 0 if error. Important errors are:
1.1.1.5   root     1640:   -EIO         generic I/O error (may happen for all errors)
                   1641:   -ENOMEDIUM   No media inserted.
                   1642:   -EINVAL      Invalid sector number or nb_sectors
                   1643:   -EACCES      Trying to write a read-only device
                   1644: */
1.1.1.6   root     1645: int bdrv_write(BlockDriverState *bs, int64_t sector_num,
1.1       root     1646:                const uint8_t *buf, int nb_sectors)
                   1647: {
1.1.1.22  root     1648:     return bdrv_rw_co(bs, sector_num, (uint8_t *)buf, nb_sectors, true);
1.1       root     1649: }
                   1650: 
1.1.1.13  root     1651: int bdrv_pread(BlockDriverState *bs, int64_t offset,
                   1652:                void *buf, int count1)
1.1.1.5   root     1653: {
1.1.1.14  root     1654:     uint8_t tmp_buf[BDRV_SECTOR_SIZE];
1.1.1.5   root     1655:     int len, nb_sectors, count;
                   1656:     int64_t sector_num;
1.1.1.15  root     1657:     int ret;
1.1.1.5   root     1658: 
                   1659:     count = count1;
                   1660:     /* first read to align to sector start */
1.1.1.14  root     1661:     len = (BDRV_SECTOR_SIZE - offset) & (BDRV_SECTOR_SIZE - 1);
1.1.1.5   root     1662:     if (len > count)
                   1663:         len = count;
1.1.1.14  root     1664:     sector_num = offset >> BDRV_SECTOR_BITS;
1.1.1.5   root     1665:     if (len > 0) {
1.1.1.15  root     1666:         if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
                   1667:             return ret;
1.1.1.14  root     1668:         memcpy(buf, tmp_buf + (offset & (BDRV_SECTOR_SIZE - 1)), len);
1.1.1.5   root     1669:         count -= len;
                   1670:         if (count == 0)
                   1671:             return count1;
                   1672:         sector_num++;
                   1673:         buf += len;
                   1674:     }
                   1675: 
                   1676:     /* read the sectors "in place" */
1.1.1.14  root     1677:     nb_sectors = count >> BDRV_SECTOR_BITS;
1.1.1.5   root     1678:     if (nb_sectors > 0) {
1.1.1.15  root     1679:         if ((ret = bdrv_read(bs, sector_num, buf, nb_sectors)) < 0)
                   1680:             return ret;
1.1.1.5   root     1681:         sector_num += nb_sectors;
1.1.1.14  root     1682:         len = nb_sectors << BDRV_SECTOR_BITS;
1.1.1.5   root     1683:         buf += len;
                   1684:         count -= len;
                   1685:     }
                   1686: 
                   1687:     /* add data from the last sector */
                   1688:     if (count > 0) {
1.1.1.15  root     1689:         if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
                   1690:             return ret;
1.1.1.5   root     1691:         memcpy(buf, tmp_buf, count);
                   1692:     }
                   1693:     return count1;
                   1694: }
                   1695: 
1.1.1.13  root     1696: int bdrv_pwrite(BlockDriverState *bs, int64_t offset,
                   1697:                 const void *buf, int count1)
1.1.1.5   root     1698: {
1.1.1.14  root     1699:     uint8_t tmp_buf[BDRV_SECTOR_SIZE];
1.1.1.5   root     1700:     int len, nb_sectors, count;
                   1701:     int64_t sector_num;
1.1.1.15  root     1702:     int ret;
1.1.1.5   root     1703: 
                   1704:     count = count1;
                   1705:     /* first write to align to sector start */
1.1.1.14  root     1706:     len = (BDRV_SECTOR_SIZE - offset) & (BDRV_SECTOR_SIZE - 1);
1.1.1.5   root     1707:     if (len > count)
                   1708:         len = count;
1.1.1.14  root     1709:     sector_num = offset >> BDRV_SECTOR_BITS;
1.1.1.5   root     1710:     if (len > 0) {
1.1.1.15  root     1711:         if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
                   1712:             return ret;
1.1.1.14  root     1713:         memcpy(tmp_buf + (offset & (BDRV_SECTOR_SIZE - 1)), buf, len);
1.1.1.15  root     1714:         if ((ret = bdrv_write(bs, sector_num, tmp_buf, 1)) < 0)
                   1715:             return ret;
1.1.1.5   root     1716:         count -= len;
                   1717:         if (count == 0)
                   1718:             return count1;
                   1719:         sector_num++;
                   1720:         buf += len;
                   1721:     }
                   1722: 
                   1723:     /* write the sectors "in place" */
1.1.1.14  root     1724:     nb_sectors = count >> BDRV_SECTOR_BITS;
1.1.1.5   root     1725:     if (nb_sectors > 0) {
1.1.1.15  root     1726:         if ((ret = bdrv_write(bs, sector_num, buf, nb_sectors)) < 0)
                   1727:             return ret;
1.1.1.5   root     1728:         sector_num += nb_sectors;
1.1.1.14  root     1729:         len = nb_sectors << BDRV_SECTOR_BITS;
1.1.1.5   root     1730:         buf += len;
                   1731:         count -= len;
                   1732:     }
                   1733: 
                   1734:     /* add data from the last sector */
                   1735:     if (count > 0) {
1.1.1.15  root     1736:         if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
                   1737:             return ret;
1.1.1.5   root     1738:         memcpy(tmp_buf, buf, count);
1.1.1.15  root     1739:         if ((ret = bdrv_write(bs, sector_num, tmp_buf, 1)) < 0)
                   1740:             return ret;
1.1.1.5   root     1741:     }
                   1742:     return count1;
                   1743: }
                   1744: 
1.1.1.17  root     1745: /*
                   1746:  * Writes to the file and ensures that no writes are reordered across this
                   1747:  * request (acts as a barrier)
                   1748:  *
                   1749:  * Returns 0 on success, -errno in error cases.
                   1750:  */
                   1751: int bdrv_pwrite_sync(BlockDriverState *bs, int64_t offset,
                   1752:     const void *buf, int count)
                   1753: {
                   1754:     int ret;
                   1755: 
                   1756:     ret = bdrv_pwrite(bs, offset, buf, count);
                   1757:     if (ret < 0) {
                   1758:         return ret;
                   1759:     }
                   1760: 
1.1.1.22  root     1761:     /* No flush needed for cache modes that use O_DSYNC */
                   1762:     if ((bs->open_flags & BDRV_O_CACHE_WB) != 0) {
1.1.1.17  root     1763:         bdrv_flush(bs);
                   1764:     }
                   1765: 
1.1.1.23! root     1766:     return 0;
        !          1767: }
        !          1768: 
        !          1769: static int coroutine_fn bdrv_co_do_copy_on_readv(BlockDriverState *bs,
        !          1770:         int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
        !          1771: {
        !          1772:     /* Perform I/O through a temporary buffer so that users who scribble over
        !          1773:      * their read buffer while the operation is in progress do not end up
        !          1774:      * modifying the image file.  This is critical for zero-copy guest I/O
        !          1775:      * where anything might happen inside guest memory.
        !          1776:      */
        !          1777:     void *bounce_buffer;
        !          1778: 
        !          1779:     BlockDriver *drv = bs->drv;
        !          1780:     struct iovec iov;
        !          1781:     QEMUIOVector bounce_qiov;
        !          1782:     int64_t cluster_sector_num;
        !          1783:     int cluster_nb_sectors;
        !          1784:     size_t skip_bytes;
        !          1785:     int ret;
        !          1786: 
        !          1787:     /* Cover entire cluster so no additional backing file I/O is required when
        !          1788:      * allocating cluster in the image file.
        !          1789:      */
        !          1790:     round_to_clusters(bs, sector_num, nb_sectors,
        !          1791:                       &cluster_sector_num, &cluster_nb_sectors);
        !          1792: 
        !          1793:     trace_bdrv_co_do_copy_on_readv(bs, sector_num, nb_sectors,
        !          1794:                                    cluster_sector_num, cluster_nb_sectors);
        !          1795: 
        !          1796:     iov.iov_len = cluster_nb_sectors * BDRV_SECTOR_SIZE;
        !          1797:     iov.iov_base = bounce_buffer = qemu_blockalign(bs, iov.iov_len);
        !          1798:     qemu_iovec_init_external(&bounce_qiov, &iov, 1);
        !          1799: 
        !          1800:     ret = drv->bdrv_co_readv(bs, cluster_sector_num, cluster_nb_sectors,
        !          1801:                              &bounce_qiov);
        !          1802:     if (ret < 0) {
        !          1803:         goto err;
        !          1804:     }
        !          1805: 
        !          1806:     if (drv->bdrv_co_write_zeroes &&
        !          1807:         buffer_is_zero(bounce_buffer, iov.iov_len)) {
        !          1808:         ret = bdrv_co_do_write_zeroes(bs, cluster_sector_num,
        !          1809:                                       cluster_nb_sectors);
        !          1810:     } else {
        !          1811:         ret = drv->bdrv_co_writev(bs, cluster_sector_num, cluster_nb_sectors,
        !          1812:                                   &bounce_qiov);
        !          1813:     }
        !          1814: 
        !          1815:     if (ret < 0) {
        !          1816:         /* It might be okay to ignore write errors for guest requests.  If this
        !          1817:          * is a deliberate copy-on-read then we don't want to ignore the error.
        !          1818:          * Simply report it in all cases.
        !          1819:          */
        !          1820:         goto err;
        !          1821:     }
        !          1822: 
        !          1823:     skip_bytes = (sector_num - cluster_sector_num) * BDRV_SECTOR_SIZE;
        !          1824:     qemu_iovec_from_buffer(qiov, bounce_buffer + skip_bytes,
        !          1825:                            nb_sectors * BDRV_SECTOR_SIZE);
        !          1826: 
        !          1827: err:
        !          1828:     qemu_vfree(bounce_buffer);
        !          1829:     return ret;
        !          1830: }
        !          1831: 
        !          1832: /*
        !          1833:  * Handle a read request in coroutine context
                          *
                          * Fails early with -ENOMEDIUM when bs has no driver and with -EIO when
                          * bdrv_check_request() rejects the range.  Otherwise returns the result
                          * of the driver read, or of the copy-on-read helper when that path is
                          * taken.  Copy-on-read is requested either by bs->copy_on_read or by
                          * the caller passing BDRV_REQ_COPY_ON_READ in flags.
        !          1834:  */
        !          1835: static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
        !          1836:     int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
        !          1837:     BdrvRequestFlags flags)
        !          1838: {
        !          1839:     BlockDriver *drv = bs->drv;
        !          1840:     BdrvTrackedRequest req;
        !          1841:     int ret;
        !          1842: 
        !          1843:     if (!drv) {
        !          1844:         return -ENOMEDIUM;
        !          1845:     }
        !          1846:     if (bdrv_check_request(bs, sector_num, nb_sectors)) {
        !          1847:         return -EIO;
        !          1848:     }
        !          1849: 
        !          1850:     /* throttling disk read I/O */
        !          1851:     if (bs->io_limits_enabled) {
        !          1852:         bdrv_io_limits_intercept(bs, false, nb_sectors);
        !          1853:     }
        !          1854: 
        !          1855:     if (bs->copy_on_read) {
        !          1856:         flags |= BDRV_REQ_COPY_ON_READ;
        !          1857:     }
                             /* Count this request as in flight; the matching decrement is after
                              * the out label, so it also runs on the goto paths below. */
        !          1858:     if (flags & BDRV_REQ_COPY_ON_READ) {
        !          1859:         bs->copy_on_read_in_flight++;
        !          1860:     }
        !          1861: 
                             /* The overlap wait is only done while the in-flight counter is
                              * non-zero (which includes this request when it is copy-on-read). */
        !          1862:     if (bs->copy_on_read_in_flight) {
        !          1863:         wait_for_overlapping_requests(bs, sector_num, nb_sectors);
        !          1864:     }
        !          1865: 
        !          1866:     tracked_request_begin(&req, bs, sector_num, nb_sectors, false);
        !          1867: 
        !          1868:     if (flags & BDRV_REQ_COPY_ON_READ) {
        !          1869:         int pnum;
        !          1870: 
        !          1871:         ret = bdrv_co_is_allocated(bs, sector_num, nb_sectors, &pnum);
        !          1872:         if (ret < 0) {
        !          1873:             goto out;
        !          1874:         }
        !          1875: 
                                 /* Unless bdrv_co_is_allocated() returned non-zero with pnum
                                  * covering all nb_sectors, use the copy-on-read helper instead
                                  * of the plain driver read. */
        !          1876:         if (!ret || pnum != nb_sectors) {
        !          1877:             ret = bdrv_co_do_copy_on_readv(bs, sector_num, nb_sectors, qiov);
        !          1878:             goto out;
        !          1879:         }
        !          1880:     }
        !          1881: 
        !          1882:     ret = drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov);
1.1.1.17  root     1883: 
1.1.1.23! root     1884: out:
        !          1885:     tracked_request_end(&req);
1.1.1.22  root     1886: 
1.1.1.23! root     1887:     if (flags & BDRV_REQ_COPY_ON_READ) {
        !          1888:         bs->copy_on_read_in_flight--;
1.1.1.22  root     1889:     }
                   1890: 
1.1.1.23! root     1891:     return ret;
1.1.1.22  root     1892: }
                   1893: 
                   1894: int coroutine_fn bdrv_co_readv(BlockDriverState *bs, int64_t sector_num,
                   1895:     int nb_sectors, QEMUIOVector *qiov)
                   1896: {
                             /* Emit the trace event, then forward to bdrv_co_do_readv() with no
                              * request flags; its return value is passed through unchanged. */
                   1897:     trace_bdrv_co_readv(bs, sector_num, nb_sectors);
                   1898: 
1.1.1.23! root     1899:     return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov, 0);
        !          1900: }
        !          1901: 
        !          1902: int coroutine_fn bdrv_co_copy_on_readv(BlockDriverState *bs,
        !          1903:     int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
        !          1904: {
                             /* Same as bdrv_co_readv() except that BDRV_REQ_COPY_ON_READ is
                              * always passed, regardless of bs->copy_on_read. */
        !          1905:     trace_bdrv_co_copy_on_readv(bs, sector_num, nb_sectors);
        !          1906: 
        !          1907:     return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov,
        !          1908:                             BDRV_REQ_COPY_ON_READ);
        !          1909: }
        !          1910: 
        !          1911: static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
        !          1912:     int64_t sector_num, int nb_sectors)
        !          1913: {
                             /* Write nb_sectors of zeroes starting at sector_num.
                              *
                              * The driver's bdrv_co_write_zeroes callback is used when present;
                              * any result other than -ENOTSUP is returned as is.  Otherwise a
                              * zero-filled bounce buffer of nb_sectors * BDRV_SECTOR_SIZE bytes
                              * is written through drv->bdrv_co_writev and freed afterwards.
                              * bs->drv is dereferenced without a NULL check here. */
        !          1914:     BlockDriver *drv = bs->drv;
        !          1915:     QEMUIOVector qiov;
        !          1916:     struct iovec iov;
        !          1917:     int ret;
        !          1918: 
        !          1919:     /* TODO Emulate only part of misaligned requests instead of letting block
        !          1920:      * drivers return -ENOTSUP and emulate everything */
        !          1921: 
        !          1922:     /* First try the efficient write zeroes operation */
        !          1923:     if (drv->bdrv_co_write_zeroes) {
        !          1924:         ret = drv->bdrv_co_write_zeroes(bs, sector_num, nb_sectors);
        !          1925:         if (ret != -ENOTSUP) {
        !          1926:             return ret;
        !          1927:         }
        !          1928:     }
        !          1929: 
        !          1930:     /* Fall back to bounce buffer if write zeroes is unsupported */
        !          1931:     iov.iov_len  = nb_sectors * BDRV_SECTOR_SIZE;
        !          1932:     iov.iov_base = qemu_blockalign(bs, iov.iov_len);
        !          1933:     memset(iov.iov_base, 0, iov.iov_len);
        !          1934:     qemu_iovec_init_external(&qiov, &iov, 1);
        !          1935: 
        !          1936:     ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, &qiov);
        !          1937: 
        !          1938:     qemu_vfree(iov.iov_base);
        !          1939:     return ret;
1.1.1.22  root     1940: }
                   1941: 
                   1942: /*
                   1943:  * Handle a write request in coroutine context
                          *
                          * Fails early with -ENOMEDIUM when bs has no driver, -EACCES when bs is
                          * read-only and -EIO when bdrv_check_request() rejects the range.
                          * With BDRV_REQ_ZERO_WRITE in flags the range is zeroed through
                          * bdrv_co_do_write_zeroes() and qiov is not used; otherwise qiov is
                          * handed to the driver's bdrv_co_writev.  Returns the result of that
                          * write.
                   1944:  */
                   1945: static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
1.1.1.23! root     1946:     int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
        !          1947:     BdrvRequestFlags flags)
1.1.1.22  root     1948: {
                   1949:     BlockDriver *drv = bs->drv;
1.1.1.23! root     1950:     BdrvTrackedRequest req;
1.1.1.22  root     1951:     int ret;
                   1952: 
                   1953:     if (!bs->drv) {
                   1954:         return -ENOMEDIUM;
                   1955:     }
                   1956:     if (bs->read_only) {
                   1957:         return -EACCES;
                   1958:     }
                   1959:     if (bdrv_check_request(bs, sector_num, nb_sectors)) {
                   1960:         return -EIO;
                   1961:     }
                   1962: 
1.1.1.23! root     1963:     /* throttling disk write I/O */
        !          1964:     if (bs->io_limits_enabled) {
        !          1965:         bdrv_io_limits_intercept(bs, true, nb_sectors);
        !          1966:     }
        !          1967: 
                             /* The overlap wait is only done while some copy-on-read request is
                              * counted as in flight on bs. */
        !          1968:     if (bs->copy_on_read_in_flight) {
        !          1969:         wait_for_overlapping_requests(bs, sector_num, nb_sectors);
        !          1970:     }
        !          1971: 
        !          1972:     tracked_request_begin(&req, bs, sector_num, nb_sectors, true);
        !          1973: 
        !          1974:     if (flags & BDRV_REQ_ZERO_WRITE) {
        !          1975:         ret = bdrv_co_do_write_zeroes(bs, sector_num, nb_sectors);
        !          1976:     } else {
        !          1977:         ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, qiov);
        !          1978:     }
1.1.1.22  root     1979: 
                             /* NOTE(review): the dirty bitmap and wr_highest_sector below are
                              * updated without looking at ret, i.e. also after a failed write;
                              * confirm that this is intended. */
                   1980:     if (bs->dirty_bitmap) {
                   1981:         set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
                   1982:     }
                   1983: 
                             /* Track the highest sector index touched by any write so far. */
                   1984:     if (bs->wr_highest_sector < sector_num + nb_sectors - 1) {
                   1985:         bs->wr_highest_sector = sector_num + nb_sectors - 1;
                   1986:     }
                   1987: 
1.1.1.23! root     1988:     tracked_request_end(&req);
        !          1989: 
1.1.1.22  root     1990:     return ret;
                   1991: }
                   1992: 
                   1993: int coroutine_fn bdrv_co_writev(BlockDriverState *bs, int64_t sector_num,
                   1994:     int nb_sectors, QEMUIOVector *qiov)
                   1995: {
                             /* Emit the trace event, then forward to bdrv_co_do_writev() with no
                              * request flags; its return value is passed through unchanged. */
                   1996:     trace_bdrv_co_writev(bs, sector_num, nb_sectors);
                   1997: 
1.1.1.23! root     1998:     return bdrv_co_do_writev(bs, sector_num, nb_sectors, qiov, 0);
        !          1999: }
        !          2000: 
        !          2001: int coroutine_fn bdrv_co_write_zeroes(BlockDriverState *bs,
        !          2002:                                       int64_t sector_num, int nb_sectors)
        !          2003: {
                             /* Zero nb_sectors starting at sector_num by issuing a write request
                              * with BDRV_REQ_ZERO_WRITE set; no data vector is needed, so NULL is
                              * passed for qiov. */
        !          2004:     trace_bdrv_co_write_zeroes(bs, sector_num, nb_sectors);
        !          2005: 
        !          2006:     return bdrv_co_do_writev(bs, sector_num, nb_sectors, NULL,
        !          2007:                              BDRV_REQ_ZERO_WRITE);
1.1.1.17  root     2008: }
                   2009: 
1.1.1.5   root     2010: /**
                   2011:  * Truncate file to 'offset' bytes (needed only for file protocols)
                          *
                          * Returns -ENOMEDIUM when bs has no driver, -ENOTSUP when the driver has
                          * no bdrv_truncate callback, -EACCES when bs is read-only and -EBUSY when
                          * bdrv_in_use(bs) is true.  A non-zero driver result is returned as is.
                          * When the driver returns 0, the sector count is refreshed from
                          * offset >> BDRV_SECTOR_BITS, bdrv_dev_resize_cb() is called, and the
                          * result of refresh_total_sectors() is returned.
                   2012:  */
                   2013: int bdrv_truncate(BlockDriverState *bs, int64_t offset)
                   2014: {
                   2015:     BlockDriver *drv = bs->drv;
1.1.1.18  root     2016:     int ret;
1.1.1.5   root     2017:     if (!drv)
                   2018:         return -ENOMEDIUM;
                   2019:     if (!drv->bdrv_truncate)
                   2020:         return -ENOTSUP;
1.1.1.14  root     2021:     if (bs->read_only)
                   2022:         return -EACCES;
1.1.1.19  root     2023:     if (bdrv_in_use(bs))
                   2024:         return -EBUSY;
1.1.1.18  root     2025:     ret = drv->bdrv_truncate(bs, offset);
                   2026:     if (ret == 0) {
                   2027:         ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
                                 /* The resize callback runs whatever the refresh returned. */
1.1.1.22  root     2028:         bdrv_dev_resize_cb(bs);
1.1.1.18  root     2029:     }
                   2030:     return ret;
1.1.1.5   root     2031: }
                   2032: 
                   2033: /**
1.1.1.21  root     2034:  * Length of an allocated file in bytes. Sparse files are counted by actual
                   2035:  * allocated space. Return < 0 if error or unknown.
                          *
                          * Returns -ENOMEDIUM when bs has no driver.  The driver callback is
                          * used when present; otherwise the query is forwarded to bs->file, and
                          * -ENOTSUP is returned when there is no bs->file either.
                   2036:  */
                   2037: int64_t bdrv_get_allocated_file_size(BlockDriverState *bs)
                   2038: {
                   2039:     BlockDriver *drv = bs->drv;
                   2040:     if (!drv) {
                   2041:         return -ENOMEDIUM;
                   2042:     }
                   2043:     if (drv->bdrv_get_allocated_file_size) {
                   2044:         return drv->bdrv_get_allocated_file_size(bs);
                   2045:     }
                   2046:     if (bs->file) {
                   2047:         return bdrv_get_allocated_file_size(bs->file);
                   2048:     }
                   2049:     return -ENOTSUP;
                   2050: }
                   2051: 
                   2052: /**
1.1.1.5   root     2053:  * Length of a file in bytes. Return < 0 if error or unknown.
                          *
                          * Returns -ENOMEDIUM when bs has no driver.  The driver's bdrv_getlength
                          * callback is only consulted when bs is growable or its device has
                          * removable media; in every other case the length is computed from the
                          * stored bs->total_sectors.
                   2054:  */
                   2055: int64_t bdrv_getlength(BlockDriverState *bs)
                   2056: {
                   2057:     BlockDriver *drv = bs->drv;
                   2058:     if (!drv)
                   2059:         return -ENOMEDIUM;
1.1.1.18  root     2060: 
1.1.1.22  root     2061:     if (bs->growable || bdrv_dev_has_removable_media(bs)) {
1.1.1.21  root     2062:         if (drv->bdrv_getlength) {
                   2063:             return drv->bdrv_getlength(bs);
                   2064:         }
1.1.1.5   root     2065:     }
1.1.1.21  root     2066:     return bs->total_sectors * BDRV_SECTOR_SIZE;
1.1.1.5   root     2067: }
                   2068: 
                   2069: /* return 0 as number of sectors if no device present or error */
1.1.1.6   root     2070: void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr)
1.1       root     2071: {
                             /* Store the size of bs in sectors into *nb_sectors_ptr: the byte
                              * length from bdrv_getlength() shifted right by BDRV_SECTOR_BITS,
                              * or 0 when bdrv_getlength() reports a negative value. */
1.1.1.5   root     2072:     int64_t length;
                   2073:     length = bdrv_getlength(bs);
                   2074:     if (length < 0)
                   2075:         length = 0;
                   2076:     else
1.1.1.14  root     2077:         length = length >> BDRV_SECTOR_BITS;
1.1.1.5   root     2078:     *nb_sectors_ptr = length;
1.1       root     2079: }
                   2080: 
1.1.1.7   root     2081: struct partition {
                                 /* One entry of the MSDOS partition table, as overlaid on the
                                  * sector buffer by guess_disk_lchs().  The struct is declared
                                  * QEMU_PACKED; the 32-bit fields are read with le32_to_cpu()
                                  * by that function. */
                   2082:         uint8_t boot_ind;           /* 0x80 - active */
                   2083:         uint8_t head;               /* starting head */
                   2084:         uint8_t sector;             /* starting sector */
                   2085:         uint8_t cyl;                /* starting cylinder */
                   2086:         uint8_t sys_ind;            /* What partition type */
                   2087:         uint8_t end_head;           /* end head */
                   2088:         uint8_t end_sector;         /* end sector */
                   2089:         uint8_t end_cyl;            /* end cylinder */
                   2090:         uint32_t start_sect;        /* starting sector counting from 0 */
                   2091:         uint32_t nr_sects;          /* nr of sectors in partition */
1.1.1.22  root     2092: } QEMU_PACKED;
1.1.1.7   root     2093: 
                   2094: /* try to guess the disk logical geometry from the MSDOS partition table. Return 0 if OK, -1 if could not guess */
                         /*
                          * Sector 0 is read with bdrv_read(); it must carry the bytes 0x55 0xaa at
                          * offsets 510 and 511.  The four table entries starting at offset 0x1be
                          * are scanned in order, and the first one with non-zero nr_sects and
                          * end_head that yields between 1 and 16383 cylinders is used.  The
                          * output pointers are only written when 0 is returned.
                          */
                   2095: static int guess_disk_lchs(BlockDriverState *bs,
                   2096:                            int *pcylinders, int *pheads, int *psectors)
                   2097: {
1.1.1.18  root     2098:     uint8_t buf[BDRV_SECTOR_SIZE];
1.1.1.7   root     2099:     int ret, i, heads, sectors, cylinders;
                   2100:     struct partition *p;
                   2101:     uint32_t nr_sects;
                   2102:     uint64_t nb_sectors;
1.1.1.23! root     2103:     bool enabled;
1.1.1.7   root     2104: 
                   2105:     bdrv_get_geometry(bs, &nb_sectors);
                   2106: 
1.1.1.23! root     2107:     /**
        !          2108:      * The function will be invoked during startup not only in sync I/O mode,
        !          2109:      * but also in async I/O mode. So the I/O throttling function has to
        !          2110:      * be disabled temporarily here, not permanently.
        !          2111:      */
        !          2112:     enabled = bs->io_limits_enabled;
        !          2113:     bs->io_limits_enabled = false;
1.1.1.7   root     2114:     ret = bdrv_read(bs, 0, buf, 1);
                             /* Restore the saved throttling flag before looking at the result. */
1.1.1.23! root     2115:     bs->io_limits_enabled = enabled;
1.1.1.7   root     2116:     if (ret < 0)
                   2117:         return -1;
                   2118:     /* test msdos magic */
                   2119:     if (buf[510] != 0x55 || buf[511] != 0xaa)
                   2120:         return -1;
                   2121:     for(i = 0; i < 4; i++) {
                   2122:         p = ((struct partition *)(buf + 0x1be)) + i;
                   2123:         nr_sects = le32_to_cpu(p->nr_sects);
                   2124:         if (nr_sects && p->end_head) {
                   2125:             /* We make the assumption that the partition terminates on
                   2126:                a cylinder boundary */
                   2127:             heads = p->end_head + 1;
                                     /* Only the low 6 bits of end_sector are used as the count. */
                   2128:             sectors = p->end_sector & 63;
                   2129:             if (sectors == 0)
                   2130:                 continue;
                   2131:             cylinders = nb_sectors / (heads * sectors);
                   2132:             if (cylinders < 1 || cylinders > 16383)
                   2133:                 continue;
                   2134:             *pheads = heads;
                   2135:             *psectors = sectors;
                   2136:             *pcylinders = cylinders;
                   2137: #if 0
                   2138:             printf("guessed geometry: LCHS=%d %d %d\n",
                   2139:                    cylinders, heads, sectors);
                   2140: #endif
                   2141:             return 0;
                   2142:         }
                   2143:     }
                   2144:     return -1;
                   2145: }
                   2146: 
                   2147: void bdrv_guess_geometry(BlockDriverState *bs, int *pcyls, int *pheads, int *psecs)
1.1       root     2148: {
                             /* Fill *pcyls, *pheads and *psecs with a geometry for bs.
                              *
                              * Order of preference:
                              *  1. the stored geometry hint, when its cylinder count is non-zero;
                              *  2. the geometry guessed by guess_disk_lchs(), when it reports at
                              *     most 16 heads;
                              *  3. a default of 16 heads and 63 sectors, with the cylinder count
                              *     derived from the device size and clamped to [2, 16383].
                              * In cases 2 and 3 the result is stored back as the geometry hint,
                              * and the translation hint may be changed, but only when it was
                              * BIOS_ATA_TRANSLATION_AUTO. */
1.1.1.7   root     2149:     int translation, lba_detected = 0;
                   2150:     int cylinders, heads, secs;
                   2151:     uint64_t nb_sectors;
                   2152: 
                   2153:     /* if a geometry hint is available, use it */
                   2154:     bdrv_get_geometry(bs, &nb_sectors);
                   2155:     bdrv_get_geometry_hint(bs, &cylinders, &heads, &secs);
                   2156:     translation = bdrv_get_translation_hint(bs);
                   2157:     if (cylinders != 0) {
                   2158:         *pcyls = cylinders;
                   2159:         *pheads = heads;
                   2160:         *psecs = secs;
                   2161:     } else {
                   2162:         if (guess_disk_lchs(bs, &cylinders, &heads, &secs) == 0) {
                   2163:             if (heads > 16) {
                   2164:                 /* if heads > 16, it means that a BIOS LBA
                   2165:                    translation was active, so the default
                   2166:                    hardware geometry is OK */
                   2167:                 lba_detected = 1;
                   2168:                 goto default_geometry;
                   2169:             } else {
                   2170:                 *pcyls = cylinders;
                   2171:                 *pheads = heads;
                   2172:                 *psecs = secs;
                   2173:                 /* disable any translation to be in sync with
                   2174:                    the logical geometry */
                   2175:                 if (translation == BIOS_ATA_TRANSLATION_AUTO) {
                   2176:                     bdrv_set_translation_hint(bs,
                   2177:                                               BIOS_ATA_TRANSLATION_NONE);
                   2178:                 }
                   2179:             }
                   2180:         } else {
                   2181:         default_geometry:
                   2182:             /* if no geometry, use a standard physical disk geometry */
                   2183:             cylinders = nb_sectors / (16 * 63);
                   2184: 
                   2185:             if (cylinders > 16383)
                   2186:                 cylinders = 16383;
                   2187:             else if (cylinders < 2)
                   2188:                 cylinders = 2;
                   2189:             *pcyls = cylinders;
                   2190:             *pheads = 16;
                   2191:             *psecs = 63;
                                     /* Only reached with lba_detected set via the goto above:
                                      * choose LARGE or LBA translation from cylinders * heads. */
                   2192:             if ((lba_detected == 1) && (translation == BIOS_ATA_TRANSLATION_AUTO)) {
                   2193:                 if ((*pcyls * *pheads) <= 131072) {
                   2194:                     bdrv_set_translation_hint(bs,
                   2195:                                               BIOS_ATA_TRANSLATION_LARGE);
                   2196:                 } else {
                   2197:                     bdrv_set_translation_hint(bs,
                   2198:                                               BIOS_ATA_TRANSLATION_LBA);
                   2199:                 }
                   2200:             }
                   2201:         }
                   2202:         bdrv_set_geometry_hint(bs, *pcyls, *pheads, *psecs);
                   2203:     }
1.1       root     2204: }
                   2205: 
1.1.1.6   root     2206: void bdrv_set_geometry_hint(BlockDriverState *bs,
1.1       root     2207:                             int cyls, int heads, int secs)
                   2208: {
                             /* Store the geometry hint in bs; no validation is done here. */
                   2209:     bs->cyls = cyls;
                   2210:     bs->heads = heads;
                   2211:     bs->secs = secs;
                   2212: }
                   2213: 
                   2214: void bdrv_set_translation_hint(BlockDriverState *bs, int translation)
                   2215: {
                             /* Store the translation hint in bs; the value is not validated. */
                   2216:     bs->translation = translation;
                   2217: }
                   2218: 
1.1.1.6   root     2219: void bdrv_get_geometry_hint(BlockDriverState *bs,
1.1       root     2220:                             int *pcyls, int *pheads, int *psecs)
                   2221: {
                             /* Copy the stored geometry hint out of bs.  Callers in this file
                              * treat a zero cylinder count as meaning that no hint is set. */
                   2222:     *pcyls = bs->cyls;
                   2223:     *pheads = bs->heads;
                   2224:     *psecs = bs->secs;
                   2225: }
                   2226: 
1.1.1.23! root     2227: /* throttling disk io limits */
        !          2228: void bdrv_set_io_limits(BlockDriverState *bs,
        !          2229:                         BlockIOLimit *io_limits)
        !          2230: {
                             /* Copy the caller's limits into bs by value, then recompute
                              * bs->io_limits_enabled through bdrv_io_limits_enabled().
                              * io_limits must not be NULL; it is dereferenced unconditionally. */
        !          2231:     bs->io_limits = *io_limits;
        !          2232:     bs->io_limits_enabled = bdrv_io_limits_enabled(bs);
        !          2233: }
        !          2234: 
1.1.1.21  root     2235: /* Recognize floppy formats */
                   2236: typedef struct FDFormat {
                             /* One known floppy layout.  bdrv_get_floppy_geometry_hint() computes
                              * the total sector count of a layout as
                              * (max_head + 1) * max_track * last_sect. */
                   2237:     FDriveType drive;
                   2238:     uint8_t last_sect;
                   2239:     uint8_t max_track;
                   2240:     uint8_t max_head;
1.1.1.23! root     2241:     FDriveRate rate;
1.1.1.21  root     2242: } FDFormat;
                   2243: 
                   2244: static const FDFormat fd_formats[] = {
                             /* Columns follow the FDFormat field order:
                              * drive, last_sect, max_track, max_head, rate.
                              * The table is scanned from the top and ends at the entry whose
                              * drive is FDRIVE_DRV_NONE, so the order of the rows matters. */
                   2245:     /* First entry is default format */
                   2246:     /* 1.44 MB 3"1/2 floppy disks */
1.1.1.23! root     2247:     { FDRIVE_DRV_144, 18, 80, 1, FDRIVE_RATE_500K, },
        !          2248:     { FDRIVE_DRV_144, 20, 80, 1, FDRIVE_RATE_500K, },
        !          2249:     { FDRIVE_DRV_144, 21, 80, 1, FDRIVE_RATE_500K, },
        !          2250:     { FDRIVE_DRV_144, 21, 82, 1, FDRIVE_RATE_500K, },
        !          2251:     { FDRIVE_DRV_144, 21, 83, 1, FDRIVE_RATE_500K, },
        !          2252:     { FDRIVE_DRV_144, 22, 80, 1, FDRIVE_RATE_500K, },
        !          2253:     { FDRIVE_DRV_144, 23, 80, 1, FDRIVE_RATE_500K, },
        !          2254:     { FDRIVE_DRV_144, 24, 80, 1, FDRIVE_RATE_500K, },
1.1.1.21  root     2255:     /* 2.88 MB 3"1/2 floppy disks */
1.1.1.23! root     2256:     { FDRIVE_DRV_288, 36, 80, 1, FDRIVE_RATE_1M, },
        !          2257:     { FDRIVE_DRV_288, 39, 80, 1, FDRIVE_RATE_1M, },
        !          2258:     { FDRIVE_DRV_288, 40, 80, 1, FDRIVE_RATE_1M, },
        !          2259:     { FDRIVE_DRV_288, 44, 80, 1, FDRIVE_RATE_1M, },
        !          2260:     { FDRIVE_DRV_288, 48, 80, 1, FDRIVE_RATE_1M, },
1.1.1.21  root     2261:     /* 720 kB 3"1/2 floppy disks */
1.1.1.23! root     2262:     { FDRIVE_DRV_144,  9, 80, 1, FDRIVE_RATE_250K, },
        !          2263:     { FDRIVE_DRV_144, 10, 80, 1, FDRIVE_RATE_250K, },
        !          2264:     { FDRIVE_DRV_144, 10, 82, 1, FDRIVE_RATE_250K, },
        !          2265:     { FDRIVE_DRV_144, 10, 83, 1, FDRIVE_RATE_250K, },
        !          2266:     { FDRIVE_DRV_144, 13, 80, 1, FDRIVE_RATE_250K, },
        !          2267:     { FDRIVE_DRV_144, 14, 80, 1, FDRIVE_RATE_250K, },
1.1.1.21  root     2268:     /* 1.2 MB 5"1/4 floppy disks */
1.1.1.23! root     2269:     { FDRIVE_DRV_120, 15, 80, 1, FDRIVE_RATE_500K, },
        !          2270:     { FDRIVE_DRV_120, 18, 80, 1, FDRIVE_RATE_500K, },
        !          2271:     { FDRIVE_DRV_120, 18, 82, 1, FDRIVE_RATE_500K, },
        !          2272:     { FDRIVE_DRV_120, 18, 83, 1, FDRIVE_RATE_500K, },
        !          2273:     { FDRIVE_DRV_120, 20, 80, 1, FDRIVE_RATE_500K, },
1.1.1.21  root     2274:     /* 720 kB 5"1/4 floppy disks */
1.1.1.23! root     2275:     { FDRIVE_DRV_120,  9, 80, 1, FDRIVE_RATE_250K, },
        !          2276:     { FDRIVE_DRV_120, 11, 80, 1, FDRIVE_RATE_250K, },
1.1.1.21  root     2277:     /* 360 kB 5"1/4 floppy disks */
1.1.1.23! root     2278:     { FDRIVE_DRV_120,  9, 40, 1, FDRIVE_RATE_300K, },
        !          2279:     { FDRIVE_DRV_120,  9, 40, 0, FDRIVE_RATE_300K, },
        !          2280:     { FDRIVE_DRV_120, 10, 41, 1, FDRIVE_RATE_300K, },
        !          2281:     { FDRIVE_DRV_120, 10, 42, 1, FDRIVE_RATE_300K, },
1.1.1.21  root     2282:     /* 320 kB 5"1/4 floppy disks */
1.1.1.23! root     2283:     { FDRIVE_DRV_120,  8, 40, 1, FDRIVE_RATE_250K, },
        !          2284:     { FDRIVE_DRV_120,  8, 40, 0, FDRIVE_RATE_250K, },
1.1.1.21  root     2285:     /* 360 kB must match 5"1/4 better than 3"1/2... */
1.1.1.23! root     2286:     { FDRIVE_DRV_144,  9, 80, 0, FDRIVE_RATE_250K, },
1.1.1.21  root     2287:     /* end */
1.1.1.23! root     2288:     { FDRIVE_DRV_NONE, -1, -1, 0, 0, },
1.1.1.21  root     2289: };
                   2290: 
                   2291: void bdrv_get_floppy_geometry_hint(BlockDriverState *bs, int *nb_heads,
                   2292:                                    int *max_track, int *last_sect,
1.1.1.23! root     2293:                                    FDriveType drive_in, FDriveType *drive,
        !          2294:                                    FDriveRate *rate)
1.1.1.21  root     2295: {
                             /* Pick a floppy geometry for bs.
                              *
                              * When the stored geometry hint has all three values non-zero it is
                              * returned as is, with *rate set to FDRIVE_RATE_500K.  Otherwise
                              * fd_formats is searched for an entry whose drive matches drive_in
                              * (any entry when drive_in is FDRIVE_DRV_NONE) and whose total
                              * sector count equals the size of bs.  Without an exact size match
                              * the first entry with a matching drive is used.
                              *
                              * NOTE(review): *drive is not written on the user-defined path, so
                              * callers must not rely on it in that case; confirm against them. */
                   2296:     const FDFormat *parse;
                   2297:     uint64_t nb_sectors, size;
                   2298:     int i, first_match, match;
                   2299: 
                   2300:     bdrv_get_geometry_hint(bs, nb_heads, max_track, last_sect);
                   2301:     if (*nb_heads != 0 && *max_track != 0 && *last_sect != 0) {
                   2302:         /* User defined disk */
1.1.1.23! root     2303:         *rate = FDRIVE_RATE_500K;
1.1.1.21  root     2304:     } else {
                   2305:         bdrv_get_geometry(bs, &nb_sectors);
                   2306:         match = -1;
                   2307:         first_match = -1;
                   2308:         for (i = 0; ; i++) {
                   2309:             parse = &fd_formats[i];
                   2310:             if (parse->drive == FDRIVE_DRV_NONE) {
                   2311:                 break;
                   2312:             }
                   2313:             if (drive_in == parse->drive ||
                   2314:                 drive_in == FDRIVE_DRV_NONE) {
                   2315:                 size = (parse->max_head + 1) * parse->max_track *
                   2316:                     parse->last_sect;
                   2317:                 if (nb_sectors == size) {
                   2318:                     match = i;
                   2319:                     break;
                   2320:                 }
                   2321:                 if (first_match == -1) {
                   2322:                     first_match = i;
                   2323:                 }
                   2324:             }
                   2325:         }
                   2326:         if (match == -1) {
                                     /* No exact size match; parse currently points at the table
                                      * terminator and is re-pointed below.
                                      * NOTE(review): the last-resort index is 1, the second
                                      * table entry, although the table comment calls the first
                                      * entry the default; confirm which is intended. */
                   2327:             if (first_match == -1) {
                   2328:                 match = 1;
                   2329:             } else {
                   2330:                 match = first_match;
                   2331:             }
                   2332:             parse = &fd_formats[match];
                   2333:         }
                   2334:         *nb_heads = parse->max_head + 1;
                   2335:         *max_track = parse->max_track;
                   2336:         *last_sect = parse->last_sect;
                   2337:         *drive = parse->drive;
1.1.1.23! root     2338:         *rate = parse->rate;
1.1.1.21  root     2339:     }
1.1       root     2340: }
                   2341: 
                   2342: int bdrv_get_translation_hint(BlockDriverState *bs)
                   2343: {
                             /* Return the translation hint stored in bs. */
                   2344:     return bs->translation;
                   2345: }
                   2346: 
1.1.1.18  root     2347: void bdrv_set_on_error(BlockDriverState *bs, BlockErrorAction on_read_error,
                   2348:                        BlockErrorAction on_write_error)
                   2349: {
                             /* Store the error actions for reads and for writes in bs. */
                   2350:     bs->on_read_error = on_read_error;
                   2351:     bs->on_write_error = on_write_error;
                   2352: }
                   2353: 
                   2354: BlockErrorAction bdrv_get_on_error(BlockDriverState *bs, int is_read)
                   2355: {
                             /* Return the read error action when is_read is non-zero, and the
                              * write error action otherwise. */
                   2356:     return is_read ? bs->on_read_error : bs->on_write_error;
                   2357: }
                   2358: 
1.1       root     2359: int bdrv_is_read_only(BlockDriverState *bs)
                   2360: {
                             /* Return the read_only flag stored in bs. */
                   2361:     return bs->read_only;
                   2362: }
                   2363: 
1.1.1.6   root     2364: int bdrv_is_sg(BlockDriverState *bs)
                   2365: {
                             /* Return the sg flag stored in bs. */
                   2366:     return bs->sg;
                   2367: }
                   2368: 
1.1.1.14  root     2369: int bdrv_enable_write_cache(BlockDriverState *bs)
                   2370: {
                             /* Despite the name this is a getter: it returns the
                              * enable_write_cache flag stored in bs and changes nothing. */
                   2371:     return bs->enable_write_cache;
                   2372: }
                   2373: 
1.1       root     2374: int bdrv_is_encrypted(BlockDriverState *bs)
                   2375: {
                             /* Return 1 when the backing image of bs is encrypted; otherwise
                              * return the encrypted flag of bs itself. */
                   2376:     if (bs->backing_hd && bs->backing_hd->encrypted)
                   2377:         return 1;
                   2378:     return bs->encrypted;
                   2379: }
                   2380: 
1.1.1.8   root     2381: int bdrv_key_required(BlockDriverState *bs)
                   2382: {
                             /* Return non-zero when either the backing image or bs itself is
                              * encrypted and does not have valid_key set yet. */
                   2383:     BlockDriverState *backing_hd = bs->backing_hd;
                   2384: 
                   2385:     if (backing_hd && backing_hd->encrypted && !backing_hd->valid_key)
                   2386:         return 1;
                   2387:     return (bs->encrypted && !bs->valid_key);
                   2388: }
                   2389: 
1.1       root     2390: int bdrv_set_key(BlockDriverState *bs, const char *key)
                   2391: {
                             /* Set the encryption key on bs and, first, on its backing image.
                              *
                              * When the backing image is encrypted the key is applied to it
                              * recursively; a failure there is returned at once, and 0 is
                              * returned when bs itself is not encrypted.  Otherwise returns
                              * -EINVAL when bs is not encrypted, -ENOMEDIUM when bs has no
                              * driver or the driver has no bdrv_set_key callback, and else the
                              * result of the driver callback. */
                   2392:     int ret;
                   2393:     if (bs->backing_hd && bs->backing_hd->encrypted) {
                   2394:         ret = bdrv_set_key(bs->backing_hd, key);
                   2395:         if (ret < 0)
                   2396:             return ret;
                   2397:         if (!bs->encrypted)
                   2398:             return 0;
                   2399:     }
1.1.1.18  root     2400:     if (!bs->encrypted) {
                   2401:         return -EINVAL;
                   2402:     } else if (!bs->drv || !bs->drv->bdrv_set_key) {
                   2403:         return -ENOMEDIUM;
                   2404:     }
1.1.1.8   root     2405:     ret = bs->drv->bdrv_set_key(bs, key);
                             /* A failed attempt clears valid_key; the media change callback only
                              * fires on the transition from no valid key to a valid key. */
1.1.1.13  root     2406:     if (ret < 0) {
                   2407:         bs->valid_key = 0;
                   2408:     } else if (!bs->valid_key) {
                   2409:         bs->valid_key = 1;
                   2410:         /* call the change callback now, we skipped it on open */
1.1.1.22  root     2411:         bdrv_dev_change_media_cb(bs, true);
1.1.1.13  root     2412:     }
1.1.1.8   root     2413:     return ret;
1.1       root     2414: }
                   2415: 
                   2416: void bdrv_get_format(BlockDriverState *bs, char *buf, int buf_size)
                   2417: {
1.1.1.5   root     2418:     if (!bs->drv) {
1.1       root     2419:         buf[0] = '\0';
                   2420:     } else {
                   2421:         pstrcpy(buf, buf_size, bs->drv->format_name);
                   2422:     }
                   2423: }
                   2424: 
1.1.1.6   root     2425: void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
1.1       root     2426:                          void *opaque)
                   2427: {
                   2428:     BlockDriver *drv;
                   2429: 
1.1.1.18  root     2430:     QLIST_FOREACH(drv, &bdrv_drivers, list) {
1.1       root     2431:         it(opaque, drv->format_name);
                   2432:     }
                   2433: }
                   2434: 
                   2435: BlockDriverState *bdrv_find(const char *name)
                   2436: {
                   2437:     BlockDriverState *bs;
                   2438: 
1.1.1.18  root     2439:     QTAILQ_FOREACH(bs, &bdrv_states, list) {
                   2440:         if (!strcmp(name, bs->device_name)) {
1.1       root     2441:             return bs;
1.1.1.18  root     2442:         }
1.1       root     2443:     }
                   2444:     return NULL;
                   2445: }
                   2446: 
1.1.1.18  root     2447: BlockDriverState *bdrv_next(BlockDriverState *bs)
                   2448: {
                   2449:     if (!bs) {
                   2450:         return QTAILQ_FIRST(&bdrv_states);
                   2451:     }
                   2452:     return QTAILQ_NEXT(bs, list);
                   2453: }
                   2454: 
1.1.1.8   root     2455: void bdrv_iterate(void (*it)(void *opaque, BlockDriverState *bs), void *opaque)
1.1       root     2456: {
                   2457:     BlockDriverState *bs;
                   2458: 
1.1.1.18  root     2459:     QTAILQ_FOREACH(bs, &bdrv_states, list) {
1.1.1.8   root     2460:         it(opaque, bs);
1.1       root     2461:     }
                   2462: }
                   2463: 
                   2464: const char *bdrv_get_device_name(BlockDriverState *bs)
                   2465: {
                   2466:     return bs->device_name;
                   2467: }
                   2468: 
1.1.1.7   root     2469: void bdrv_flush_all(void)
                   2470: {
                   2471:     BlockDriverState *bs;
                   2472: 
1.1.1.18  root     2473:     QTAILQ_FOREACH(bs, &bdrv_states, list) {
1.1.1.23! root     2474:         bdrv_flush(bs);
1.1.1.18  root     2475:     }
                   2476: }
                   2477: 
                   2478: int bdrv_has_zero_init(BlockDriverState *bs)
                   2479: {
                   2480:     assert(bs->drv);
                   2481: 
                   2482:     if (bs->drv->bdrv_has_zero_init) {
                   2483:         return bs->drv->bdrv_has_zero_init(bs);
                   2484:     }
                   2485: 
                   2486:     return 1;
1.1.1.7   root     2487: }
                   2488: 
1.1.1.23! root     2489: typedef struct BdrvCoIsAllocatedData {
        !          2490:     BlockDriverState *bs;
        !          2491:     int64_t sector_num;
        !          2492:     int nb_sectors;
        !          2493:     int *pnum;
        !          2494:     int ret;
        !          2495:     bool done;
        !          2496: } BdrvCoIsAllocatedData;
        !          2497: 
1.1.1.7   root     2498: /*
                   2499:  * Returns true iff the specified sector is present in the disk image. Drivers
                   2500:  * not implementing the functionality are assumed to not support backing files,
                   2501:  * hence all their sectors are reported as allocated.
                   2502:  *
1.1.1.23! root     2503:  * If 'sector_num' is beyond the end of the disk image the return value is 0
        !          2504:  * and 'pnum' is set to 0.
        !          2505:  *
1.1.1.7   root     2506:  * 'pnum' is set to the number of sectors (including and immediately following
                   2507:  * the specified sector) that are known to be in the same
                   2508:  * allocated/unallocated state.
                   2509:  *
1.1.1.23! root     2510:  * 'nb_sectors' is the max value 'pnum' should be set to.  If nb_sectors goes
        !          2511:  * beyond the end of the disk image it will be clamped.
1.1.1.7   root     2512:  */
1.1.1.23! root     2513: int coroutine_fn bdrv_co_is_allocated(BlockDriverState *bs, int64_t sector_num,
        !          2514:                                       int nb_sectors, int *pnum)
1.1.1.7   root     2515: {
                   2516:     int64_t n;
1.1.1.23! root     2517: 
        !          2518:     if (sector_num >= bs->total_sectors) {
        !          2519:         *pnum = 0;
        !          2520:         return 0;
        !          2521:     }
        !          2522: 
        !          2523:     n = bs->total_sectors - sector_num;
        !          2524:     if (n < nb_sectors) {
        !          2525:         nb_sectors = n;
        !          2526:     }
        !          2527: 
        !          2528:     if (!bs->drv->bdrv_co_is_allocated) {
        !          2529:         *pnum = nb_sectors;
1.1.1.7   root     2530:         return 1;
                   2531:     }
1.1.1.23! root     2532: 
        !          2533:     return bs->drv->bdrv_co_is_allocated(bs, sector_num, nb_sectors, pnum);
1.1.1.7   root     2534: }
                   2535: 
1.1.1.23! root     2536: /* Coroutine wrapper for bdrv_is_allocated() */
        !          2537: static void coroutine_fn bdrv_is_allocated_co_entry(void *opaque)
1.1.1.18  root     2538: {
1.1.1.23! root     2539:     BdrvCoIsAllocatedData *data = opaque;
        !          2540:     BlockDriverState *bs = data->bs;
1.1.1.18  root     2541: 
1.1.1.23! root     2542:     data->ret = bdrv_co_is_allocated(bs, data->sector_num, data->nb_sectors,
        !          2543:                                      data->pnum);
        !          2544:     data->done = true;
        !          2545: }
1.1.1.18  root     2546: 
1.1.1.23! root     2547: /*
        !          2548:  * Synchronous wrapper around bdrv_co_is_allocated().
        !          2549:  *
        !          2550:  * See bdrv_co_is_allocated() for details.
        !          2551:  */
        !          2552: int bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
        !          2553:                       int *pnum)
        !          2554: {
        !          2555:     Coroutine *co;
        !          2556:     BdrvCoIsAllocatedData data = {
        !          2557:         .bs = bs,
        !          2558:         .sector_num = sector_num,
        !          2559:         .nb_sectors = nb_sectors,
        !          2560:         .pnum = pnum,
        !          2561:         .done = false,
        !          2562:     };
1.1.1.18  root     2563: 
1.1.1.23! root     2564:     co = qemu_coroutine_create(bdrv_is_allocated_co_entry);
        !          2565:     qemu_coroutine_enter(co, &data);
        !          2566:     while (!data.done) {
        !          2567:         qemu_aio_wait();
        !          2568:     }
        !          2569:     return data.ret;
1.1.1.18  root     2570: }
                   2571: 
1.1.1.22  root     2572: BlockInfoList *qmp_query_block(Error **errp)
1.1.1.14  root     2573: {
1.1.1.22  root     2574:     BlockInfoList *head = NULL, *cur_item = NULL;
1.1       root     2575:     BlockDriverState *bs;
                   2576: 
1.1.1.18  root     2577:     QTAILQ_FOREACH(bs, &bdrv_states, list) {
1.1.1.22  root     2578:         BlockInfoList *info = g_malloc0(sizeof(*info));
1.1.1.14  root     2579: 
1.1.1.22  root     2580:         info->value = g_malloc0(sizeof(*info->value));
                   2581:         info->value->device = g_strdup(bs->device_name);
                   2582:         info->value->type = g_strdup("unknown");
                   2583:         info->value->locked = bdrv_dev_is_medium_locked(bs);
                   2584:         info->value->removable = bdrv_dev_has_removable_media(bs);
                   2585: 
                   2586:         if (bdrv_dev_has_removable_media(bs)) {
                   2587:             info->value->has_tray_open = true;
                   2588:             info->value->tray_open = bdrv_dev_is_tray_open(bs);
                   2589:         }
                   2590: 
                   2591:         if (bdrv_iostatus_is_enabled(bs)) {
                   2592:             info->value->has_io_status = true;
                   2593:             info->value->io_status = bs->iostatus;
                   2594:         }
1.1.1.14  root     2595: 
1.1.1.5   root     2596:         if (bs->drv) {
1.1.1.22  root     2597:             info->value->has_inserted = true;
                   2598:             info->value->inserted = g_malloc0(sizeof(*info->value->inserted));
                   2599:             info->value->inserted->file = g_strdup(bs->filename);
                   2600:             info->value->inserted->ro = bs->read_only;
                   2601:             info->value->inserted->drv = g_strdup(bs->drv->format_name);
                   2602:             info->value->inserted->encrypted = bs->encrypted;
                   2603:             if (bs->backing_file[0]) {
                   2604:                 info->value->inserted->has_backing_file = true;
                   2605:                 info->value->inserted->backing_file = g_strdup(bs->backing_file);
1.1.1.13  root     2606:             }
1.1.1.23! root     2607: 
        !          2608:             if (bs->io_limits_enabled) {
        !          2609:                 info->value->inserted->bps =
        !          2610:                                bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL];
        !          2611:                 info->value->inserted->bps_rd =
        !          2612:                                bs->io_limits.bps[BLOCK_IO_LIMIT_READ];
        !          2613:                 info->value->inserted->bps_wr =
        !          2614:                                bs->io_limits.bps[BLOCK_IO_LIMIT_WRITE];
        !          2615:                 info->value->inserted->iops =
        !          2616:                                bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL];
        !          2617:                 info->value->inserted->iops_rd =
        !          2618:                                bs->io_limits.iops[BLOCK_IO_LIMIT_READ];
        !          2619:                 info->value->inserted->iops_wr =
        !          2620:                                bs->io_limits.iops[BLOCK_IO_LIMIT_WRITE];
        !          2621:             }
1.1.1.22  root     2622:         }
1.1.1.14  root     2623: 
1.1.1.22  root     2624:         /* XXX: waiting for the qapi to support GSList */
                   2625:         if (!cur_item) {
                   2626:             head = cur_item = info;
                   2627:         } else {
                   2628:             cur_item->next = info;
                   2629:             cur_item = info;
1.1       root     2630:         }
                   2631:     }
1.1.1.14  root     2632: 
1.1.1.22  root     2633:     return head;
1.1.1.14  root     2634: }
                   2635: 
1.1.1.22  root     2636: /* Consider exposing this as a full fledged QMP command */
                   2637: static BlockStats *qmp_query_blockstat(const BlockDriverState *bs, Error **errp)
1.1.1.14  root     2638: {
1.1.1.22  root     2639:     BlockStats *s;
1.1.1.18  root     2640: 
1.1.1.22  root     2641:     s = g_malloc0(sizeof(*s));
1.1.1.18  root     2642: 
1.1.1.22  root     2643:     if (bs->device_name[0]) {
                   2644:         s->has_device = true;
                   2645:         s->device = g_strdup(bs->device_name);
1.1.1.18  root     2646:     }
                   2647: 
1.1.1.22  root     2648:     s->stats = g_malloc0(sizeof(*s->stats));
                   2649:     s->stats->rd_bytes = bs->nr_bytes[BDRV_ACCT_READ];
                   2650:     s->stats->wr_bytes = bs->nr_bytes[BDRV_ACCT_WRITE];
                   2651:     s->stats->rd_operations = bs->nr_ops[BDRV_ACCT_READ];
                   2652:     s->stats->wr_operations = bs->nr_ops[BDRV_ACCT_WRITE];
                   2653:     s->stats->wr_highest_offset = bs->wr_highest_sector * BDRV_SECTOR_SIZE;
                   2654:     s->stats->flush_operations = bs->nr_ops[BDRV_ACCT_FLUSH];
                   2655:     s->stats->wr_total_time_ns = bs->total_time_ns[BDRV_ACCT_WRITE];
                   2656:     s->stats->rd_total_time_ns = bs->total_time_ns[BDRV_ACCT_READ];
                   2657:     s->stats->flush_total_time_ns = bs->total_time_ns[BDRV_ACCT_FLUSH];
                   2658: 
1.1.1.18  root     2659:     if (bs->file) {
1.1.1.22  root     2660:         s->has_parent = true;
                   2661:         s->parent = qmp_query_blockstat(bs->file, NULL);
1.1.1.18  root     2662:     }
                   2663: 
1.1.1.22  root     2664:     return s;
1.1.1.18  root     2665: }
                   2666: 
1.1.1.22  root     2667: BlockStatsList *qmp_query_blockstats(Error **errp)
1.1.1.6   root     2668: {
1.1.1.22  root     2669:     BlockStatsList *head = NULL, *cur_item = NULL;
1.1.1.6   root     2670:     BlockDriverState *bs;
                   2671: 
1.1.1.18  root     2672:     QTAILQ_FOREACH(bs, &bdrv_states, list) {
1.1.1.22  root     2673:         BlockStatsList *info = g_malloc0(sizeof(*info));
                   2674:         info->value = qmp_query_blockstat(bs, NULL);
                   2675: 
                   2676:         /* XXX: waiting for the qapi to support GSList */
                   2677:         if (!cur_item) {
                   2678:             head = cur_item = info;
                   2679:         } else {
                   2680:             cur_item->next = info;
                   2681:             cur_item = info;
                   2682:         }
1.1.1.6   root     2683:     }
1.1.1.14  root     2684: 
1.1.1.22  root     2685:     return head;
1.1.1.6   root     2686: }
                   2687: 
1.1.1.8   root     2688: const char *bdrv_get_encrypted_filename(BlockDriverState *bs)
                   2689: {
                   2690:     if (bs->backing_hd && bs->backing_hd->encrypted)
                   2691:         return bs->backing_file;
                   2692:     else if (bs->encrypted)
                   2693:         return bs->filename;
                   2694:     else
                   2695:         return NULL;
                   2696: }
                   2697: 
1.1.1.6   root     2698: void bdrv_get_backing_filename(BlockDriverState *bs,
1.1.1.5   root     2699:                                char *filename, int filename_size)
                   2700: {
1.1.1.22  root     2701:     pstrcpy(filename, filename_size, bs->backing_file);
1.1.1.5   root     2702: }
                   2703: 
1.1.1.6   root     2704: int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num,
1.1.1.5   root     2705:                           const uint8_t *buf, int nb_sectors)
                   2706: {
                   2707:     BlockDriver *drv = bs->drv;
                   2708:     if (!drv)
                   2709:         return -ENOMEDIUM;
                   2710:     if (!drv->bdrv_write_compressed)
                   2711:         return -ENOTSUP;
1.1.1.11  root     2712:     if (bdrv_check_request(bs, sector_num, nb_sectors))
                   2713:         return -EIO;
1.1.1.14  root     2714: 
                   2715:     if (bs->dirty_bitmap) {
                   2716:         set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
                   2717:     }
                   2718: 
1.1.1.5   root     2719:     return drv->bdrv_write_compressed(bs, sector_num, buf, nb_sectors);
                   2720: }
1.1.1.6   root     2721: 
1.1.1.5   root     2722: int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
                   2723: {
                   2724:     BlockDriver *drv = bs->drv;
                   2725:     if (!drv)
                   2726:         return -ENOMEDIUM;
                   2727:     if (!drv->bdrv_get_info)
                   2728:         return -ENOTSUP;
                   2729:     memset(bdi, 0, sizeof(*bdi));
                   2730:     return drv->bdrv_get_info(bs, bdi);
                   2731: }
                   2732: 
1.1.1.13  root     2733: int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf,
                   2734:                       int64_t pos, int size)
1.1.1.9   root     2735: {
                   2736:     BlockDriver *drv = bs->drv;
                   2737:     if (!drv)
                   2738:         return -ENOMEDIUM;
1.1.1.18  root     2739:     if (drv->bdrv_save_vmstate)
                   2740:         return drv->bdrv_save_vmstate(bs, buf, pos, size);
                   2741:     if (bs->file)
                   2742:         return bdrv_save_vmstate(bs->file, buf, pos, size);
                   2743:     return -ENOTSUP;
1.1.1.9   root     2744: }
                   2745: 
1.1.1.13  root     2746: int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf,
                   2747:                       int64_t pos, int size)
1.1.1.9   root     2748: {
                   2749:     BlockDriver *drv = bs->drv;
                   2750:     if (!drv)
                   2751:         return -ENOMEDIUM;
1.1.1.18  root     2752:     if (drv->bdrv_load_vmstate)
                   2753:         return drv->bdrv_load_vmstate(bs, buf, pos, size);
                   2754:     if (bs->file)
                   2755:         return bdrv_load_vmstate(bs->file, buf, pos, size);
                   2756:     return -ENOTSUP;
                   2757: }
                   2758: 
                   2759: void bdrv_debug_event(BlockDriverState *bs, BlkDebugEvent event)
                   2760: {
                   2761:     BlockDriver *drv = bs->drv;
                   2762: 
                   2763:     if (!drv || !drv->bdrv_debug_event) {
                   2764:         return;
                   2765:     }
                   2766: 
                   2767:     return drv->bdrv_debug_event(bs, event);
                   2768: 
1.1.1.9   root     2769: }
                   2770: 
1.1       root     2771: /**************************************************************/
1.1.1.5   root     2772: /* handling of snapshots */
1.1       root     2773: 
1.1.1.18  root     2774: int bdrv_can_snapshot(BlockDriverState *bs)
                   2775: {
                   2776:     BlockDriver *drv = bs->drv;
1.1.1.22  root     2777:     if (!drv || !bdrv_is_inserted(bs) || bdrv_is_read_only(bs)) {
1.1.1.18  root     2778:         return 0;
                   2779:     }
                   2780: 
                   2781:     if (!drv->bdrv_snapshot_create) {
                   2782:         if (bs->file != NULL) {
                   2783:             return bdrv_can_snapshot(bs->file);
                   2784:         }
                   2785:         return 0;
                   2786:     }
                   2787: 
                   2788:     return 1;
                   2789: }
                   2790: 
                   2791: int bdrv_is_snapshot(BlockDriverState *bs)
                   2792: {
                   2793:     return !!(bs->open_flags & BDRV_O_SNAPSHOT);
                   2794: }
                   2795: 
                   2796: BlockDriverState *bdrv_snapshots(void)
                   2797: {
                   2798:     BlockDriverState *bs;
                   2799: 
                   2800:     if (bs_snapshots) {
                   2801:         return bs_snapshots;
                   2802:     }
                   2803: 
                   2804:     bs = NULL;
                   2805:     while ((bs = bdrv_next(bs))) {
                   2806:         if (bdrv_can_snapshot(bs)) {
                   2807:             bs_snapshots = bs;
                   2808:             return bs;
                   2809:         }
                   2810:     }
                   2811:     return NULL;
                   2812: }
                   2813: 
1.1.1.6   root     2814: int bdrv_snapshot_create(BlockDriverState *bs,
1.1.1.5   root     2815:                          QEMUSnapshotInfo *sn_info)
                   2816: {
                   2817:     BlockDriver *drv = bs->drv;
                   2818:     if (!drv)
                   2819:         return -ENOMEDIUM;
1.1.1.18  root     2820:     if (drv->bdrv_snapshot_create)
                   2821:         return drv->bdrv_snapshot_create(bs, sn_info);
                   2822:     if (bs->file)
                   2823:         return bdrv_snapshot_create(bs->file, sn_info);
                   2824:     return -ENOTSUP;
1.1.1.5   root     2825: }
1.1       root     2826: 
1.1.1.6   root     2827: int bdrv_snapshot_goto(BlockDriverState *bs,
1.1.1.5   root     2828:                        const char *snapshot_id)
1.1       root     2829: {
1.1.1.5   root     2830:     BlockDriver *drv = bs->drv;
1.1.1.18  root     2831:     int ret, open_ret;
                   2832: 
1.1.1.5   root     2833:     if (!drv)
                   2834:         return -ENOMEDIUM;
1.1.1.18  root     2835:     if (drv->bdrv_snapshot_goto)
                   2836:         return drv->bdrv_snapshot_goto(bs, snapshot_id);
                   2837: 
                   2838:     if (bs->file) {
                   2839:         drv->bdrv_close(bs);
                   2840:         ret = bdrv_snapshot_goto(bs->file, snapshot_id);
                   2841:         open_ret = drv->bdrv_open(bs, bs->open_flags);
                   2842:         if (open_ret < 0) {
                   2843:             bdrv_delete(bs->file);
                   2844:             bs->drv = NULL;
                   2845:             return open_ret;
                   2846:         }
                   2847:         return ret;
                   2848:     }
                   2849: 
                   2850:     return -ENOTSUP;
1.1       root     2851: }
                   2852: 
1.1.1.5   root     2853: int bdrv_snapshot_delete(BlockDriverState *bs, const char *snapshot_id)
1.1       root     2854: {
1.1.1.5   root     2855:     BlockDriver *drv = bs->drv;
                   2856:     if (!drv)
                   2857:         return -ENOMEDIUM;
1.1.1.18  root     2858:     if (drv->bdrv_snapshot_delete)
                   2859:         return drv->bdrv_snapshot_delete(bs, snapshot_id);
                   2860:     if (bs->file)
                   2861:         return bdrv_snapshot_delete(bs->file, snapshot_id);
                   2862:     return -ENOTSUP;
1.1.1.5   root     2863: }
1.1       root     2864: 
1.1.1.6   root     2865: int bdrv_snapshot_list(BlockDriverState *bs,
1.1.1.5   root     2866:                        QEMUSnapshotInfo **psn_info)
                   2867: {
                   2868:     BlockDriver *drv = bs->drv;
                   2869:     if (!drv)
                   2870:         return -ENOMEDIUM;
1.1.1.18  root     2871:     if (drv->bdrv_snapshot_list)
                   2872:         return drv->bdrv_snapshot_list(bs, psn_info);
                   2873:     if (bs->file)
                   2874:         return bdrv_snapshot_list(bs->file, psn_info);
                   2875:     return -ENOTSUP;
1.1.1.5   root     2876: }
                   2877: 
1.1.1.19  root     2878: int bdrv_snapshot_load_tmp(BlockDriverState *bs,
                   2879:         const char *snapshot_name)
                   2880: {
                   2881:     BlockDriver *drv = bs->drv;
                   2882:     if (!drv) {
                   2883:         return -ENOMEDIUM;
                   2884:     }
                   2885:     if (!bs->read_only) {
                   2886:         return -EINVAL;
                   2887:     }
                   2888:     if (drv->bdrv_snapshot_load_tmp) {
                   2889:         return drv->bdrv_snapshot_load_tmp(bs, snapshot_name);
                   2890:     }
                   2891:     return -ENOTSUP;
                   2892: }
                   2893: 
1.1.1.23! root     2894: BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs,
        !          2895:         const char *backing_file)
        !          2896: {
        !          2897:     if (!bs->drv) {
        !          2898:         return NULL;
        !          2899:     }
        !          2900: 
        !          2901:     if (bs->backing_hd) {
        !          2902:         if (strcmp(bs->backing_file, backing_file) == 0) {
        !          2903:             return bs->backing_hd;
        !          2904:         } else {
        !          2905:             return bdrv_find_backing_image(bs->backing_hd, backing_file);
        !          2906:         }
        !          2907:     }
        !          2908: 
        !          2909:     return NULL;
        !          2910: }
        !          2911: 
#define NB_SUFFIXES 4

/*
 * Format the byte count @size into @buf (at most @buf_size bytes, via
 * snprintf) in a short human-readable form and return @buf.
 *
 * Values up to 999 are printed as plain integers.  Larger values are
 * scaled by powers of 1024 and get a K/M/G/T suffix: one decimal place
 * while the scaled value is below 10, otherwise a rounded integer.  Values
 * too large for the other branches are printed with the T suffix.
 */
char *get_human_readable_size(char *buf, int buf_size, int64_t size)
{
    static const char suffixes[NB_SUFFIXES] = "KMGT";
    int64_t base;
    int i;

    if (size <= 999) {
        snprintf(buf, buf_size, "%" PRId64, size);
        return buf;
    }

    base = 1024;
    for (i = 0; i < NB_SUFFIXES; i++) {
        if (size < (10 * base)) {
            snprintf(buf, buf_size, "%0.1f%c",
                     (double)size / base,
                     suffixes[i]);
            break;
        } else if (size < (1000 * base) || i == (NB_SUFFIXES - 1)) {
            /*
             * Round to nearest using quotient and remainder; adding
             * base / 2 to size first would overflow int64_t for sizes
             * close to INT64_MAX.
             */
            int64_t rounded = size / base + ((size % base) >= (base >> 1));

            snprintf(buf, buf_size, "%" PRId64 "%c",
                     rounded,
                     suffixes[i]);
            break;
        }
        base = base * 1024;
    }
    return buf;
}
                   2941: 
                   2942: char *bdrv_snapshot_dump(char *buf, int buf_size, QEMUSnapshotInfo *sn)
                   2943: {
                   2944:     char buf1[128], date_buf[128], clock_buf[128];
                   2945: #ifdef _WIN32
                   2946:     struct tm *ptm;
1.1.1.2   root     2947: #else
1.1.1.5   root     2948:     struct tm tm;
1.1.1.2   root     2949: #endif
1.1.1.5   root     2950:     time_t ti;
                   2951:     int64_t secs;
                   2952: 
                   2953:     if (!sn) {
1.1.1.6   root     2954:         snprintf(buf, buf_size,
                   2955:                  "%-10s%-20s%7s%20s%15s",
1.1.1.5   root     2956:                  "ID", "TAG", "VM SIZE", "DATE", "VM CLOCK");
                   2957:     } else {
                   2958:         ti = sn->date_sec;
1.1       root     2959: #ifdef _WIN32
1.1.1.5   root     2960:         ptm = localtime(&ti);
                   2961:         strftime(date_buf, sizeof(date_buf),
                   2962:                  "%Y-%m-%d %H:%M:%S", ptm);
                   2963: #else
                   2964:         localtime_r(&ti, &tm);
                   2965:         strftime(date_buf, sizeof(date_buf),
                   2966:                  "%Y-%m-%d %H:%M:%S", &tm);
1.1       root     2967: #endif
1.1.1.5   root     2968:         secs = sn->vm_clock_nsec / 1000000000;
                   2969:         snprintf(clock_buf, sizeof(clock_buf),
                   2970:                  "%02d:%02d:%02d.%03d",
                   2971:                  (int)(secs / 3600),
                   2972:                  (int)((secs / 60) % 60),
1.1.1.6   root     2973:                  (int)(secs % 60),
1.1.1.5   root     2974:                  (int)((sn->vm_clock_nsec / 1000000) % 1000));
                   2975:         snprintf(buf, buf_size,
1.1.1.6   root     2976:                  "%-10s%-20s%7s%20s%15s",
1.1.1.5   root     2977:                  sn->id_str, sn->name,
                   2978:                  get_human_readable_size(buf1, sizeof(buf1), sn->vm_state_size),
                   2979:                  date_buf,
                   2980:                  clock_buf);
                   2981:     }
                   2982:     return buf;
1.1       root     2983: }
                   2984: 
1.1.1.5   root     2985: /**************************************************************/
                   2986: /* async I/Os */
                   2987: 
1.1.1.7   root     2988: BlockDriverAIOCB *bdrv_aio_readv(BlockDriverState *bs, int64_t sector_num,
1.1.1.13  root     2989:                                  QEMUIOVector *qiov, int nb_sectors,
1.1.1.7   root     2990:                                  BlockDriverCompletionFunc *cb, void *opaque)
                   2991: {
1.1.1.19  root     2992:     trace_bdrv_aio_readv(bs, sector_num, nb_sectors, opaque);
                   2993: 
1.1.1.22  root     2994:     return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors,
                   2995:                                  cb, opaque, false);
1.1.1.19  root     2996: }
                   2997: 
1.1.1.13  root     2998: BlockDriverAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num,
                   2999:                                   QEMUIOVector *qiov, int nb_sectors,
                   3000:                                   BlockDriverCompletionFunc *cb, void *opaque)
1.1       root     3001: {
                             /*
                              * Asynchronous vectored write. Calls the trace_bdrv_aio_writev hook and
                              * then forwards the request to bdrv_co_aio_rw_vector() with
                              * is_write = true, returning the AIOCB that call produces.
                              */
1.1.1.19  root     3002:     trace_bdrv_aio_writev(bs, sector_num, nb_sectors, opaque);
1.1       root     3003: 
1.1.1.22  root     3004:     return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors,
                   3005:                                  cb, opaque, true);
1.1.1.5   root     3006: }
                   3007: 
1.1.1.14  root     3008: 
                   3009: typedef struct MultiwriteCB {
                             /*
                              * Shared completion state for one bdrv_aio_multiwrite() call.
                              * Allocated in bdrv_aio_multiwrite() and freed by multiwrite_cb()
                              * once the last submitted write has completed.
                              */
                             /* First negative completion status seen, or 0 if none so far. */
                   3010:     int error;
                             /* Submitted writes still outstanding; decremented by multiwrite_cb(). */
                   3011:     int num_requests;
                             /* Number of entries in callbacks[] (the caller's original request count). */
                   3012:     int num_callbacks;
                   3013:     struct {
                                 /* Completion callback and argument copied from the caller's request. */
                   3014:         BlockDriverCompletionFunc *cb;
                   3015:         void *opaque;
                                 /* Merged qiov created by multiwrite_merge(), or NULL; freed after cb. */
                   3016:         QEMUIOVector *free_qiov;
                   3017:     } callbacks[];
                   3018: } MultiwriteCB;
                   3019: 
                   3020: static void multiwrite_user_cb(MultiwriteCB *mcb)
                   3021: {
                             /*
                              * Invoke every caller-supplied completion callback with the shared
                              * mcb->error status, then release the merged qiov (if any) recorded
                              * for that callback slot.
                              */
                   3022:     int i;
                   3023: 
                   3024:     for (i = 0; i < mcb->num_callbacks; i++) {
                   3025:         mcb->callbacks[i].cb(mcb->callbacks[i].opaque, mcb->error);
                                 /* Destroy the vector contents first, then free the qiov itself. */
1.1.1.16  root     3026:         if (mcb->callbacks[i].free_qiov) {
                   3027:             qemu_iovec_destroy(mcb->callbacks[i].free_qiov);
                   3028:         }
1.1.1.22  root     3029:         g_free(mcb->callbacks[i].free_qiov);
1.1.1.14  root     3030:     }
                   3031: }
                   3032: 
                   3033: static void multiwrite_cb(void *opaque, int ret)
                   3034: {
                             /*
                              * Completion callback passed to bdrv_aio_writev() for each write
                              * submitted by bdrv_aio_multiwrite(). opaque is the shared
                              * MultiwriteCB; ret is the status of the write that just finished.
                              */
                   3035:     MultiwriteCB *mcb = opaque;
                   3036: 
1.1.1.19  root     3037:     trace_multiwrite_cb(mcb, ret);
                   3038: 
                             /* Remember only the first failure; later errors do not overwrite it. */
1.1.1.16  root     3039:     if (ret < 0 && !mcb->error) {
1.1.1.14  root     3040:         mcb->error = ret;
                   3041:     }
                   3042: 
                             /* When the last outstanding write is done, notify the callers and free. */
                   3043:     mcb->num_requests--;
                   3044:     if (mcb->num_requests == 0) {
1.1.1.17  root     3045:         multiwrite_user_cb(mcb);
1.1.1.22  root     3046:         g_free(mcb);
1.1.1.14  root     3047:     }
                   3048: }
                   3049: 
                   3050: static int multiwrite_req_compare(const void *a, const void *b)
                   3051: {
                             /*
                              * qsort() comparator used by multiwrite_merge(): orders BlockRequest
                              * entries by ascending start sector. Returns 1, -1 or 0.
                              */
1.1.1.17  root     3052:     const BlockRequest *req1 = a, *req2 = b;
                   3053: 
                   3054:     /*
                   3055:      * Note that we can't simply subtract req2->sector from req1->sector
                   3056:      * here as that could overflow the return value.
                   3057:      */
                   3058:     if (req1->sector > req2->sector) {
                   3059:         return 1;
                   3060:     } else if (req1->sector < req2->sector) {
                   3061:         return -1;
                   3062:     } else {
                   3063:         return 0;
                   3064:     }
1.1.1.14  root     3065: }
                   3066: 
                   3067: /*
                   3068:  * Takes a bunch of requests and tries to merge them. Returns the number of
                   3069:  * requests that remain after merging.
                          *
                          * The reqs array is sorted in place by start sector and the surviving
                          * requests are compacted to its front. Every merged QEMUIOVector that is
                          * allocated here is recorded in mcb->callbacks[].free_qiov so that
                          * multiwrite_user_cb() can destroy and free it later. The bs parameter is
                          * not referenced by this implementation.
                   3070:  */
                   3071: static int multiwrite_merge(BlockDriverState *bs, BlockRequest *reqs,
                   3072:     int num_reqs, MultiwriteCB *mcb)
                   3073: {
                   3074:     int i, outidx;
                   3075: 
                   3076:     // Sort requests by start sector
                   3077:     qsort(reqs, num_reqs, sizeof(*reqs), &multiwrite_req_compare);
                   3078: 
                   3079:     // Check whether each request starts at or before the end of the request
                   3080:     // accumulated so far. If so, combine them; gaps are never filled.
                   3081:     outidx = 0;
                   3082:     for (i = 1; i < num_reqs; i++) {
                   3083:         int merge = 0;
                                 // First sector past the end of the request accumulated at outidx.
                   3084:         int64_t oldreq_last = reqs[outidx].sector + reqs[outidx].nb_sectors;
                   3085: 
1.1.1.23! root     3086:         // Handle exactly sequential writes and overlapping writes.
1.1.1.14  root     3087:         if (reqs[i].sector <= oldreq_last) {
                   3088:             merge = 1;
                   3089:         }
                   3090: 
                                 // Do not merge if the combined vector could exceed IOV_MAX entries.
1.1.1.16  root     3091:         if (reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1 > IOV_MAX) {
                   3092:             merge = 0;
                   3093:         }
                   3094: 
1.1.1.14  root     3095:         if (merge) {
                   3096:             size_t size;
1.1.1.22  root     3097:             QEMUIOVector *qiov = g_malloc0(sizeof(*qiov));
1.1.1.14  root     3098:             qemu_iovec_init(qiov,
                   3099:                 reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1);
                   3100: 
                   3101:             // Add the first request to the merged one. If the requests are
                   3102:             // overlapping, drop the last sectors of the first request.
                                     // The sector distance is converted to bytes by the shift (x 512).
                   3103:             size = (reqs[i].sector - reqs[outidx].sector) << 9;
                   3104:             qemu_iovec_concat(qiov, reqs[outidx].qiov, size);
                   3105: 
1.1.1.23! root     3106:             // We should not need to add any zeros between the two requests
        !          3107:             assert (reqs[i].sector <= oldreq_last);
1.1.1.14  root     3108: 
                   3109:             // Add the second request
                   3110:             qemu_iovec_concat(qiov, reqs[i].qiov, reqs[i].qiov->size);
                   3111: 
                                     // The merged request now covers everything placed in qiov.
1.1.1.17  root     3112:             reqs[outidx].nb_sectors = qiov->size >> 9;
1.1.1.14  root     3113:             reqs[outidx].qiov = qiov;
                   3114: 
                                     // Record the merged qiov in slot i so multiwrite_user_cb() frees it.
                   3115:             mcb->callbacks[i].free_qiov = reqs[outidx].qiov;
                   3116:         } else {
                                     // Not mergeable: start a new output request from request i.
                   3117:             outidx++;
                   3118:             reqs[outidx].sector     = reqs[i].sector;
                   3119:             reqs[outidx].nb_sectors = reqs[i].nb_sectors;
                   3120:             reqs[outidx].qiov       = reqs[i].qiov;
                   3121:         }
                   3122:     }
                   3123: 
                   3124:     return outidx + 1;
                   3125: }
                   3126: 
                   3127: /*
                   3128:  * Submit multiple AIO write requests at once.
                   3129:  *
                   3130:  * On success, the function returns 0 and all requests in the reqs array have
                   3131:  * been submitted. In error case this function returns -1, and any of the
                   3132:  * requests may or may not be submitted yet. In particular, this means that the
                   3133:  * callback will be called for some of the requests, for others it won't. The
                   3134:  * caller must check the error field of the BlockRequest to wait for the right
                   3135:  * callbacks (if error != 0, no callback will be called).
                   3136:  *
                   3137:  * The implementation may modify the contents of the reqs array, e.g. to merge
                   3138:  * requests. However, the fields opaque and error are left unmodified as they
                   3139:  * are used to signal failure for a single request to the caller.
                   3140:  */
                   3141: int bdrv_aio_multiwrite(BlockDriverState *bs, BlockRequest *reqs, int num_reqs)
                   3142: {
                   3143:     MultiwriteCB *mcb;
                   3144:     int i;
                   3145: 
1.1.1.20  root     3146:     /* don't submit writes if we don't have a medium */
                   3147:     if (bs->drv == NULL) {
                   3148:         for (i = 0; i < num_reqs; i++) {
                   3149:             reqs[i].error = -ENOMEDIUM;
                   3150:         }
                   3151:         return -1;
                   3152:     }
                   3153: 
                             /* Nothing to do; no MultiwriteCB is allocated for an empty batch. */
1.1.1.14  root     3154:     if (num_reqs == 0) {
                   3155:         return 0;
                   3156:     }
                   3157: 
                   3158:     // Create MultiwriteCB structure
1.1.1.22  root     3159:     mcb = g_malloc0(sizeof(*mcb) + num_reqs * sizeof(*mcb->callbacks));
1.1.1.14  root     3160:     mcb->num_requests = 0;
                   3161:     mcb->num_callbacks = num_reqs;
                   3162: 
                             // Save the caller's callbacks before merging reorders the reqs array.
                   3163:     for (i = 0; i < num_reqs; i++) {
                   3164:         mcb->callbacks[i].cb = reqs[i].cb;
                   3165:         mcb->callbacks[i].opaque = reqs[i].opaque;
                   3166:     }
                   3167: 
                   3168:     // Check for mergable requests
                   3169:     num_reqs = multiwrite_merge(bs, reqs, num_reqs, mcb);
                   3170: 
1.1.1.19  root     3171:     trace_bdrv_aio_multiwrite(mcb, mcb->num_callbacks, num_reqs);
                   3172: 
                             /*
                              * The outstanding count is set before any write is submitted;
                              * presumably so that a completion running during submission cannot
                              * see a partial count and free mcb early -- TODO confirm.
                              * NOTE(review): the return value of bdrv_aio_writev() is ignored and
                              * the function returns 0 unconditionally from here on.
                              */
1.1.1.23! root     3173:     /* Run the aio requests. */
        !          3174:     mcb->num_requests = num_reqs;
1.1.1.14  root     3175:     for (i = 0; i < num_reqs; i++) {
1.1.1.23! root     3176:         bdrv_aio_writev(bs, reqs[i].sector, reqs[i].qiov,
1.1.1.14  root     3177:             reqs[i].nb_sectors, multiwrite_cb, mcb);
1.1.1.23! root     3178:     }
1.1.1.14  root     3179: 
1.1.1.23! root     3180:     return 0;
        !          3181: }
        !          3182: 
        !          3183: void bdrv_aio_cancel(BlockDriverAIOCB *acb)
        !          3184: {
                             /* Cancel a request by dispatching to the cancel hook of its AIOPool. */
        !          3185:     acb->pool->cancel(acb);
        !          3186: }
        !          3187: 
        !          3188: /* block I/O throttling */
        !          3189: static bool bdrv_exceed_bps_limits(BlockDriverState *bs, int nb_sectors,
        !          3190:                  bool is_write, double elapsed_time, uint64_t *wait)
        !          3191: {
                             /*
                              * Decide whether a request of nb_sectors sectors would exceed the
                              * bytes-per-second limit within the current slice.
                              *
                              * The total limit takes precedence over the per-direction limit; if
                              * neither is set the request is never throttled. Returns false (and
                              * stores 0 through wait) when the request fits, otherwise returns true,
                              * stores the computed wait through wait and extends bs->slice_end.
                              * wait may be NULL.
                              */
        !          3192:     uint64_t bps_limit = 0;
        !          3193:     double   bytes_limit, bytes_base, bytes_res;
        !          3194:     double   slice_time, wait_time;
        !          3195: 
        !          3196:     if (bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL]) {
        !          3197:         bps_limit = bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL];
        !          3198:     } else if (bs->io_limits.bps[is_write]) {
        !          3199:         bps_limit = bs->io_limits.bps[is_write];
        !          3200:     } else {
        !          3201:         if (wait) {
        !          3202:             *wait = 0;
1.1.1.14  root     3203:         }
1.1.1.23! root     3204: 
        !          3205:         return false;
1.1.1.14  root     3206:     }
                   3207: 
                             /* Slice length divided by NANOSECONDS_PER_SECOND, and the byte budget for it. */
1.1.1.23! root     3208:     slice_time = bs->slice_end - bs->slice_start;
        !          3209:     slice_time /= (NANOSECONDS_PER_SECOND);
        !          3210:     bytes_limit = bps_limit * slice_time;
        !          3211:     bytes_base  = bs->nr_bytes[is_write] - bs->io_base.bytes[is_write];
                             /* With a total limit, traffic in the other direction counts as well. */
        !          3212:     if (bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL]) {
        !          3213:         bytes_base += bs->nr_bytes[!is_write] - bs->io_base.bytes[!is_write];
        !          3214:     }
1.1.1.17  root     3215: 
1.1.1.23! root     3216:     /* bytes_base: the bytes of data which have been read/written; and
        !          3217:      *             it is obtained from the history statistic info.
        !          3218:      * bytes_res: the remaining bytes of data which need to be read/written.
        !          3219:      * (bytes_base + bytes_res) / bps_limit: used to calculate
        !          3220:      *             the total time for completing reading/writing all data.
        !          3221:      */
        !          3222:     bytes_res   = (unsigned) nb_sectors * BDRV_SECTOR_SIZE;
1.1.1.14  root     3223: 
1.1.1.23! root     3224:     if (bytes_base + bytes_res <= bytes_limit) {
        !          3225:         if (wait) {
        !          3226:             *wait = 0;
        !          3227:         }
        !          3228: 
        !          3229:         return false;
1.1.1.17  root     3230:     }
1.1.1.23! root     3231: 
        !          3232:     /* Calc approx time to dispatch */
        !          3233:     wait_time = (bytes_base + bytes_res) / bps_limit - elapsed_time;
        !          3234: 
        !          3235:     /* When the I/O rate at runtime exceeds the limits,
        !          3236:      * bs->slice_end need to be extended in order that the current statistic
        !          3237:      * info can be kept until the timer fire, so it is increased and tuned
        !          3238:      * based on the result of experiment.
        !          3239:      */
        !          3240:     bs->slice_time = wait_time * BLOCK_IO_SLICE_TIME * 10;
        !          3241:     bs->slice_end += bs->slice_time - 3 * BLOCK_IO_SLICE_TIME;
        !          3242:     if (wait) {
        !          3243:         *wait = wait_time * BLOCK_IO_SLICE_TIME * 10;
        !          3244:     }
        !          3245: 
        !          3246:     return true;
1.1.1.14  root     3247: }
                   3248: 
1.1.1.23! root     3249: static bool bdrv_exceed_iops_limits(BlockDriverState *bs, bool is_write,
        !          3250:                              double elapsed_time, uint64_t *wait)
1.1.1.4   root     3251: {
                             /*
                              * Decide whether issuing one more operation would exceed the
                              * operations-per-second limit within the current slice.
                              *
                              * The total limit takes precedence over the per-direction limit; if
                              * neither is set the request is never throttled. Returns false (and
                              * stores 0 through wait) when the operation fits, otherwise returns
                              * true, stores the computed wait through wait and extends
                              * bs->slice_end. wait may be NULL.
                              */
1.1.1.23! root     3252:     uint64_t iops_limit = 0;
        !          3253:     double   ios_limit, ios_base;
        !          3254:     double   slice_time, wait_time;
        !          3255: 
        !          3256:     if (bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL]) {
        !          3257:         iops_limit = bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL];
        !          3258:     } else if (bs->io_limits.iops[is_write]) {
        !          3259:         iops_limit = bs->io_limits.iops[is_write];
        !          3260:     } else {
        !          3261:         if (wait) {
        !          3262:             *wait = 0;
        !          3263:         }
        !          3264: 
        !          3265:         return false;
        !          3266:     }
        !          3267: 
                             /* Slice length divided by NANOSECONDS_PER_SECOND, and the op budget for it. */
        !          3268:     slice_time = bs->slice_end - bs->slice_start;
        !          3269:     slice_time /= (NANOSECONDS_PER_SECOND);
        !          3270:     ios_limit  = iops_limit * slice_time;
        !          3271:     ios_base   = bs->nr_ops[is_write] - bs->io_base.ios[is_write];
                             /* With a total limit, operations in the other direction count as well. */
        !          3272:     if (bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL]) {
        !          3273:         ios_base += bs->nr_ops[!is_write] - bs->io_base.ios[!is_write];
        !          3274:     }
        !          3275: 
                             /* The "+ 1" accounts for the operation being checked. */
        !          3276:     if (ios_base + 1 <= ios_limit) {
        !          3277:         if (wait) {
        !          3278:             *wait = 0;
        !          3279:         }
        !          3280: 
        !          3281:         return false;
        !          3282:     }
        !          3283: 
        !          3284:     /* Calc approx time to dispatch */
        !          3285:     wait_time = (ios_base + 1) / iops_limit;
                             /* Clamp at zero so the wait is never negative. */
        !          3286:     if (wait_time > elapsed_time) {
        !          3287:         wait_time = wait_time - elapsed_time;
        !          3288:     } else {
        !          3289:         wait_time = 0;
        !          3290:     }
        !          3291: 
                             /* Same slice extension as in bdrv_exceed_bps_limits(). */
        !          3292:     bs->slice_time = wait_time * BLOCK_IO_SLICE_TIME * 10;
        !          3293:     bs->slice_end += bs->slice_time - 3 * BLOCK_IO_SLICE_TIME;
        !          3294:     if (wait) {
        !          3295:         *wait = wait_time * BLOCK_IO_SLICE_TIME * 10;
        !          3296:     }
        !          3297: 
        !          3298:     return true;
1.1.1.5   root     3299: }
1.1.1.4   root     3300: 
1.1.1.23! root     3301: static bool bdrv_exceed_io_limits(BlockDriverState *bs, int nb_sectors,
        !          3302:                            bool is_write, int64_t *wait)
        !          3303: {
                             /*
                              * Combined throttling check for one request: evaluates both the
                              * bytes-per-second and the operations-per-second limits. Returns true
                              * and stores the larger of the two waits through wait when either
                              * limit is exceeded; otherwise returns false and stores 0.
                              * wait may be NULL.
                              */
        !          3304:     int64_t  now, max_wait;
        !          3305:     uint64_t bps_wait = 0, iops_wait = 0;
        !          3306:     double   elapsed_time;
        !          3307:     int      bps_ret, iops_ret;
        !          3308: 
        !          3309:     now = qemu_get_clock_ns(vm_clock);
                             /*
                              * Still inside the current slice: only move its end. Otherwise start
                              * a new slice and snapshot the byte/op counters as its baseline.
                              */
        !          3310:     if ((bs->slice_start < now)
        !          3311:         && (bs->slice_end > now)) {
        !          3312:         bs->slice_end = now + bs->slice_time;
        !          3313:     } else {
        !          3314:         bs->slice_time  =  5 * BLOCK_IO_SLICE_TIME;
        !          3315:         bs->slice_start = now;
        !          3316:         bs->slice_end   = now + bs->slice_time;
        !          3317: 
        !          3318:         bs->io_base.bytes[is_write]  = bs->nr_bytes[is_write];
        !          3319:         bs->io_base.bytes[!is_write] = bs->nr_bytes[!is_write];
        !          3320: 
        !          3321:         bs->io_base.ios[is_write]    = bs->nr_ops[is_write];
        !          3322:         bs->io_base.ios[!is_write]   = bs->nr_ops[!is_write];
        !          3323:     }
        !          3324: 
                             /* Time since the slice started, divided by NANOSECONDS_PER_SECOND. */
        !          3325:     elapsed_time  = now - bs->slice_start;
        !          3326:     elapsed_time  /= (NANOSECONDS_PER_SECOND);
        !          3327: 
                             /* Both checks always run; each may extend bs->slice_end itself. */
        !          3328:     bps_ret  = bdrv_exceed_bps_limits(bs, nb_sectors,
        !          3329:                                       is_write, elapsed_time, &bps_wait);
        !          3330:     iops_ret = bdrv_exceed_iops_limits(bs, is_write,
        !          3331:                                       elapsed_time, &iops_wait);
        !          3332:     if (bps_ret || iops_ret) {
        !          3333:         max_wait = bps_wait > iops_wait ? bps_wait : iops_wait;
        !          3334:         if (wait) {
        !          3335:             *wait = max_wait;
        !          3336:         }
        !          3337: 
                                 /* Make sure the slice lasts at least until the wait has elapsed. */
        !          3338:         now = qemu_get_clock_ns(vm_clock);
        !          3339:         if (bs->slice_end < now + max_wait) {
        !          3340:             bs->slice_end = now + max_wait;
        !          3341:         }
        !          3342: 
        !          3343:         return true;
        !          3344:     }
        !          3345: 
        !          3346:     if (wait) {
        !          3347:         *wait = 0;
        !          3348:     }
        !          3349: 
        !          3350:     return false;
        !          3351: }
1.1.1.4   root     3352: 
1.1.1.5   root     3353: /**************************************************************/
                   3354: /* async block device emulation */
1.1.1.4   root     3355: 
1.1.1.13  root     3356: typedef struct BlockDriverAIOCBSync {
                             /*
                              * AIOCB used by bdrv_aio_rw_vector(), which performs the I/O through
                              * the driver's bdrv_read/bdrv_write and reports completion from a
                              * bottom half.
                              */
                   3357:     BlockDriverAIOCB common;
                             /* Bottom half that runs bdrv_aio_bh_cb() to deliver the completion. */
                   3358:     QEMUBH *bh;
                             /* Return value of the driver's bdrv_read/bdrv_write call. */
                   3359:     int ret;
                   3360:     /* vector translation state */
                   3361:     QEMUIOVector *qiov;
                             /* Linear buffer of qiov->size bytes used in place of the vector. */
                   3362:     uint8_t *bounce;
                   3363:     int is_write;
                   3364: } BlockDriverAIOCBSync;
                   3365: 
                   3366: static void bdrv_aio_cancel_em(BlockDriverAIOCB *blockacb)
                   3367: {
                             /*
                              * Cancel hook of bdrv_em_aio_pool: deletes the pending bottom half so
                              * the completion callback is not invoked, then returns the AIOCB to
                              * its pool.
                              * NOTE(review): acb->bounce is not freed on this path, whereas
                              * bdrv_aio_bh_cb() frees it -- confirm whether cancellation leaks it.
                              */
1.1.1.18  root     3368:     BlockDriverAIOCBSync *acb =
                   3369:         container_of(blockacb, BlockDriverAIOCBSync, common);
1.1.1.13  root     3370:     qemu_bh_delete(acb->bh);
                   3371:     acb->bh = NULL;
                   3372:     qemu_aio_release(acb);
                   3373: }
                   3374: 
                   3375: static AIOPool bdrv_em_aio_pool = {
                             /* Pool of BlockDriverAIOCBSync objects used by bdrv_aio_rw_vector(). */
                   3376:     .aiocb_size         = sizeof(BlockDriverAIOCBSync),
                   3377:     .cancel             = bdrv_aio_cancel_em,
                   3378: };
                   3379: 
1.1.1.5   root     3380: static void bdrv_aio_bh_cb(void *opaque)
1.1.1.4   root     3381: {
                             /*
                              * Bottom-half handler scheduled by bdrv_aio_rw_vector(): finishes an
                              * emulated AIO request and reports acb->ret to the caller's callback.
                              */
1.1.1.5   root     3382:     BlockDriverAIOCBSync *acb = opaque;
1.1.1.13  root     3383: 
                             /* For reads, copy the data from the bounce buffer into the qiov. */
                   3384:     if (!acb->is_write)
                   3385:         qemu_iovec_from_buffer(acb->qiov, acb->bounce, acb->qiov->size);
                   3386:     qemu_vfree(acb->bounce);
1.1.1.5   root     3387:     acb->common.cb(acb->common.opaque, acb->ret);
                             /* The callback has run; drop the bottom half and recycle the AIOCB. */
1.1.1.13  root     3388:     qemu_bh_delete(acb->bh);
                   3389:     acb->bh = NULL;
1.1.1.5   root     3390:     qemu_aio_release(acb);
1.1.1.4   root     3391: }
                   3392: 
1.1.1.13  root     3393: static BlockDriverAIOCB *bdrv_aio_rw_vector(BlockDriverState *bs,
                   3394:                                             int64_t sector_num,
                   3395:                                             QEMUIOVector *qiov,
                   3396:                                             int nb_sectors,
                   3397:                                             BlockDriverCompletionFunc *cb,
                   3398:                                             void *opaque,
                   3399:                                             int is_write)
                   3400: 
1.1       root     3401: {
                             /*
                              * Emulate an AIO request with the driver's bdrv_read/bdrv_write.
                              * The transfer is performed inside this call through a bounce buffer
                              * of qiov->size bytes; the result is stored in acb->ret and delivered
                              * to cb later by the bottom half bdrv_aio_bh_cb(). Returns the
                              * embedded common AIOCB.
                              */
1.1.1.5   root     3402:     BlockDriverAIOCBSync *acb;
1.1       root     3403: 
1.1.1.13  root     3404:     acb = qemu_aio_get(&bdrv_em_aio_pool, bs, cb, opaque);
                   3405:     acb->is_write = is_write;
                   3406:     acb->qiov = qiov;
                   3407:     acb->bounce = qemu_blockalign(bs, qiov->size);
1.1.1.23! root     3408:     acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb);
1.1.1.13  root     3409: 
                             /* Writes are flattened into the bounce buffer before the driver call. */
                   3410:     if (is_write) {
                   3411:         qemu_iovec_to_buffer(acb->qiov, acb->bounce);
1.1.1.22  root     3412:         acb->ret = bs->drv->bdrv_write(bs, sector_num, acb->bounce, nb_sectors);
1.1.1.13  root     3413:     } else {
1.1.1.22  root     3414:         acb->ret = bs->drv->bdrv_read(bs, sector_num, acb->bounce, nb_sectors);
1.1.1.13  root     3415:     }
                   3416: 
                             /* Completion is reported from the bottom half, not from this call. */
1.1.1.5   root     3417:     qemu_bh_schedule(acb->bh);
1.1.1.13  root     3418: 
1.1.1.5   root     3419:     return &acb->common;
                   3420: }
1.1       root     3421: 
1.1.1.13  root     3422: static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
                   3423:         int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
1.1.1.5   root     3424:         BlockDriverCompletionFunc *cb, void *opaque)
                   3425: {
                             /* Emulated AIO read: bdrv_aio_rw_vector() with is_write = 0. */
1.1.1.13  root     3426:     return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 0);
1.1.1.5   root     3427: }
                   3428: 
1.1.1.13  root     3429: static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
                   3430:         int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
                   3431:         BlockDriverCompletionFunc *cb, void *opaque)
1.1.1.5   root     3432: {
                             /* Emulated AIO write: bdrv_aio_rw_vector() with is_write = 1. */
1.1.1.13  root     3433:     return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 1);
1.1       root     3434: }
                   3435: 
1.1.1.22  root     3436: 
                   3437: typedef struct BlockDriverAIOCBCoroutine {
                             /* AIOCB for requests that are executed inside a coroutine. */
                   3438:     BlockDriverAIOCB common;
                             /* Request parameters; req.error carries the result to bdrv_co_em_bh(). */
                   3439:     BlockRequest req;
                   3440:     bool is_write;
                             /* Bottom half created once the coroutine has finished the operation. */
                   3441:     QEMUBH* bh;
                   3442: } BlockDriverAIOCBCoroutine;
                   3443: 
                   3444: static void bdrv_aio_co_cancel_em(BlockDriverAIOCB *blockacb)
1.1.1.14  root     3445: {
                             /*
                              * Cancel hook of bdrv_em_co_aio_pool. It only calls qemu_aio_flush();
                              * blockacb itself is not referenced.
                              */
1.1.1.22  root     3446:     qemu_aio_flush();
                   3447: }
1.1.1.14  root     3448: 
1.1.1.22  root     3449: static AIOPool bdrv_em_co_aio_pool = {
                             /* Pool of BlockDriverAIOCBCoroutine objects for coroutine-based AIO. */
                   3450:     .aiocb_size         = sizeof(BlockDriverAIOCBCoroutine),
                   3451:     .cancel             = bdrv_aio_co_cancel_em,
                   3452: };
1.1.1.14  root     3453: 
1.1.1.22  root     3454: static void bdrv_co_em_bh(void *opaque)
                   3455: {
                             /*
                              * Bottom-half handler for coroutine-based AIO: reports acb->req.error
                              * to the caller's callback, then deletes the bottom half and returns
                              * the AIOCB to its pool.
                              */
                   3456:     BlockDriverAIOCBCoroutine *acb = opaque;
1.1.1.14  root     3457: 
1.1.1.22  root     3458:     acb->common.cb(acb->common.opaque, acb->req.error);
                   3459:     qemu_bh_delete(acb->bh);
                   3460:     qemu_aio_release(acb);
1.1.1.14  root     3461: }
                   3462: 
1.1.1.22  root     3463: /* Invoke bdrv_co_do_readv/bdrv_co_do_writev */
                   3464: static void coroutine_fn bdrv_co_do_rw(void *opaque)
1.1.1.18  root     3465: {
                             /*
                              * Coroutine entry point created by bdrv_co_aio_rw_vector(). opaque is
                              * the BlockDriverAIOCBCoroutine describing the request. The result is
                              * stored in acb->req.error and delivered through bdrv_co_em_bh().
                              */
1.1.1.22  root     3466:     BlockDriverAIOCBCoroutine *acb = opaque;
                   3467:     BlockDriverState *bs = acb->common.bs;
1.1.1.18  root     3468: 
1.1.1.22  root     3469:     if (!acb->is_write) {
                   3470:         acb->req.error = bdrv_co_do_readv(bs, acb->req.sector,
1.1.1.23! root     3471:             acb->req.nb_sectors, acb->req.qiov, 0);
1.1.1.22  root     3472:     } else {
                   3473:         acb->req.error = bdrv_co_do_writev(bs, acb->req.sector,
1.1.1.23! root     3474:             acb->req.nb_sectors, acb->req.qiov, 0);
1.1.1.18  root     3475:     }
                   3476: 
                             /* Report completion from a bottom half rather than from this coroutine. */
1.1.1.22  root     3477:     acb->bh = qemu_bh_new(bdrv_co_em_bh, acb);
1.1.1.18  root     3478:     qemu_bh_schedule(acb->bh);
                   3479: }
                   3480: 
1.1.1.22  root     3481: static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
                   3482:                                                int64_t sector_num,
                   3483:                                                QEMUIOVector *qiov,
                   3484:                                                int nb_sectors,
                   3485:                                                BlockDriverCompletionFunc *cb,
                   3486:                                                void *opaque,
                   3487:                                                bool is_write)
                   3488: {
                             /*
                              * Start a vectored read or write as a coroutine. The request
                              * parameters are stored in an AIOCB taken from bdrv_em_co_aio_pool,
                              * a coroutine running bdrv_co_do_rw() is created and entered with
                              * that AIOCB, and the embedded common AIOCB is returned.
                              */
                   3489:     Coroutine *co;
                   3490:     BlockDriverAIOCBCoroutine *acb;
                   3491: 
                   3492:     acb = qemu_aio_get(&bdrv_em_co_aio_pool, bs, cb, opaque);
                   3493:     acb->req.sector = sector_num;
                   3494:     acb->req.nb_sectors = nb_sectors;
                   3495:     acb->req.qiov = qiov;
                   3496:     acb->is_write = is_write;
1.1.1.5   root     3497: 
1.1.1.22  root     3498:     co = qemu_coroutine_create(bdrv_co_do_rw);
                   3499:     qemu_coroutine_enter(co, acb);
                   3500: 
                   3501:     return &acb->common;
1.1.1.4   root     3502: }
                   3503: 
1.1.1.22  root     3504: static void coroutine_fn bdrv_aio_flush_co_entry(void *opaque)
                   3505: {
                             /*
                              * Coroutine entry point for bdrv_aio_flush(): runs bdrv_co_flush() on
                              * the AIOCB's BlockDriverState, stores the result in acb->req.error
                              * and schedules bdrv_co_em_bh() to report it.
                              */
                   3506:     BlockDriverAIOCBCoroutine *acb = opaque;
                   3507:     BlockDriverState *bs = acb->common.bs;
                   3508: 
                   3509:     acb->req.error = bdrv_co_flush(bs);
                   3510:     acb->bh = qemu_bh_new(bdrv_co_em_bh, acb);
                   3511:     qemu_bh_schedule(acb->bh);
                   3512: }
1.1.1.5   root     3513: 
1.1.1.22  root     3514: BlockDriverAIOCB *bdrv_aio_flush(BlockDriverState *bs,
                   3515:         BlockDriverCompletionFunc *cb, void *opaque)
1.1.1.5   root     3516: {
                             /*
                              * Asynchronous flush: takes an AIOCB from bdrv_em_co_aio_pool, then
                              * creates and enters a coroutine running bdrv_aio_flush_co_entry().
                              * Returns the embedded common AIOCB; cb is invoked later from
                              * bdrv_co_em_bh(). Note that the trace call precedes the local
                              * declarations in this function.
                              */
1.1.1.22  root     3517:     trace_bdrv_aio_flush(bs, opaque);
1.1.1.5   root     3518: 
1.1.1.22  root     3519:     Coroutine *co;
                   3520:     BlockDriverAIOCBCoroutine *acb;
1.1.1.14  root     3521: 
1.1.1.22  root     3522:     acb = qemu_aio_get(&bdrv_em_co_aio_pool, bs, cb, opaque);
                   3523:     co = qemu_coroutine_create(bdrv_aio_flush_co_entry);
                   3524:     qemu_coroutine_enter(co, acb);
1.1.1.7   root     3525: 
1.1.1.22  root     3526:     return &acb->common;
                   3527: }
1.1.1.7   root     3528: 
1.1.1.22  root     3529: static void coroutine_fn bdrv_aio_discard_co_entry(void *opaque)
                   3530: {
                             /*
                              * Coroutine entry point for bdrv_aio_discard(): runs bdrv_co_discard()
                              * for the sector range stored in acb->req, stores the result in
                              * acb->req.error and schedules bdrv_co_em_bh() to report it.
                              */
                   3531:     BlockDriverAIOCBCoroutine *acb = opaque;
                   3532:     BlockDriverState *bs = acb->common.bs;
1.1.1.14  root     3533: 
1.1.1.22  root     3534:     acb->req.error = bdrv_co_discard(bs, acb->req.sector, acb->req.nb_sectors);
                   3535:     acb->bh = qemu_bh_new(bdrv_co_em_bh, acb);
                   3536:     qemu_bh_schedule(acb->bh);
1.1.1.5   root     3537: }
                   3538: 
1.1.1.22  root     3539: BlockDriverAIOCB *bdrv_aio_discard(BlockDriverState *bs,
                   3540:         int64_t sector_num, int nb_sectors,
                   3541:         BlockDriverCompletionFunc *cb, void *opaque)
1.1.1.5   root     3542: {
                             /*
                              * Asynchronous discard of nb_sectors sectors starting at sector_num.
                              * Stores the range in an AIOCB from bdrv_em_co_aio_pool, then creates
                              * and enters a coroutine running bdrv_aio_discard_co_entry(). Returns
                              * the embedded common AIOCB; cb is invoked later from bdrv_co_em_bh().
                              */
1.1.1.22  root     3543:     Coroutine *co;
                   3544:     BlockDriverAIOCBCoroutine *acb;
1.1.1.5   root     3545: 
1.1.1.22  root     3546:     trace_bdrv_aio_discard(bs, sector_num, nb_sectors, opaque);
1.1.1.14  root     3547: 
1.1.1.22  root     3548:     acb = qemu_aio_get(&bdrv_em_co_aio_pool, bs, cb, opaque);
                   3549:     acb->req.sector = sector_num;
                   3550:     acb->req.nb_sectors = nb_sectors;
                   3551:     co = qemu_coroutine_create(bdrv_aio_discard_co_entry);
                   3552:     qemu_coroutine_enter(co, acb);
1.1.1.14  root     3553: 
1.1.1.22  root     3554:     return &acb->common;
1.1.1.5   root     3555: }
1.1       root     3556: 
                   3557: void bdrv_init(void)
                   3558: {
                             /* Run the module initializers registered for MODULE_INIT_BLOCK. */
1.1.1.13  root     3559:     module_call_init(MODULE_INIT_BLOCK);
1.1.1.10  root     3560: }
                   3561: 
1.1.1.14  root     3562: void bdrv_init_with_whitelist(void)
                   3563: {
                             /* Set the use_bdrv_whitelist flag, then perform the normal bdrv_init(). */
                   3564:     use_bdrv_whitelist = 1;
                   3565:     bdrv_init();
                   3566: }
                   3567: 
1.1.1.13  root     3568: void *qemu_aio_get(AIOPool *pool, BlockDriverState *bs,
                   3569:                    BlockDriverCompletionFunc *cb, void *opaque)
1.1.1.5   root     3570: {
                             /*
                              * Obtain an AIOCB from pool and initialise its bs, cb and opaque
                              * fields. An entry is taken from the pool's free list when one is
                              * available; otherwise a zeroed object of pool->aiocb_size bytes is
                              * allocated. A recycled AIOCB is not cleared: only bs, cb and opaque
                              * are overwritten here.
                              */
                   3571:     BlockDriverAIOCB *acb;
                   3572: 
1.1.1.10  root     3573:     if (pool->free_aiocb) {
                   3574:         acb = pool->free_aiocb;
                   3575:         pool->free_aiocb = acb->next;
1.1.1.5   root     3576:     } else {
1.1.1.22  root     3577:         acb = g_malloc0(pool->aiocb_size);
1.1.1.10  root     3578:         acb->pool = pool;
1.1.1.5   root     3579:     }
                   3580:     acb->bs = bs;
                   3581:     acb->cb = cb;
                   3582:     acb->opaque = opaque;
                   3583:     return acb;
                   3584: }
                   3585: 
                   3586: void qemu_aio_release(void *p)
                   3587: {
                             /*
                              * Return an AIOCB to the free list of the pool it came from. The
                              * memory is not freed; qemu_aio_get() hands it out again later.
                              */
1.1.1.10  root     3588:     BlockDriverAIOCB *acb = (BlockDriverAIOCB *)p;
                   3589:     AIOPool *pool = acb->pool;
                   3590:     acb->next = pool->free_aiocb;
                   3591:     pool->free_aiocb = acb;
1.1.1.5   root     3592: }
                   3593: 
                   3594: /**************************************************************/
1.1.1.22  root     3595: /* Coroutine block device emulation */
                   3596: 
                   3597: typedef struct CoroutineIOCompletion {
                   3598:     Coroutine *coroutine;
                   3599:     int ret;
                   3600: } CoroutineIOCompletion;
                   3601: 
                   3602: static void bdrv_co_io_em_complete(void *opaque, int ret)
                   3603: {
                   3604:     CoroutineIOCompletion *co = opaque;
                   3605: 
                   3606:     co->ret = ret;
                   3607:     qemu_coroutine_enter(co->coroutine, NULL);
                   3608: }
                   3609: 
                   3610: static int coroutine_fn bdrv_co_io_em(BlockDriverState *bs, int64_t sector_num,
                   3611:                                       int nb_sectors, QEMUIOVector *iov,
                   3612:                                       bool is_write)
                   3613: {
                   3614:     CoroutineIOCompletion co = {
                   3615:         .coroutine = qemu_coroutine_self(),
                   3616:     };
                   3617:     BlockDriverAIOCB *acb;
                   3618: 
                   3619:     if (is_write) {
                   3620:         acb = bs->drv->bdrv_aio_writev(bs, sector_num, iov, nb_sectors,
                   3621:                                        bdrv_co_io_em_complete, &co);
                   3622:     } else {
                   3623:         acb = bs->drv->bdrv_aio_readv(bs, sector_num, iov, nb_sectors,
                   3624:                                       bdrv_co_io_em_complete, &co);
                   3625:     }
                   3626: 
                   3627:     trace_bdrv_co_io_em(bs, sector_num, nb_sectors, is_write, acb);
                   3628:     if (!acb) {
                   3629:         return -EIO;
                   3630:     }
                   3631:     qemu_coroutine_yield();
                   3632: 
                   3633:     return co.ret;
                   3634: }
                   3635: 
                   3636: static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
                   3637:                                          int64_t sector_num, int nb_sectors,
                   3638:                                          QEMUIOVector *iov)
                   3639: {
                   3640:     return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, false);
                   3641: }
                   3642: 
                   3643: static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
                   3644:                                          int64_t sector_num, int nb_sectors,
                   3645:                                          QEMUIOVector *iov)
                   3646: {
                   3647:     return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, true);
                   3648: }
                   3649: 
                   3650: static void coroutine_fn bdrv_flush_co_entry(void *opaque)
                   3651: {
                   3652:     RwCo *rwco = opaque;
                   3653: 
                   3654:     rwco->ret = bdrv_co_flush(rwco->bs);
                   3655: }
                   3656: 
                   3657: int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
                   3658: {
                   3659:     int ret;
                   3660: 
1.1.1.23! root     3661:     if (!bs || !bdrv_is_inserted(bs) || bdrv_is_read_only(bs)) {
1.1.1.22  root     3662:         return 0;
                   3663:     }
                   3664: 
                   3665:     /* Write back cached data to the OS even with cache=unsafe */
                   3666:     if (bs->drv->bdrv_co_flush_to_os) {
                   3667:         ret = bs->drv->bdrv_co_flush_to_os(bs);
                   3668:         if (ret < 0) {
                   3669:             return ret;
                   3670:         }
                   3671:     }
                   3672: 
                   3673:     /* But don't actually force it to the disk with cache=unsafe */
                   3674:     if (bs->open_flags & BDRV_O_NO_FLUSH) {
                   3675:         return 0;
                   3676:     }
                   3677: 
                   3678:     if (bs->drv->bdrv_co_flush_to_disk) {
1.1.1.23! root     3679:         ret = bs->drv->bdrv_co_flush_to_disk(bs);
1.1.1.22  root     3680:     } else if (bs->drv->bdrv_aio_flush) {
                   3681:         BlockDriverAIOCB *acb;
                   3682:         CoroutineIOCompletion co = {
                   3683:             .coroutine = qemu_coroutine_self(),
                   3684:         };
                   3685: 
                   3686:         acb = bs->drv->bdrv_aio_flush(bs, bdrv_co_io_em_complete, &co);
                   3687:         if (acb == NULL) {
1.1.1.23! root     3688:             ret = -EIO;
1.1.1.22  root     3689:         } else {
                   3690:             qemu_coroutine_yield();
1.1.1.23! root     3691:             ret = co.ret;
1.1.1.22  root     3692:         }
                   3693:     } else {
                   3694:         /*
                   3695:          * Some block drivers always operate in either writethrough or unsafe
                   3696:          * mode and don't support bdrv_flush therefore. Usually qemu doesn't
                   3697:          * know how the server works (because the behaviour is hardcoded or
                   3698:          * depends on server-side configuration), so we can't ensure that
                   3699:          * everything is safe on disk. Returning an error doesn't work because
                   3700:          * that would break guests even if the server operates in writethrough
                   3701:          * mode.
                   3702:          *
                   3703:          * Let's hope the user knows what he's doing.
                   3704:          */
1.1.1.23! root     3705:         ret = 0;
        !          3706:     }
        !          3707:     if (ret < 0) {
        !          3708:         return ret;
1.1.1.22  root     3709:     }
1.1.1.23! root     3710: 
        !          3711:     /* Now flush the underlying protocol.  It will also have BDRV_O_NO_FLUSH
        !          3712:      * in the case of cache=unsafe, so there are no useless flushes.
        !          3713:      */
        !          3714:     return bdrv_co_flush(bs->file);
1.1.1.22  root     3715: }
                   3716: 
                   3717: void bdrv_invalidate_cache(BlockDriverState *bs)
                   3718: {
                   3719:     if (bs->drv && bs->drv->bdrv_invalidate_cache) {
                   3720:         bs->drv->bdrv_invalidate_cache(bs);
                   3721:     }
                   3722: }
                   3723: 
                   3724: void bdrv_invalidate_cache_all(void)
                   3725: {
                   3726:     BlockDriverState *bs;
                   3727: 
                   3728:     QTAILQ_FOREACH(bs, &bdrv_states, list) {
                   3729:         bdrv_invalidate_cache(bs);
                   3730:     }
                   3731: }
                   3732: 
1.1.1.23! root     3733: void bdrv_clear_incoming_migration_all(void)
        !          3734: {
        !          3735:     BlockDriverState *bs;
        !          3736: 
        !          3737:     QTAILQ_FOREACH(bs, &bdrv_states, list) {
        !          3738:         bs->open_flags = bs->open_flags & ~(BDRV_O_INCOMING);
        !          3739:     }
        !          3740: }
        !          3741: 
1.1.1.22  root     3742: int bdrv_flush(BlockDriverState *bs)
                   3743: {
                   3744:     Coroutine *co;
                   3745:     RwCo rwco = {
                   3746:         .bs = bs,
                   3747:         .ret = NOT_DONE,
                   3748:     };
                   3749: 
                   3750:     if (qemu_in_coroutine()) {
                   3751:         /* Fast-path if already in coroutine context */
                   3752:         bdrv_flush_co_entry(&rwco);
                   3753:     } else {
                   3754:         co = qemu_coroutine_create(bdrv_flush_co_entry);
                   3755:         qemu_coroutine_enter(co, &rwco);
                   3756:         while (rwco.ret == NOT_DONE) {
                   3757:             qemu_aio_wait();
                   3758:         }
                   3759:     }
                   3760: 
                   3761:     return rwco.ret;
                   3762: }
                   3763: 
                   3764: static void coroutine_fn bdrv_discard_co_entry(void *opaque)
                   3765: {
                   3766:     RwCo *rwco = opaque;
                   3767: 
                   3768:     rwco->ret = bdrv_co_discard(rwco->bs, rwco->sector_num, rwco->nb_sectors);
                   3769: }
                   3770: 
                   3771: int coroutine_fn bdrv_co_discard(BlockDriverState *bs, int64_t sector_num,
                   3772:                                  int nb_sectors)
                   3773: {
                   3774:     if (!bs->drv) {
                   3775:         return -ENOMEDIUM;
                   3776:     } else if (bdrv_check_request(bs, sector_num, nb_sectors)) {
                   3777:         return -EIO;
                   3778:     } else if (bs->read_only) {
                   3779:         return -EROFS;
                   3780:     } else if (bs->drv->bdrv_co_discard) {
                   3781:         return bs->drv->bdrv_co_discard(bs, sector_num, nb_sectors);
                   3782:     } else if (bs->drv->bdrv_aio_discard) {
                   3783:         BlockDriverAIOCB *acb;
                   3784:         CoroutineIOCompletion co = {
                   3785:             .coroutine = qemu_coroutine_self(),
                   3786:         };
                   3787: 
                   3788:         acb = bs->drv->bdrv_aio_discard(bs, sector_num, nb_sectors,
                   3789:                                         bdrv_co_io_em_complete, &co);
                   3790:         if (acb == NULL) {
                   3791:             return -EIO;
                   3792:         } else {
                   3793:             qemu_coroutine_yield();
                   3794:             return co.ret;
                   3795:         }
                   3796:     } else {
                   3797:         return 0;
                   3798:     }
                   3799: }
                   3800: 
                   3801: int bdrv_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors)
                   3802: {
                   3803:     Coroutine *co;
                   3804:     RwCo rwco = {
                   3805:         .bs = bs,
                   3806:         .sector_num = sector_num,
                   3807:         .nb_sectors = nb_sectors,
                   3808:         .ret = NOT_DONE,
                   3809:     };
                   3810: 
                   3811:     if (qemu_in_coroutine()) {
                   3812:         /* Fast-path if already in coroutine context */
                   3813:         bdrv_discard_co_entry(&rwco);
                   3814:     } else {
                   3815:         co = qemu_coroutine_create(bdrv_discard_co_entry);
                   3816:         qemu_coroutine_enter(co, &rwco);
                   3817:         while (rwco.ret == NOT_DONE) {
                   3818:             qemu_aio_wait();
                   3819:         }
                   3820:     }
                   3821: 
                   3822:     return rwco.ret;
                   3823: }
                   3824: 
                   3825: /**************************************************************/
1.1.1.5   root     3826: /* removable device support */
                   3827: 
                   3828: /**
                   3829:  * Return TRUE if the media is present
                   3830:  */
                   3831: int bdrv_is_inserted(BlockDriverState *bs)
                   3832: {
                   3833:     BlockDriver *drv = bs->drv;
1.1.1.22  root     3834: 
1.1.1.5   root     3835:     if (!drv)
                   3836:         return 0;
                   3837:     if (!drv->bdrv_is_inserted)
1.1.1.22  root     3838:         return 1;
                   3839:     return drv->bdrv_is_inserted(bs);
1.1.1.5   root     3840: }
                   3841: 
                   3842: /**
1.1.1.22  root     3843:  * Return whether the media changed since the last call to this
                   3844:  * function, or -ENOTSUP if we don't know.  Most drivers don't know.
1.1.1.5   root     3845:  */
                   3846: int bdrv_media_changed(BlockDriverState *bs)
                   3847: {
                   3848:     BlockDriver *drv = bs->drv;
                   3849: 
1.1.1.22  root     3850:     if (drv && drv->bdrv_media_changed) {
                   3851:         return drv->bdrv_media_changed(bs);
                   3852:     }
                   3853:     return -ENOTSUP;
1.1.1.5   root     3854: }
                   3855: 
                   3856: /**
                   3857:  * If eject_flag is TRUE, eject the media. Otherwise, close the tray
                   3858:  */
1.1.1.23! root     3859: void bdrv_eject(BlockDriverState *bs, bool eject_flag)
1.1.1.5   root     3860: {
                   3861:     BlockDriver *drv = bs->drv;
                   3862: 
1.1.1.22  root     3863:     if (drv && drv->bdrv_eject) {
                   3864:         drv->bdrv_eject(bs, eject_flag);
1.1.1.13  root     3865:     }
1.1.1.23! root     3866: 
        !          3867:     if (bs->device_name[0] != '\0') {
        !          3868:         bdrv_emit_qmp_eject_event(bs, eject_flag);
        !          3869:     }
1.1.1.5   root     3870: }
                   3871: 
                   3872: /**
                   3873:  * Lock or unlock the media (if it is locked, the user won't be able
                   3874:  * to eject it manually).
                   3875:  */
1.1.1.22  root     3876: void bdrv_lock_medium(BlockDriverState *bs, bool locked)
1.1.1.5   root     3877: {
                   3878:     BlockDriver *drv = bs->drv;
                   3879: 
1.1.1.22  root     3880:     trace_bdrv_lock_medium(bs, locked);
1.1.1.21  root     3881: 
1.1.1.22  root     3882:     if (drv && drv->bdrv_lock_medium) {
                   3883:         drv->bdrv_lock_medium(bs, locked);
1.1.1.5   root     3884:     }
1.1       root     3885: }
1.1.1.6   root     3886: 
                   3887: /* needed for generic scsi interface */
                   3888: 
                   3889: int bdrv_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
                   3890: {
                   3891:     BlockDriver *drv = bs->drv;
                   3892: 
                   3893:     if (drv && drv->bdrv_ioctl)
                   3894:         return drv->bdrv_ioctl(bs, req, buf);
                   3895:     return -ENOTSUP;
                   3896: }
1.1.1.13  root     3897: 
                   3898: BlockDriverAIOCB *bdrv_aio_ioctl(BlockDriverState *bs,
                   3899:         unsigned long int req, void *buf,
                   3900:         BlockDriverCompletionFunc *cb, void *opaque)
                   3901: {
                   3902:     BlockDriver *drv = bs->drv;
                   3903: 
                   3904:     if (drv && drv->bdrv_aio_ioctl)
                   3905:         return drv->bdrv_aio_ioctl(bs, req, buf, cb, opaque);
                   3906:     return NULL;
                   3907: }
                   3908: 
1.1.1.22  root     3909: void bdrv_set_buffer_alignment(BlockDriverState *bs, int align)
                   3910: {
                   3911:     bs->buffer_alignment = align;
                   3912: }
1.1.1.14  root     3913: 
1.1.1.13  root     3914: void *qemu_blockalign(BlockDriverState *bs, size_t size)
                   3915: {
                   3916:     return qemu_memalign((bs && bs->buffer_alignment) ? bs->buffer_alignment : 512, size);
                   3917: }
1.1.1.14  root     3918: 
                   3919: void bdrv_set_dirty_tracking(BlockDriverState *bs, int enable)
                   3920: {
                   3921:     int64_t bitmap_size;
                   3922: 
1.1.1.18  root     3923:     bs->dirty_count = 0;
1.1.1.14  root     3924:     if (enable) {
                   3925:         if (!bs->dirty_bitmap) {
                   3926:             bitmap_size = (bdrv_getlength(bs) >> BDRV_SECTOR_BITS) +
1.1.1.23! root     3927:                     BDRV_SECTORS_PER_DIRTY_CHUNK * BITS_PER_LONG - 1;
        !          3928:             bitmap_size /= BDRV_SECTORS_PER_DIRTY_CHUNK * BITS_PER_LONG;
1.1.1.14  root     3929: 
1.1.1.23! root     3930:             bs->dirty_bitmap = g_new0(unsigned long, bitmap_size);
1.1.1.14  root     3931:         }
                   3932:     } else {
                   3933:         if (bs->dirty_bitmap) {
1.1.1.22  root     3934:             g_free(bs->dirty_bitmap);
1.1.1.14  root     3935:             bs->dirty_bitmap = NULL;
                   3936:         }
                   3937:     }
                   3938: }
                   3939: 
                   3940: int bdrv_get_dirty(BlockDriverState *bs, int64_t sector)
                   3941: {
                   3942:     int64_t chunk = sector / (int64_t)BDRV_SECTORS_PER_DIRTY_CHUNK;
                   3943: 
                   3944:     if (bs->dirty_bitmap &&
                   3945:         (sector << BDRV_SECTOR_BITS) < bdrv_getlength(bs)) {
1.1.1.19  root     3946:         return !!(bs->dirty_bitmap[chunk / (sizeof(unsigned long) * 8)] &
                   3947:             (1UL << (chunk % (sizeof(unsigned long) * 8))));
1.1.1.14  root     3948:     } else {
                   3949:         return 0;
                   3950:     }
                   3951: }
                   3952: 
                   3953: void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector,
                   3954:                       int nr_sectors)
                   3955: {
                   3956:     set_dirty_bitmap(bs, cur_sector, nr_sectors, 0);
                   3957: }
1.1.1.18  root     3958: 
                   3959: int64_t bdrv_get_dirty_count(BlockDriverState *bs)
                   3960: {
                   3961:     return bs->dirty_count;
                   3962: }
1.1.1.19  root     3963: 
                   3964: void bdrv_set_in_use(BlockDriverState *bs, int in_use)
                   3965: {
                   3966:     assert(bs->in_use != in_use);
                   3967:     bs->in_use = in_use;
                   3968: }
                   3969: 
                   3970: int bdrv_in_use(BlockDriverState *bs)
                   3971: {
                   3972:     return bs->in_use;
                   3973: }
                   3974: 
1.1.1.22  root     3975: void bdrv_iostatus_enable(BlockDriverState *bs)
                   3976: {
                   3977:     bs->iostatus_enabled = true;
                   3978:     bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
                   3979: }
                   3980: 
                   3981: /* The I/O status is only enabled if the drive explicitly
                   3982:  * enables it _and_ the VM is configured to stop on errors */
                   3983: bool bdrv_iostatus_is_enabled(const BlockDriverState *bs)
                   3984: {
                   3985:     return (bs->iostatus_enabled &&
                   3986:            (bs->on_write_error == BLOCK_ERR_STOP_ENOSPC ||
                   3987:             bs->on_write_error == BLOCK_ERR_STOP_ANY    ||
                   3988:             bs->on_read_error == BLOCK_ERR_STOP_ANY));
                   3989: }
                   3990: 
                   3991: void bdrv_iostatus_disable(BlockDriverState *bs)
                   3992: {
                   3993:     bs->iostatus_enabled = false;
                   3994: }
                   3995: 
                   3996: void bdrv_iostatus_reset(BlockDriverState *bs)
                   3997: {
                   3998:     if (bdrv_iostatus_is_enabled(bs)) {
                   3999:         bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
                   4000:     }
                   4001: }
                   4002: 
                   4003: /* XXX: Today this is set by device models because it makes the implementation
                   4004:    quite simple. However, the block layer knows about the error, so it's
                   4005:    possible to implement this without device models being involved */
                   4006: void bdrv_iostatus_set_err(BlockDriverState *bs, int error)
                   4007: {
                   4008:     if (bdrv_iostatus_is_enabled(bs) &&
                   4009:         bs->iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
                   4010:         assert(error >= 0);
                   4011:         bs->iostatus = error == ENOSPC ? BLOCK_DEVICE_IO_STATUS_NOSPACE :
                   4012:                                          BLOCK_DEVICE_IO_STATUS_FAILED;
                   4013:     }
                   4014: }
                   4015: 
                   4016: void
                   4017: bdrv_acct_start(BlockDriverState *bs, BlockAcctCookie *cookie, int64_t bytes,
                   4018:         enum BlockAcctType type)
                   4019: {
                   4020:     assert(type < BDRV_MAX_IOTYPE);
                   4021: 
                   4022:     cookie->bytes = bytes;
                   4023:     cookie->start_time_ns = get_clock();
                   4024:     cookie->type = type;
                   4025: }
                   4026: 
                   4027: void
                   4028: bdrv_acct_done(BlockDriverState *bs, BlockAcctCookie *cookie)
                   4029: {
                   4030:     assert(cookie->type < BDRV_MAX_IOTYPE);
                   4031: 
                   4032:     bs->nr_bytes[cookie->type] += cookie->bytes;
                   4033:     bs->nr_ops[cookie->type]++;
                   4034:     bs->total_time_ns[cookie->type] += get_clock() - cookie->start_time_ns;
                   4035: }
                   4036: 
1.1.1.19  root     4037: int bdrv_img_create(const char *filename, const char *fmt,
                   4038:                     const char *base_filename, const char *base_fmt,
                   4039:                     char *options, uint64_t img_size, int flags)
                   4040: {
                   4041:     QEMUOptionParameter *param = NULL, *create_options = NULL;
1.1.1.21  root     4042:     QEMUOptionParameter *backing_fmt, *backing_file, *size;
1.1.1.19  root     4043:     BlockDriverState *bs = NULL;
                   4044:     BlockDriver *drv, *proto_drv;
                   4045:     BlockDriver *backing_drv = NULL;
                   4046:     int ret = 0;
                   4047: 
                   4048:     /* Find driver and parse its options */
                   4049:     drv = bdrv_find_format(fmt);
                   4050:     if (!drv) {
                   4051:         error_report("Unknown file format '%s'", fmt);
                   4052:         ret = -EINVAL;
                   4053:         goto out;
                   4054:     }
                   4055: 
                   4056:     proto_drv = bdrv_find_protocol(filename);
                   4057:     if (!proto_drv) {
                   4058:         error_report("Unknown protocol '%s'", filename);
                   4059:         ret = -EINVAL;
                   4060:         goto out;
                   4061:     }
                   4062: 
                   4063:     create_options = append_option_parameters(create_options,
                   4064:                                               drv->create_options);
                   4065:     create_options = append_option_parameters(create_options,
                   4066:                                               proto_drv->create_options);
                   4067: 
                   4068:     /* Create parameter list with default values */
                   4069:     param = parse_option_parameters("", create_options, param);
                   4070: 
                   4071:     set_option_parameter_int(param, BLOCK_OPT_SIZE, img_size);
                   4072: 
                   4073:     /* Parse -o options */
                   4074:     if (options) {
                   4075:         param = parse_option_parameters(options, create_options, param);
                   4076:         if (param == NULL) {
                   4077:             error_report("Invalid options for file format '%s'.", fmt);
                   4078:             ret = -EINVAL;
                   4079:             goto out;
                   4080:         }
                   4081:     }
                   4082: 
                   4083:     if (base_filename) {
                   4084:         if (set_option_parameter(param, BLOCK_OPT_BACKING_FILE,
                   4085:                                  base_filename)) {
                   4086:             error_report("Backing file not supported for file format '%s'",
                   4087:                          fmt);
                   4088:             ret = -EINVAL;
                   4089:             goto out;
                   4090:         }
                   4091:     }
                   4092: 
                   4093:     if (base_fmt) {
                   4094:         if (set_option_parameter(param, BLOCK_OPT_BACKING_FMT, base_fmt)) {
                   4095:             error_report("Backing file format not supported for file "
                   4096:                          "format '%s'", fmt);
                   4097:             ret = -EINVAL;
                   4098:             goto out;
                   4099:         }
                   4100:     }
                   4101: 
                   4102:     backing_file = get_option_parameter(param, BLOCK_OPT_BACKING_FILE);
                   4103:     if (backing_file && backing_file->value.s) {
                   4104:         if (!strcmp(filename, backing_file->value.s)) {
                   4105:             error_report("Error: Trying to create an image with the "
                   4106:                          "same filename as the backing file");
                   4107:             ret = -EINVAL;
                   4108:             goto out;
                   4109:         }
                   4110:     }
                   4111: 
                   4112:     backing_fmt = get_option_parameter(param, BLOCK_OPT_BACKING_FMT);
                   4113:     if (backing_fmt && backing_fmt->value.s) {
                   4114:         backing_drv = bdrv_find_format(backing_fmt->value.s);
                   4115:         if (!backing_drv) {
                   4116:             error_report("Unknown backing file format '%s'",
                   4117:                          backing_fmt->value.s);
                   4118:             ret = -EINVAL;
                   4119:             goto out;
                   4120:         }
                   4121:     }
                   4122: 
                   4123:     // The size for the image must always be specified, with one exception:
                   4124:     // If we are using a backing file, we can obtain the size from there
1.1.1.21  root     4125:     size = get_option_parameter(param, BLOCK_OPT_SIZE);
                   4126:     if (size && size->value.n == -1) {
1.1.1.19  root     4127:         if (backing_file && backing_file->value.s) {
                   4128:             uint64_t size;
                   4129:             char buf[32];
1.1.1.23! root     4130:             int back_flags;
        !          4131: 
        !          4132:             /* backing files always opened read-only */
        !          4133:             back_flags =
        !          4134:                 flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
1.1.1.19  root     4135: 
                   4136:             bs = bdrv_new("");
                   4137: 
1.1.1.23! root     4138:             ret = bdrv_open(bs, backing_file->value.s, back_flags, backing_drv);
1.1.1.19  root     4139:             if (ret < 0) {
                   4140:                 error_report("Could not open '%s'", backing_file->value.s);
                   4141:                 goto out;
                   4142:             }
                   4143:             bdrv_get_geometry(bs, &size);
                   4144:             size *= 512;
                   4145: 
                   4146:             snprintf(buf, sizeof(buf), "%" PRId64, size);
                   4147:             set_option_parameter(param, BLOCK_OPT_SIZE, buf);
                   4148:         } else {
                   4149:             error_report("Image creation needs a size parameter");
                   4150:             ret = -EINVAL;
                   4151:             goto out;
                   4152:         }
                   4153:     }
                   4154: 
                   4155:     printf("Formatting '%s', fmt=%s ", filename, fmt);
                   4156:     print_option_parameters(param);
                   4157:     puts("");
                   4158: 
                   4159:     ret = bdrv_create(drv, filename, param);
                   4160: 
                   4161:     if (ret < 0) {
                   4162:         if (ret == -ENOTSUP) {
                   4163:             error_report("Formatting or formatting option not supported for "
                   4164:                          "file format '%s'", fmt);
                   4165:         } else if (ret == -EFBIG) {
                   4166:             error_report("The image size is too large for file format '%s'",
                   4167:                          fmt);
                   4168:         } else {
                   4169:             error_report("%s: error while creating %s: %s", filename, fmt,
                   4170:                          strerror(-ret));
                   4171:         }
                   4172:     }
                   4173: 
                   4174: out:
                   4175:     free_option_parameters(create_options);
                   4176:     free_option_parameters(param);
                   4177: 
                   4178:     if (bs) {
                   4179:         bdrv_delete(bs);
                   4180:     }
                   4181: 
                   4182:     return ret;
                   4183: }
1.1.1.23! root     4184: 
        !          4185: void *block_job_create(const BlockJobType *job_type, BlockDriverState *bs,
        !          4186:                        int64_t speed, BlockDriverCompletionFunc *cb,
        !          4187:                        void *opaque, Error **errp)
        !          4188: {
        !          4189:     BlockJob *job;
        !          4190: 
        !          4191:     if (bs->job || bdrv_in_use(bs)) {
        !          4192:         error_set(errp, QERR_DEVICE_IN_USE, bdrv_get_device_name(bs));
        !          4193:         return NULL;
        !          4194:     }
        !          4195:     bdrv_set_in_use(bs, 1);
        !          4196: 
        !          4197:     job = g_malloc0(job_type->instance_size);
        !          4198:     job->job_type      = job_type;
        !          4199:     job->bs            = bs;
        !          4200:     job->cb            = cb;
        !          4201:     job->opaque        = opaque;
        !          4202:     job->busy          = true;
        !          4203:     bs->job = job;
        !          4204: 
        !          4205:     /* Only set speed when necessary to avoid NotSupported error */
        !          4206:     if (speed != 0) {
        !          4207:         Error *local_err = NULL;
        !          4208: 
        !          4209:         block_job_set_speed(job, speed, &local_err);
        !          4210:         if (error_is_set(&local_err)) {
        !          4211:             bs->job = NULL;
        !          4212:             g_free(job);
        !          4213:             bdrv_set_in_use(bs, 0);
        !          4214:             error_propagate(errp, local_err);
        !          4215:             return NULL;
        !          4216:         }
        !          4217:     }
        !          4218:     return job;
        !          4219: }
        !          4220: 
        !          4221: void block_job_complete(BlockJob *job, int ret)
        !          4222: {
        !          4223:     BlockDriverState *bs = job->bs;
        !          4224: 
        !          4225:     assert(bs->job == job);
        !          4226:     job->cb(job->opaque, ret);
        !          4227:     bs->job = NULL;
        !          4228:     g_free(job);
        !          4229:     bdrv_set_in_use(bs, 0);
        !          4230: }
        !          4231: 
        !          4232: void block_job_set_speed(BlockJob *job, int64_t speed, Error **errp)
        !          4233: {
        !          4234:     Error *local_err = NULL;
        !          4235: 
        !          4236:     if (!job->job_type->set_speed) {
        !          4237:         error_set(errp, QERR_NOT_SUPPORTED);
        !          4238:         return;
        !          4239:     }
        !          4240:     job->job_type->set_speed(job, speed, &local_err);
        !          4241:     if (error_is_set(&local_err)) {
        !          4242:         error_propagate(errp, local_err);
        !          4243:         return;
        !          4244:     }
        !          4245: 
        !          4246:     job->speed = speed;
        !          4247: }
        !          4248: 
        !          4249: void block_job_cancel(BlockJob *job)
        !          4250: {
        !          4251:     job->cancelled = true;
        !          4252:     if (job->co && !job->busy) {
        !          4253:         qemu_coroutine_enter(job->co, NULL);
        !          4254:     }
        !          4255: }
        !          4256: 
        !          4257: bool block_job_is_cancelled(BlockJob *job)
        !          4258: {
        !          4259:     return job->cancelled;
        !          4260: }
        !          4261: 
        !          4262: struct BlockCancelData {
        !          4263:     BlockJob *job;
        !          4264:     BlockDriverCompletionFunc *cb;
        !          4265:     void *opaque;
        !          4266:     bool cancelled;
        !          4267:     int ret;
        !          4268: };
        !          4269: 
        !          4270: static void block_job_cancel_cb(void *opaque, int ret)
        !          4271: {
        !          4272:     struct BlockCancelData *data = opaque;
        !          4273: 
        !          4274:     data->cancelled = block_job_is_cancelled(data->job);
        !          4275:     data->ret = ret;
        !          4276:     data->cb(data->opaque, ret);
        !          4277: }
        !          4278: 
        !          4279: int block_job_cancel_sync(BlockJob *job)
        !          4280: {
        !          4281:     struct BlockCancelData data;
        !          4282:     BlockDriverState *bs = job->bs;
        !          4283: 
        !          4284:     assert(bs->job == job);
        !          4285: 
        !          4286:     /* Set up our own callback to store the result and chain to
        !          4287:      * the original callback.
        !          4288:      */
        !          4289:     data.job = job;
        !          4290:     data.cb = job->cb;
        !          4291:     data.opaque = job->opaque;
        !          4292:     data.ret = -EINPROGRESS;
        !          4293:     job->cb = block_job_cancel_cb;
        !          4294:     job->opaque = &data;
        !          4295:     block_job_cancel(job);
        !          4296:     while (data.ret == -EINPROGRESS) {
        !          4297:         qemu_aio_wait();
        !          4298:     }
        !          4299:     return (data.cancelled && data.ret == 0) ? -ECANCELED : data.ret;
        !          4300: }
        !          4301: 
        !          4302: void block_job_sleep_ns(BlockJob *job, QEMUClock *clock, int64_t ns)
        !          4303: {
        !          4304:     /* Check cancellation *before* setting busy = false, too!  */
        !          4305:     if (!block_job_is_cancelled(job)) {
        !          4306:         job->busy = false;
        !          4307:         co_sleep_ns(clock, ns);
        !          4308:         job->busy = true;
        !          4309:     }
        !          4310: }

unix.superglobalmegacorp.com