File: [Qemu by Fabrice Bellard] / qemu / block-migration.c
Revision 1.1.1.3 (vendor branch)
Tue Apr 24 18:34:38 2018 UTC by root
Branches: qemu, MAIN
CVS tags: qemu0150, qemu0141, qemu0140, HEAD
qemu 0.14.0

/*
 * QEMU live block migration
 *
 * Copyright IBM, Corp. 2009
 *
 * Authors:
 *  Liran Schour   <lirans@il.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 */

#include "qemu-common.h"
#include "block_int.h"
#include "hw/hw.h"
#include "qemu-queue.h"
#include "qemu-timer.h"
#include "monitor.h"
#include "block-migration.h"
#include "migration.h"
#include "blockdev.h"
#include <assert.h>

#define BLOCK_SIZE (BDRV_SECTORS_PER_DIRTY_CHUNK << BDRV_SECTOR_BITS)

#define BLK_MIG_FLAG_DEVICE_BLOCK       0x01
#define BLK_MIG_FLAG_EOS                0x02
#define BLK_MIG_FLAG_PROGRESS           0x04

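/* Stream record format (see blk_send() and block_load()): each record
 * starts with a big-endian 64-bit word whose low BDRV_SECTOR_BITS bits
 * carry one of the flags above; the remaining bits carry the sector
 * number (or, for BLK_MIG_FLAG_PROGRESS, a percentage).  A DEVICE_BLOCK
 * record is followed by a one-byte device name length, the device name
 * itself, and BLOCK_SIZE bytes of chunk data.  PROGRESS and EOS records
 * carry no payload. */
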
#define MAX_IS_ALLOCATED_SEARCH 65536

//#define DEBUG_BLK_MIGRATION

#ifdef DEBUG_BLK_MIGRATION
#define DPRINTF(fmt, ...) \
    do { printf("blk_migration: " fmt, ## __VA_ARGS__); } while (0)
#else
#define DPRINTF(fmt, ...) \
    do { } while (0)
#endif

typedef struct BlkMigDevState {
    BlockDriverState *bs;
    int bulk_completed;
    int shared_base;
    int64_t cur_sector;
    int64_t cur_dirty;
    int64_t completed_sectors;
    int64_t total_sectors;
    int64_t dirty;
    QSIMPLEQ_ENTRY(BlkMigDevState) entry;
    unsigned long *aio_bitmap;
} BlkMigDevState;

typedef struct BlkMigBlock {
    uint8_t *buf;
    BlkMigDevState *bmds;
    int64_t sector;
    int nr_sectors;
    struct iovec iov;
    QEMUIOVector qiov;
    BlockDriverAIOCB *aiocb;
    int ret;
    int64_t time;
    QSIMPLEQ_ENTRY(BlkMigBlock) entry;
} BlkMigBlock;

typedef struct BlkMigState {
    int blk_enable;
    int shared_base;
    QSIMPLEQ_HEAD(bmds_list, BlkMigDevState) bmds_list;
    QSIMPLEQ_HEAD(blk_list, BlkMigBlock) blk_list;
    int submitted;
    int read_done;
    int transferred;
    int64_t total_sector_sum;
    int prev_progress;
    int bulk_completed;
    long double total_time;
    int reads;
} BlkMigState;

static BlkMigState block_mig_state;

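/* Emit one DEVICE_BLOCK record for a chunk that has been read. */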
static void blk_send(QEMUFile *f, BlkMigBlock *blk)
{
    int len;

    /* sector number and flags */
    qemu_put_be64(f, (blk->sector << BDRV_SECTOR_BITS)
                     | BLK_MIG_FLAG_DEVICE_BLOCK);

    /* device name */
    len = strlen(blk->bmds->bs->device_name);
    qemu_put_byte(f, len);
    qemu_put_buffer(f, (uint8_t *)blk->bmds->bs->device_name, len);

    qemu_put_buffer(f, blk->buf, BLOCK_SIZE);
}

int blk_mig_active(void)
{
    return !QSIMPLEQ_EMPTY(&block_mig_state.bmds_list);
}

uint64_t blk_mig_bytes_transferred(void)
{
    BlkMigDevState *bmds;
    uint64_t sum = 0;

    QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
        sum += bmds->completed_sectors;
    }
    return sum << BDRV_SECTOR_BITS;
}

uint64_t blk_mig_bytes_remaining(void)
{
    return blk_mig_bytes_total() - blk_mig_bytes_transferred();
}

uint64_t blk_mig_bytes_total(void)
{
    BlkMigDevState *bmds;
    uint64_t sum = 0;

    QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
        sum += bmds->total_sectors;
    }
    return sum << BDRV_SECTOR_BITS;
}

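/* Crude read-bandwidth estimate: bytes read so far divided by the
 * cumulative read latency in ns.  is_stage2_completed() uses it to judge
 * whether the remaining dirty data fits within the allowed downtime. */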
static inline void add_avg_read_time(int64_t time)
{
    block_mig_state.reads++;
    block_mig_state.total_time += time;
}

static inline long double compute_read_bwidth(void)
{
    assert(block_mig_state.total_time != 0);
    return (block_mig_state.reads * BLOCK_SIZE) / block_mig_state.total_time;
}

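/* aio_bitmap keeps one bit per BDRV_SECTORS_PER_DIRTY_CHUNK-sized chunk.
 * A bit is set when a dirty-phase AIO read of that chunk is submitted and
 * cleared again in blk_mig_read_cb(), so that a chunk is never re-read
 * while an earlier read of it is still in flight. */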
static int bmds_aio_inflight(BlkMigDevState *bmds, int64_t sector)
{
    int64_t chunk = sector / (int64_t)BDRV_SECTORS_PER_DIRTY_CHUNK;

    if ((sector << BDRV_SECTOR_BITS) < bdrv_getlength(bmds->bs)) {
        return !!(bmds->aio_bitmap[chunk / (sizeof(unsigned long) * 8)] &
            (1UL << (chunk % (sizeof(unsigned long) * 8))));
    } else {
        return 0;
    }
}

static void bmds_set_aio_inflight(BlkMigDevState *bmds, int64_t sector_num,
                                  int nb_sectors, int set)
{
    int64_t start, end;
    unsigned long val, idx, bit;

    start = sector_num / BDRV_SECTORS_PER_DIRTY_CHUNK;
    end = (sector_num + nb_sectors - 1) / BDRV_SECTORS_PER_DIRTY_CHUNK;

    for (; start <= end; start++) {
        idx = start / (sizeof(unsigned long) * 8);
        bit = start % (sizeof(unsigned long) * 8);
        val = bmds->aio_bitmap[idx];
        if (set) {
            val |= 1UL << bit;
        } else {
            val &= ~(1UL << bit);
        }
        bmds->aio_bitmap[idx] = val;
    }
}

static void alloc_aio_bitmap(BlkMigDevState *bmds)
{
    BlockDriverState *bs = bmds->bs;
    int64_t bitmap_size;

    bitmap_size = (bdrv_getlength(bs) >> BDRV_SECTOR_BITS) +
            BDRV_SECTORS_PER_DIRTY_CHUNK * 8 - 1;
    bitmap_size /= BDRV_SECTORS_PER_DIRTY_CHUNK * 8;

    bmds->aio_bitmap = qemu_mallocz(bitmap_size);
}

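/* AIO completion callback: record the read latency for the bandwidth
 * estimate, queue the block for flush_blks() and clear its in-flight bit. */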
static void blk_mig_read_cb(void *opaque, int ret)
{
    BlkMigBlock *blk = opaque;

    blk->ret = ret;

    blk->time = qemu_get_clock_ns(rt_clock) - blk->time;

    add_avg_read_time(blk->time);

    QSIMPLEQ_INSERT_TAIL(&block_mig_state.blk_list, blk, entry);
    bmds_set_aio_inflight(blk->bmds, blk->sector, blk->nr_sectors, 0);

    block_mig_state.submitted--;
    block_mig_state.read_done++;
    assert(block_mig_state.submitted >= 0);
}

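/* Bulk phase: submit an AIO read for the next chunk of a device.  With a
 * shared base image, sectors that are unallocated (i.e. present only in
 * the backing file) are skipped.  Returns 1 once the whole device has
 * been scanned. */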
static int mig_save_device_bulk(Monitor *mon, QEMUFile *f,
                                BlkMigDevState *bmds)
{
    int64_t total_sectors = bmds->total_sectors;
    int64_t cur_sector = bmds->cur_sector;
    BlockDriverState *bs = bmds->bs;
    BlkMigBlock *blk;
    int nr_sectors;

    if (bmds->shared_base) {
        while (cur_sector < total_sectors &&
               !bdrv_is_allocated(bs, cur_sector, MAX_IS_ALLOCATED_SEARCH,
                                  &nr_sectors)) {
            cur_sector += nr_sectors;
        }
    }

    if (cur_sector >= total_sectors) {
        bmds->cur_sector = bmds->completed_sectors = total_sectors;
        return 1;
    }

    bmds->completed_sectors = cur_sector;

    cur_sector &= ~((int64_t)BDRV_SECTORS_PER_DIRTY_CHUNK - 1);

    /* we are going to transfer a full block even if it is not allocated */
    nr_sectors = BDRV_SECTORS_PER_DIRTY_CHUNK;

    if (total_sectors - cur_sector < BDRV_SECTORS_PER_DIRTY_CHUNK) {
        nr_sectors = total_sectors - cur_sector;
    }

    blk = qemu_malloc(sizeof(BlkMigBlock));
    blk->buf = qemu_malloc(BLOCK_SIZE);
    blk->bmds = bmds;
    blk->sector = cur_sector;
    blk->nr_sectors = nr_sectors;

    blk->iov.iov_base = blk->buf;
    blk->iov.iov_len = nr_sectors * BDRV_SECTOR_SIZE;
    qemu_iovec_init_external(&blk->qiov, &blk->iov, 1);

    blk->time = qemu_get_clock_ns(rt_clock);

    blk->aiocb = bdrv_aio_readv(bs, cur_sector, &blk->qiov,
                                nr_sectors, blk_mig_read_cb, blk);
    if (!blk->aiocb) {
        goto error;
    }
    block_mig_state.submitted++;

    bdrv_reset_dirty(bs, cur_sector, nr_sectors);
    bmds->cur_sector = cur_sector + nr_sectors;

    return (bmds->cur_sector >= total_sectors);

error:
    monitor_printf(mon, "Error reading sector %" PRId64 "\n", cur_sector);
    qemu_file_set_error(f);
    qemu_free(blk->buf);
    qemu_free(blk);
    return 0;
}

static void set_dirty_tracking(int enable)
{
    BlkMigDevState *bmds;

    QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
        bdrv_set_dirty_tracking(bmds->bs, enable);
    }
}

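/* bdrv_iterate() callback: create the per-device migration state for each
 * writable block device, take a drive reference and mark the device as in
 * use for the duration of the migration. */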
static void init_blk_migration_it(void *opaque, BlockDriverState *bs)
{
    Monitor *mon = opaque;
    BlkMigDevState *bmds;
    int64_t sectors;

    if (!bdrv_is_read_only(bs)) {
        sectors = bdrv_getlength(bs) >> BDRV_SECTOR_BITS;
        if (sectors <= 0) {
            return;
        }

        bmds = qemu_mallocz(sizeof(BlkMigDevState));
        bmds->bs = bs;
        bmds->bulk_completed = 0;
        bmds->total_sectors = sectors;
        bmds->completed_sectors = 0;
        bmds->shared_base = block_mig_state.shared_base;
        alloc_aio_bitmap(bmds);
        drive_get_ref(drive_get_by_blockdev(bs));
        bdrv_set_in_use(bs, 1);

        block_mig_state.total_sector_sum += sectors;

        if (bmds->shared_base) {
            monitor_printf(mon, "Start migration for %s with shared base "
                                "image\n",
                           bs->device_name);
        } else {
            monitor_printf(mon, "Start full migration for %s\n",
                           bs->device_name);
        }

        QSIMPLEQ_INSERT_TAIL(&block_mig_state.bmds_list, bmds, entry);
    }
}

static void init_blk_migration(Monitor *mon, QEMUFile *f)
{
    block_mig_state.submitted = 0;
    block_mig_state.read_done = 0;
    block_mig_state.transferred = 0;
    block_mig_state.total_sector_sum = 0;
    block_mig_state.prev_progress = -1;
    block_mig_state.bulk_completed = 0;
    block_mig_state.total_time = 0;
    block_mig_state.reads = 0;

    bdrv_iterate(init_blk_migration_it, mon);
}

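/* Advance the bulk phase by at most one chunk across all devices, and emit
 * a PROGRESS record whenever the completion percentage changes.  Returns 0
 * once every device has finished its bulk section. */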
static int blk_mig_save_bulked_block(Monitor *mon, QEMUFile *f)
{
    int64_t completed_sector_sum = 0;
    BlkMigDevState *bmds;
    int progress;
    int ret = 0;

    QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
        if (bmds->bulk_completed == 0) {
            if (mig_save_device_bulk(mon, f, bmds) == 1) {
                /* completed bulk section for this device */
                bmds->bulk_completed = 1;
            }
            completed_sector_sum += bmds->completed_sectors;
            ret = 1;
            break;
        } else {
            completed_sector_sum += bmds->completed_sectors;
        }
    }

    if (block_mig_state.total_sector_sum != 0) {
        progress = completed_sector_sum * 100 /
                   block_mig_state.total_sector_sum;
    } else {
        progress = 100;
    }
    if (progress != block_mig_state.prev_progress) {
        block_mig_state.prev_progress = progress;
        qemu_put_be64(f, (progress << BDRV_SECTOR_BITS)
                         | BLK_MIG_FLAG_PROGRESS);
        monitor_printf(mon, "Completed %d %%\r", progress);
        monitor_flush(mon);
    }

    return ret;
}

static void blk_mig_reset_dirty_cursor(void)
{
    BlkMigDevState *bmds;

    QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
        bmds->cur_dirty = 0;
    }
}

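/* Dirty phase: transfer the next dirty chunk of a device, starting at the
 * per-device cursor.  With is_async the read is submitted as AIO;
 * otherwise (final stage, guest stopped) it is read synchronously and sent
 * immediately.  Returns 1 when the cursor has passed the end of the
 * device. */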
static int mig_save_device_dirty(Monitor *mon, QEMUFile *f,
                                 BlkMigDevState *bmds, int is_async)
{
    BlkMigBlock *blk;
    int64_t total_sectors = bmds->total_sectors;
    int64_t sector;
    int nr_sectors;

    for (sector = bmds->cur_dirty; sector < bmds->total_sectors;) {
        if (bmds_aio_inflight(bmds, sector)) {
            qemu_aio_flush();
        }
        if (bdrv_get_dirty(bmds->bs, sector)) {

            if (total_sectors - sector < BDRV_SECTORS_PER_DIRTY_CHUNK) {
                nr_sectors = total_sectors - sector;
            } else {
                nr_sectors = BDRV_SECTORS_PER_DIRTY_CHUNK;
            }
            blk = qemu_malloc(sizeof(BlkMigBlock));
            blk->buf = qemu_malloc(BLOCK_SIZE);
            blk->bmds = bmds;
            blk->sector = sector;
            blk->nr_sectors = nr_sectors;

            if (is_async) {
                blk->iov.iov_base = blk->buf;
                blk->iov.iov_len = nr_sectors * BDRV_SECTOR_SIZE;
                qemu_iovec_init_external(&blk->qiov, &blk->iov, 1);

                blk->time = qemu_get_clock_ns(rt_clock);

                blk->aiocb = bdrv_aio_readv(bmds->bs, sector, &blk->qiov,
                                            nr_sectors, blk_mig_read_cb, blk);
                if (!blk->aiocb) {
                    goto error;
                }
                block_mig_state.submitted++;
                bmds_set_aio_inflight(bmds, sector, nr_sectors, 1);
            } else {
                if (bdrv_read(bmds->bs, sector, blk->buf,
                              nr_sectors) < 0) {
                    goto error;
                }
                blk_send(f, blk);

                qemu_free(blk->buf);
                qemu_free(blk);
            }

            bdrv_reset_dirty(bmds->bs, sector, nr_sectors);
            break;
        }
        sector += BDRV_SECTORS_PER_DIRTY_CHUNK;
        bmds->cur_dirty = sector;
    }

    return (bmds->cur_dirty >= bmds->total_sectors);

error:
    monitor_printf(mon, "Error reading sector %" PRId64 "\n", sector);
    qemu_file_set_error(f);
    qemu_free(blk->buf);
    qemu_free(blk);
    return 0;
}

static int blk_mig_save_dirty_block(Monitor *mon, QEMUFile *f, int is_async)
{
    BlkMigDevState *bmds;
    int ret = 0;

    QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
        if (mig_save_device_dirty(mon, f, bmds, is_async) == 0) {
            ret = 1;
            break;
        }
    }

    return ret;
}

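/* Drain completed AIO reads: send queued blocks down the migration stream
 * until the queue is empty or the file's rate limit is hit. */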
static void flush_blks(QEMUFile *f)
{
    BlkMigBlock *blk;

    DPRINTF("%s Enter submitted %d read_done %d transferred %d\n",
            __FUNCTION__, block_mig_state.submitted, block_mig_state.read_done,
            block_mig_state.transferred);

    while ((blk = QSIMPLEQ_FIRST(&block_mig_state.blk_list)) != NULL) {
        if (qemu_file_rate_limit(f)) {
            break;
        }
        if (blk->ret < 0) {
            qemu_file_set_error(f);
            break;
        }
        blk_send(f, blk);

        QSIMPLEQ_REMOVE_HEAD(&block_mig_state.blk_list, entry);
        qemu_free(blk->buf);
        qemu_free(blk);

        block_mig_state.read_done--;
        block_mig_state.transferred++;
        assert(block_mig_state.read_done >= 0);
    }

    DPRINTF("%s Exit submitted %d read_done %d transferred %d\n", __FUNCTION__,
            block_mig_state.submitted, block_mig_state.read_done,
            block_mig_state.transferred);
}

static int64_t get_remaining_dirty(void)
{
    BlkMigDevState *bmds;
    int64_t dirty = 0;

    QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
        dirty += bdrv_get_dirty_count(bmds->bs);
    }

    return dirty * BLOCK_SIZE;
}

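/* Stage 2 has converged once the bulk phase is done and the outstanding
 * dirty data is predicted, from the measured read bandwidth, to transfer
 * within the configured maximum downtime. */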
static int is_stage2_completed(void)
{
    int64_t remaining_dirty;
    long double bwidth;

    if (block_mig_state.bulk_completed == 1) {

        remaining_dirty = get_remaining_dirty();
        if (remaining_dirty == 0) {
            return 1;
        }

        bwidth = compute_read_bwidth();

        if ((remaining_dirty / bwidth) <=
            migrate_max_downtime()) {
            /* finish stage 2 because we estimate that the remaining work
               can be completed within max_downtime */

            return 1;
        }
    }

    return 0;
}

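/* Tear down all migration state: stop dirty tracking, release the drives
 * and free any blocks still queued for sending. */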
static void blk_mig_cleanup(Monitor *mon)
{
    BlkMigDevState *bmds;
    BlkMigBlock *blk;

    set_dirty_tracking(0);

    while ((bmds = QSIMPLEQ_FIRST(&block_mig_state.bmds_list)) != NULL) {
        QSIMPLEQ_REMOVE_HEAD(&block_mig_state.bmds_list, entry);
        bdrv_set_in_use(bmds->bs, 0);
        drive_put_ref(drive_get_by_blockdev(bmds->bs));
        qemu_free(bmds->aio_bitmap);
        qemu_free(bmds);
    }

    while ((blk = QSIMPLEQ_FIRST(&block_mig_state.blk_list)) != NULL) {
        QSIMPLEQ_REMOVE_HEAD(&block_mig_state.blk_list, entry);
        qemu_free(blk->buf);
        qemu_free(blk);
    }

    monitor_printf(mon, "\n");
}

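/* Live savevm handler.  Stage 1 sets up per-device state and enables
 * dirty tracking; stage 2 runs repeatedly, first finishing the bulk copy
 * and then iterating over dirty chunks within the bandwidth limit; stage 3
 * (guest stopped) synchronously flushes the remaining dirty chunks.  A
 * negative stage means the migration was cancelled. */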
static int block_save_live(Monitor *mon, QEMUFile *f, int stage, void *opaque)
{
    DPRINTF("Enter save live stage %d submitted %d transferred %d\n",
            stage, block_mig_state.submitted, block_mig_state.transferred);

    if (stage < 0) {
        blk_mig_cleanup(mon);
        return 0;
    }

    if (block_mig_state.blk_enable != 1) {
        /* no need to migrate storage */
        qemu_put_be64(f, BLK_MIG_FLAG_EOS);
        return 1;
    }

    if (stage == 1) {
        init_blk_migration(mon, f);

        /* start tracking dirty blocks */
        set_dirty_tracking(1);
    }

    flush_blks(f);

    if (qemu_file_has_error(f)) {
        blk_mig_cleanup(mon);
        return 0;
    }

    blk_mig_reset_dirty_cursor();

    if (stage == 2) {
        /* control the rate of transfer */
        while ((block_mig_state.submitted +
                block_mig_state.read_done) * BLOCK_SIZE <
               qemu_file_get_rate_limit(f)) {
            if (block_mig_state.bulk_completed == 0) {
                /* first finish the bulk phase */
                if (blk_mig_save_bulked_block(mon, f) == 0) {
                    /* finished saving bulk on all devices */
                    block_mig_state.bulk_completed = 1;
                }
            } else {
                if (blk_mig_save_dirty_block(mon, f, 1) == 0) {
                    /* no more dirty blocks */
                    break;
                }
            }
        }

        flush_blks(f);

        if (qemu_file_has_error(f)) {
            blk_mig_cleanup(mon);
            return 0;
        }
    }

    if (stage == 3) {
        /* we know for sure that the bulk save is complete and that
           all async reads have completed */
        assert(block_mig_state.submitted == 0);

        while (blk_mig_save_dirty_block(mon, f, 0) != 0);
        blk_mig_cleanup(mon);

        /* report completion */
        qemu_put_be64(f, (100 << BDRV_SECTOR_BITS) | BLK_MIG_FLAG_PROGRESS);

        if (qemu_file_has_error(f)) {
            return 0;
        }

        monitor_printf(mon, "Block migration completed\n");
    }

    qemu_put_be64(f, BLK_MIG_FLAG_EOS);

    return ((stage == 2) && is_stage2_completed());
}

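/* Destination side: parse records from the stream and apply DEVICE_BLOCK
 * writes until an EOS record is seen. */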
static int block_load(QEMUFile *f, void *opaque, int version_id)
{
    static int banner_printed;
    int len, flags;
    char device_name[256];
    int64_t addr;
    BlockDriverState *bs, *bs_prev = NULL;
    uint8_t *buf;
    int64_t total_sectors = 0;
    int nr_sectors;

    do {
        addr = qemu_get_be64(f);

        flags = addr & ~BDRV_SECTOR_MASK;
        addr >>= BDRV_SECTOR_BITS;

        if (flags & BLK_MIG_FLAG_DEVICE_BLOCK) {
            int ret;
            /* get device name */
            len = qemu_get_byte(f);
            qemu_get_buffer(f, (uint8_t *)device_name, len);
            device_name[len] = '\0';

            bs = bdrv_find(device_name);
            if (!bs) {
                fprintf(stderr, "Error unknown block device %s\n",
                        device_name);
                return -EINVAL;
            }

            if (bs != bs_prev) {
                bs_prev = bs;
                total_sectors = bdrv_getlength(bs) >> BDRV_SECTOR_BITS;
                if (total_sectors <= 0) {
                    error_report("Error getting length of block device %s",
                                 device_name);
                    return -EINVAL;
                }
            }

            if (total_sectors - addr < BDRV_SECTORS_PER_DIRTY_CHUNK) {
                nr_sectors = total_sectors - addr;
            } else {
                nr_sectors = BDRV_SECTORS_PER_DIRTY_CHUNK;
            }

            buf = qemu_malloc(BLOCK_SIZE);

            qemu_get_buffer(f, buf, BLOCK_SIZE);
            ret = bdrv_write(bs, addr, buf, nr_sectors);

            qemu_free(buf);
            if (ret < 0) {
                return ret;
            }
        } else if (flags & BLK_MIG_FLAG_PROGRESS) {
            if (!banner_printed) {
                printf("Receiving block device images\n");
                banner_printed = 1;
            }
            printf("Completed %d %%%c", (int)addr,
                   (addr == 100) ? '\n' : '\r');
            fflush(stdout);
        } else if (!(flags & BLK_MIG_FLAG_EOS)) {
            fprintf(stderr, "Unknown flags\n");
            return -EINVAL;
        }
        if (qemu_file_has_error(f)) {
            return -EIO;
        }
    } while (!(flags & BLK_MIG_FLAG_EOS));

    return 0;
}

static void block_set_params(int blk_enable, int shared_base, void *opaque)
{
    block_mig_state.blk_enable = blk_enable;
    block_mig_state.shared_base = shared_base;

    /* shared base means that blk_enable = 1 */
    block_mig_state.blk_enable |= shared_base;
}

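/* Register the "block" live-savevm section. */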
void blk_mig_init(void)
{
    QSIMPLEQ_INIT(&block_mig_state.bmds_list);
    QSIMPLEQ_INIT(&block_mig_state.blk_list);

    register_savevm_live(NULL, "block", 0, 1, block_set_params,
                         block_save_live, NULL, block_load, &block_mig_state);
}
