File:  [Qemu by Fabrice Bellard] / qemu / block-migration.c
Revision 1.1.1.4 (vendor branch): download - view: text, annotated - select for diffs
Tue Apr 24 18:56:53 2018 UTC (3 years, 6 months ago) by root
Branches: qemu, MAIN
CVS tags: qemu1000, qemu0151, HEAD
qemu 0.15.1

    1: /*
    2:  * QEMU live block migration
    3:  *
    4:  * Copyright IBM, Corp. 2009
    5:  *
    6:  * Authors:
    7:  *  Liran Schour   <lirans@il.ibm.com>
    8:  *
    9:  * This work is licensed under the terms of the GNU GPL, version 2.  See
   10:  * the COPYING file in the top-level directory.
   11:  *
   12:  */
   13: 
   14: #include "qemu-common.h"
   15: #include "block_int.h"
   16: #include "hw/hw.h"
   17: #include "qemu-queue.h"
   18: #include "qemu-timer.h"
   19: #include "monitor.h"
   20: #include "block-migration.h"
   21: #include "migration.h"
   22: #include "blockdev.h"
   23: #include <assert.h>
   24: 
/* Size in bytes of one migrated block: one dirty-tracking chunk of sectors. */
#define BLOCK_SIZE (BDRV_SECTORS_PER_DIRTY_CHUNK << BDRV_SECTOR_BITS)

/*
 * Flags carried in the low bits of each 64-bit header word of the stream;
 * the remaining bits hold a sector offset or a progress value shifted left
 * by BDRV_SECTOR_BITS.
 */
#define BLK_MIG_FLAG_DEVICE_BLOCK       0x01
#define BLK_MIG_FLAG_EOS                0x02
#define BLK_MIG_FLAG_PROGRESS           0x04

/* Sector count passed to bdrv_is_allocated() when skipping unallocated data. */
#define MAX_IS_ALLOCATED_SEARCH 65536

/* Uncomment to enable DPRINTF tracing on stdout. */
//#define DEBUG_BLK_MIGRATION

#ifdef DEBUG_BLK_MIGRATION
#define DPRINTF(fmt, ...) \
    do { printf("blk_migration: " fmt, ## __VA_ARGS__); } while (0)
#else
/* Tracing disabled: DPRINTF expands to an empty statement. */
#define DPRINTF(fmt, ...) \
    do { } while (0)
#endif
   42: 
/* Per-device migration state; one entry per writable block device. */
typedef struct BlkMigDevState {
    BlockDriverState *bs;           /* device being migrated */
    int bulk_completed;             /* set to 1 once the bulk pass is done */
    int shared_base;                /* copied from block_mig_state.shared_base;
                                       when set, the bulk pass skips
                                       unallocated sectors */
    int64_t cur_sector;             /* next sector for the bulk pass */
    int64_t cur_dirty;              /* scan cursor for the dirty pass */
    int64_t completed_sectors;      /* bulk progress, in sectors */
    int64_t total_sectors;          /* device length in sectors */
    int64_t dirty;                  /* not referenced in this file's visible
                                       code */
    QSIMPLEQ_ENTRY(BlkMigDevState) entry;   /* link in bmds_list */
    unsigned long *aio_bitmap;      /* one bit per dirty chunk: set while an
                                       async dirty-pass read is in flight */
} BlkMigDevState;
   55: 
/* One chunk of device data that is being read or is waiting to be sent. */
typedef struct BlkMigBlock {
    uint8_t *buf;                   /* BLOCK_SIZE-byte data buffer */
    BlkMigDevState *bmds;           /* device this chunk belongs to */
    int64_t sector;                 /* first sector of the chunk */
    int nr_sectors;                 /* sectors actually read into buf */
    struct iovec iov;               /* describes buf for the async read */
    QEMUIOVector qiov;              /* wraps iov for bdrv_aio_readv() */
    BlockDriverAIOCB *aiocb;        /* handle returned by bdrv_aio_readv() */
    int ret;                        /* result reported to the read callback */
    QSIMPLEQ_ENTRY(BlkMigBlock) entry;  /* link in blk_list */
} BlkMigBlock;
   67: 
/* Global block-migration state. */
typedef struct BlkMigState {
    int blk_enable;                 /* set by block_set_params() */
    int shared_base;                /* set by block_set_params() */
    QSIMPLEQ_HEAD(bmds_list, BlkMigDevState) bmds_list; /* devices */
    QSIMPLEQ_HEAD(blk_list, BlkMigBlock) blk_list;  /* completed reads waiting
                                                       to be sent */
    int submitted;                  /* async reads currently in flight */
    int read_done;                  /* reads completed but not yet sent */
    int transferred;                /* blocks sent by flush_blks() */
    int64_t total_sector_sum;       /* sum of total_sectors over all devices */
    int prev_progress;              /* last percentage written to the stream */
    int bulk_completed;             /* set to 1 once every device finished its
                                       bulk pass */
    long double total_time;         /* time accumulated by the read callback,
                                       in qemu_get_clock_ns() units */
    long double prev_time_offset;   /* timestamp of the previous sample */
    int reads;                      /* number of completed async reads */
} BlkMigState;

/* The single instance used by every function in this file. */
static BlkMigState block_mig_state;
   85: 
   86: static void blk_send(QEMUFile *f, BlkMigBlock * blk)
   87: {
   88:     int len;
   89: 
   90:     /* sector number and flags */
   91:     qemu_put_be64(f, (blk->sector << BDRV_SECTOR_BITS)
   92:                      | BLK_MIG_FLAG_DEVICE_BLOCK);
   93: 
   94:     /* device name */
   95:     len = strlen(blk->bmds->bs->device_name);
   96:     qemu_put_byte(f, len);
   97:     qemu_put_buffer(f, (uint8_t *)blk->bmds->bs->device_name, len);
   98: 
   99:     qemu_put_buffer(f, blk->buf, BLOCK_SIZE);
  100: }
  101: 
  102: int blk_mig_active(void)
  103: {
  104:     return !QSIMPLEQ_EMPTY(&block_mig_state.bmds_list);
  105: }
  106: 
  107: uint64_t blk_mig_bytes_transferred(void)
  108: {
  109:     BlkMigDevState *bmds;
  110:     uint64_t sum = 0;
  111: 
  112:     QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
  113:         sum += bmds->completed_sectors;
  114:     }
  115:     return sum << BDRV_SECTOR_BITS;
  116: }
  117: 
  118: uint64_t blk_mig_bytes_remaining(void)
  119: {
  120:     return blk_mig_bytes_total() - blk_mig_bytes_transferred();
  121: }
  122: 
  123: uint64_t blk_mig_bytes_total(void)
  124: {
  125:     BlkMigDevState *bmds;
  126:     uint64_t sum = 0;
  127: 
  128:     QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
  129:         sum += bmds->total_sectors;
  130:     }
  131:     return sum << BDRV_SECTOR_BITS;
  132: }
  133: 
  134: static inline long double compute_read_bwidth(void)
  135: {
  136:     assert(block_mig_state.total_time != 0);
  137:     return (block_mig_state.reads / block_mig_state.total_time) * BLOCK_SIZE;
  138: }
  139: 
  140: static int bmds_aio_inflight(BlkMigDevState *bmds, int64_t sector)
  141: {
  142:     int64_t chunk = sector / (int64_t)BDRV_SECTORS_PER_DIRTY_CHUNK;
  143: 
  144:     if ((sector << BDRV_SECTOR_BITS) < bdrv_getlength(bmds->bs)) {
  145:         return !!(bmds->aio_bitmap[chunk / (sizeof(unsigned long) * 8)] &
  146:             (1UL << (chunk % (sizeof(unsigned long) * 8))));
  147:     } else {
  148:         return 0;
  149:     }
  150: }
  151: 
  152: static void bmds_set_aio_inflight(BlkMigDevState *bmds, int64_t sector_num,
  153:                              int nb_sectors, int set)
  154: {
  155:     int64_t start, end;
  156:     unsigned long val, idx, bit;
  157: 
  158:     start = sector_num / BDRV_SECTORS_PER_DIRTY_CHUNK;
  159:     end = (sector_num + nb_sectors - 1) / BDRV_SECTORS_PER_DIRTY_CHUNK;
  160: 
  161:     for (; start <= end; start++) {
  162:         idx = start / (sizeof(unsigned long) * 8);
  163:         bit = start % (sizeof(unsigned long) * 8);
  164:         val = bmds->aio_bitmap[idx];
  165:         if (set) {
  166:             val |= 1UL << bit;
  167:         } else {
  168:             val &= ~(1UL << bit);
  169:         }
  170:         bmds->aio_bitmap[idx] = val;
  171:     }
  172: }
  173: 
  174: static void alloc_aio_bitmap(BlkMigDevState *bmds)
  175: {
  176:     BlockDriverState *bs = bmds->bs;
  177:     int64_t bitmap_size;
  178: 
  179:     bitmap_size = (bdrv_getlength(bs) >> BDRV_SECTOR_BITS) +
  180:             BDRV_SECTORS_PER_DIRTY_CHUNK * 8 - 1;
  181:     bitmap_size /= BDRV_SECTORS_PER_DIRTY_CHUNK * 8;
  182: 
  183:     bmds->aio_bitmap = qemu_mallocz(bitmap_size);
  184: }
  185: 
  186: static void blk_mig_read_cb(void *opaque, int ret)
  187: {
  188:     long double curr_time = qemu_get_clock_ns(rt_clock);
  189:     BlkMigBlock *blk = opaque;
  190: 
  191:     blk->ret = ret;
  192: 
  193:     block_mig_state.reads++;
  194:     block_mig_state.total_time += (curr_time - block_mig_state.prev_time_offset);
  195:     block_mig_state.prev_time_offset = curr_time;
  196: 
  197:     QSIMPLEQ_INSERT_TAIL(&block_mig_state.blk_list, blk, entry);
  198:     bmds_set_aio_inflight(blk->bmds, blk->sector, blk->nr_sectors, 0);
  199: 
  200:     block_mig_state.submitted--;
  201:     block_mig_state.read_done++;
  202:     assert(block_mig_state.submitted >= 0);
  203: }
  204: 
  205: static int mig_save_device_bulk(Monitor *mon, QEMUFile *f,
  206:                                 BlkMigDevState *bmds)
  207: {
  208:     int64_t total_sectors = bmds->total_sectors;
  209:     int64_t cur_sector = bmds->cur_sector;
  210:     BlockDriverState *bs = bmds->bs;
  211:     BlkMigBlock *blk;
  212:     int nr_sectors;
  213: 
  214:     if (bmds->shared_base) {
  215:         while (cur_sector < total_sectors &&
  216:                !bdrv_is_allocated(bs, cur_sector, MAX_IS_ALLOCATED_SEARCH,
  217:                                   &nr_sectors)) {
  218:             cur_sector += nr_sectors;
  219:         }
  220:     }
  221: 
  222:     if (cur_sector >= total_sectors) {
  223:         bmds->cur_sector = bmds->completed_sectors = total_sectors;
  224:         return 1;
  225:     }
  226: 
  227:     bmds->completed_sectors = cur_sector;
  228: 
  229:     cur_sector &= ~((int64_t)BDRV_SECTORS_PER_DIRTY_CHUNK - 1);
  230: 
  231:     /* we are going to transfer a full block even if it is not allocated */
  232:     nr_sectors = BDRV_SECTORS_PER_DIRTY_CHUNK;
  233: 
  234:     if (total_sectors - cur_sector < BDRV_SECTORS_PER_DIRTY_CHUNK) {
  235:         nr_sectors = total_sectors - cur_sector;
  236:     }
  237: 
  238:     blk = qemu_malloc(sizeof(BlkMigBlock));
  239:     blk->buf = qemu_malloc(BLOCK_SIZE);
  240:     blk->bmds = bmds;
  241:     blk->sector = cur_sector;
  242:     blk->nr_sectors = nr_sectors;
  243: 
  244:     blk->iov.iov_base = blk->buf;
  245:     blk->iov.iov_len = nr_sectors * BDRV_SECTOR_SIZE;
  246:     qemu_iovec_init_external(&blk->qiov, &blk->iov, 1);
  247: 
  248:     if (block_mig_state.submitted == 0) {
  249:         block_mig_state.prev_time_offset = qemu_get_clock_ns(rt_clock);
  250:     }
  251: 
  252:     blk->aiocb = bdrv_aio_readv(bs, cur_sector, &blk->qiov,
  253:                                 nr_sectors, blk_mig_read_cb, blk);
  254:     if (!blk->aiocb) {
  255:         goto error;
  256:     }
  257:     block_mig_state.submitted++;
  258: 
  259:     bdrv_reset_dirty(bs, cur_sector, nr_sectors);
  260:     bmds->cur_sector = cur_sector + nr_sectors;
  261: 
  262:     return (bmds->cur_sector >= total_sectors);
  263: 
  264: error:
  265:     monitor_printf(mon, "Error reading sector %" PRId64 "\n", cur_sector);
  266:     qemu_file_set_error(f);
  267:     qemu_free(blk->buf);
  268:     qemu_free(blk);
  269:     return 0;
  270: }
  271: 
  272: static void set_dirty_tracking(int enable)
  273: {
  274:     BlkMigDevState *bmds;
  275: 
  276:     QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
  277:         bdrv_set_dirty_tracking(bmds->bs, enable);
  278:     }
  279: }
  280: 
  281: static void init_blk_migration_it(void *opaque, BlockDriverState *bs)
  282: {
  283:     Monitor *mon = opaque;
  284:     BlkMigDevState *bmds;
  285:     int64_t sectors;
  286: 
  287:     if (!bdrv_is_read_only(bs)) {
  288:         sectors = bdrv_getlength(bs) >> BDRV_SECTOR_BITS;
  289:         if (sectors <= 0) {
  290:             return;
  291:         }
  292: 
  293:         bmds = qemu_mallocz(sizeof(BlkMigDevState));
  294:         bmds->bs = bs;
  295:         bmds->bulk_completed = 0;
  296:         bmds->total_sectors = sectors;
  297:         bmds->completed_sectors = 0;
  298:         bmds->shared_base = block_mig_state.shared_base;
  299:         alloc_aio_bitmap(bmds);
  300:         drive_get_ref(drive_get_by_blockdev(bs));
  301:         bdrv_set_in_use(bs, 1);
  302: 
  303:         block_mig_state.total_sector_sum += sectors;
  304: 
  305:         if (bmds->shared_base) {
  306:             monitor_printf(mon, "Start migration for %s with shared base "
  307:                                 "image\n",
  308:                            bs->device_name);
  309:         } else {
  310:             monitor_printf(mon, "Start full migration for %s\n",
  311:                            bs->device_name);
  312:         }
  313: 
  314:         QSIMPLEQ_INSERT_TAIL(&block_mig_state.bmds_list, bmds, entry);
  315:     }
  316: }
  317: 
  318: static void init_blk_migration(Monitor *mon, QEMUFile *f)
  319: {
  320:     block_mig_state.submitted = 0;
  321:     block_mig_state.read_done = 0;
  322:     block_mig_state.transferred = 0;
  323:     block_mig_state.total_sector_sum = 0;
  324:     block_mig_state.prev_progress = -1;
  325:     block_mig_state.bulk_completed = 0;
  326:     block_mig_state.total_time = 0;
  327:     block_mig_state.reads = 0;
  328: 
  329:     bdrv_iterate(init_blk_migration_it, mon);
  330: }
  331: 
  332: static int blk_mig_save_bulked_block(Monitor *mon, QEMUFile *f)
  333: {
  334:     int64_t completed_sector_sum = 0;
  335:     BlkMigDevState *bmds;
  336:     int progress;
  337:     int ret = 0;
  338: 
  339:     QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
  340:         if (bmds->bulk_completed == 0) {
  341:             if (mig_save_device_bulk(mon, f, bmds) == 1) {
  342:                 /* completed bulk section for this device */
  343:                 bmds->bulk_completed = 1;
  344:             }
  345:             completed_sector_sum += bmds->completed_sectors;
  346:             ret = 1;
  347:             break;
  348:         } else {
  349:             completed_sector_sum += bmds->completed_sectors;
  350:         }
  351:     }
  352: 
  353:     if (block_mig_state.total_sector_sum != 0) {
  354:         progress = completed_sector_sum * 100 /
  355:                    block_mig_state.total_sector_sum;
  356:     } else {
  357:         progress = 100;
  358:     }
  359:     if (progress != block_mig_state.prev_progress) {
  360:         block_mig_state.prev_progress = progress;
  361:         qemu_put_be64(f, (progress << BDRV_SECTOR_BITS)
  362:                          | BLK_MIG_FLAG_PROGRESS);
  363:         monitor_printf(mon, "Completed %d %%\r", progress);
  364:         monitor_flush(mon);
  365:     }
  366: 
  367:     return ret;
  368: }
  369: 
  370: static void blk_mig_reset_dirty_cursor(void)
  371: {
  372:     BlkMigDevState *bmds;
  373: 
  374:     QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
  375:         bmds->cur_dirty = 0;
  376:     }
  377: }
  378: 
  379: static int mig_save_device_dirty(Monitor *mon, QEMUFile *f,
  380:                                  BlkMigDevState *bmds, int is_async)
  381: {
  382:     BlkMigBlock *blk;
  383:     int64_t total_sectors = bmds->total_sectors;
  384:     int64_t sector;
  385:     int nr_sectors;
  386: 
  387:     for (sector = bmds->cur_dirty; sector < bmds->total_sectors;) {
  388:         if (bmds_aio_inflight(bmds, sector)) {
  389:             qemu_aio_flush();
  390:         }
  391:         if (bdrv_get_dirty(bmds->bs, sector)) {
  392: 
  393:             if (total_sectors - sector < BDRV_SECTORS_PER_DIRTY_CHUNK) {
  394:                 nr_sectors = total_sectors - sector;
  395:             } else {
  396:                 nr_sectors = BDRV_SECTORS_PER_DIRTY_CHUNK;
  397:             }
  398:             blk = qemu_malloc(sizeof(BlkMigBlock));
  399:             blk->buf = qemu_malloc(BLOCK_SIZE);
  400:             blk->bmds = bmds;
  401:             blk->sector = sector;
  402:             blk->nr_sectors = nr_sectors;
  403: 
  404:             if (is_async) {
  405:                 blk->iov.iov_base = blk->buf;
  406:                 blk->iov.iov_len = nr_sectors * BDRV_SECTOR_SIZE;
  407:                 qemu_iovec_init_external(&blk->qiov, &blk->iov, 1);
  408: 
  409:                 if (block_mig_state.submitted == 0) {
  410:                     block_mig_state.prev_time_offset = qemu_get_clock_ns(rt_clock);
  411:                 }
  412: 
  413:                 blk->aiocb = bdrv_aio_readv(bmds->bs, sector, &blk->qiov,
  414:                                             nr_sectors, blk_mig_read_cb, blk);
  415:                 if (!blk->aiocb) {
  416:                     goto error;
  417:                 }
  418:                 block_mig_state.submitted++;
  419:                 bmds_set_aio_inflight(bmds, sector, nr_sectors, 1);
  420:             } else {
  421:                 if (bdrv_read(bmds->bs, sector, blk->buf,
  422:                               nr_sectors) < 0) {
  423:                     goto error;
  424:                 }
  425:                 blk_send(f, blk);
  426: 
  427:                 qemu_free(blk->buf);
  428:                 qemu_free(blk);
  429:             }
  430: 
  431:             bdrv_reset_dirty(bmds->bs, sector, nr_sectors);
  432:             break;
  433:         }
  434:         sector += BDRV_SECTORS_PER_DIRTY_CHUNK;
  435:         bmds->cur_dirty = sector;
  436:     }
  437: 
  438:     return (bmds->cur_dirty >= bmds->total_sectors);
  439: 
  440: error:
  441:     monitor_printf(mon, "Error reading sector %" PRId64 "\n", sector);
  442:     qemu_file_set_error(f);
  443:     qemu_free(blk->buf);
  444:     qemu_free(blk);
  445:     return 0;
  446: }
  447: 
  448: static int blk_mig_save_dirty_block(Monitor *mon, QEMUFile *f, int is_async)
  449: {
  450:     BlkMigDevState *bmds;
  451:     int ret = 0;
  452: 
  453:     QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
  454:         if (mig_save_device_dirty(mon, f, bmds, is_async) == 0) {
  455:             ret = 1;
  456:             break;
  457:         }
  458:     }
  459: 
  460:     return ret;
  461: }
  462: 
  463: static void flush_blks(QEMUFile* f)
  464: {
  465:     BlkMigBlock *blk;
  466: 
  467:     DPRINTF("%s Enter submitted %d read_done %d transferred %d\n",
  468:             __FUNCTION__, block_mig_state.submitted, block_mig_state.read_done,
  469:             block_mig_state.transferred);
  470: 
  471:     while ((blk = QSIMPLEQ_FIRST(&block_mig_state.blk_list)) != NULL) {
  472:         if (qemu_file_rate_limit(f)) {
  473:             break;
  474:         }
  475:         if (blk->ret < 0) {
  476:             qemu_file_set_error(f);
  477:             break;
  478:         }
  479:         blk_send(f, blk);
  480: 
  481:         QSIMPLEQ_REMOVE_HEAD(&block_mig_state.blk_list, entry);
  482:         qemu_free(blk->buf);
  483:         qemu_free(blk);
  484: 
  485:         block_mig_state.read_done--;
  486:         block_mig_state.transferred++;
  487:         assert(block_mig_state.read_done >= 0);
  488:     }
  489: 
  490:     DPRINTF("%s Exit submitted %d read_done %d transferred %d\n", __FUNCTION__,
  491:             block_mig_state.submitted, block_mig_state.read_done,
  492:             block_mig_state.transferred);
  493: }
  494: 
  495: static int64_t get_remaining_dirty(void)
  496: {
  497:     BlkMigDevState *bmds;
  498:     int64_t dirty = 0;
  499: 
  500:     QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
  501:         dirty += bdrv_get_dirty_count(bmds->bs);
  502:     }
  503: 
  504:     return dirty * BLOCK_SIZE;
  505: }
  506: 
  507: static int is_stage2_completed(void)
  508: {
  509:     int64_t remaining_dirty;
  510:     long double bwidth;
  511: 
  512:     if (block_mig_state.bulk_completed == 1) {
  513: 
  514:         remaining_dirty = get_remaining_dirty();
  515:         if (remaining_dirty == 0) {
  516:             return 1;
  517:         }
  518: 
  519:         bwidth = compute_read_bwidth();
  520: 
  521:         if ((remaining_dirty / bwidth) <=
  522:             migrate_max_downtime()) {
  523:             /* finish stage2 because we think that we can finish remaing work
  524:                below max_downtime */
  525: 
  526:             return 1;
  527:         }
  528:     }
  529: 
  530:     return 0;
  531: }
  532: 
  533: static void blk_mig_cleanup(Monitor *mon)
  534: {
  535:     BlkMigDevState *bmds;
  536:     BlkMigBlock *blk;
  537: 
  538:     set_dirty_tracking(0);
  539: 
  540:     while ((bmds = QSIMPLEQ_FIRST(&block_mig_state.bmds_list)) != NULL) {
  541:         QSIMPLEQ_REMOVE_HEAD(&block_mig_state.bmds_list, entry);
  542:         bdrv_set_in_use(bmds->bs, 0);
  543:         drive_put_ref(drive_get_by_blockdev(bmds->bs));
  544:         qemu_free(bmds->aio_bitmap);
  545:         qemu_free(bmds);
  546:     }
  547: 
  548:     while ((blk = QSIMPLEQ_FIRST(&block_mig_state.blk_list)) != NULL) {
  549:         QSIMPLEQ_REMOVE_HEAD(&block_mig_state.blk_list, entry);
  550:         qemu_free(blk->buf);
  551:         qemu_free(blk);
  552:     }
  553: 
  554:     monitor_printf(mon, "\n");
  555: }
  556: 
  557: static int block_save_live(Monitor *mon, QEMUFile *f, int stage, void *opaque)
  558: {
  559:     DPRINTF("Enter save live stage %d submitted %d transferred %d\n",
  560:             stage, block_mig_state.submitted, block_mig_state.transferred);
  561: 
  562:     if (stage < 0) {
  563:         blk_mig_cleanup(mon);
  564:         return 0;
  565:     }
  566: 
  567:     if (block_mig_state.blk_enable != 1) {
  568:         /* no need to migrate storage */
  569:         qemu_put_be64(f, BLK_MIG_FLAG_EOS);
  570:         return 1;
  571:     }
  572: 
  573:     if (stage == 1) {
  574:         init_blk_migration(mon, f);
  575: 
  576:         /* start track dirty blocks */
  577:         set_dirty_tracking(1);
  578:     }
  579: 
  580:     flush_blks(f);
  581: 
  582:     if (qemu_file_has_error(f)) {
  583:         blk_mig_cleanup(mon);
  584:         return 0;
  585:     }
  586: 
  587:     blk_mig_reset_dirty_cursor();
  588: 
  589:     if (stage == 2) {
  590:         /* control the rate of transfer */
  591:         while ((block_mig_state.submitted +
  592:                 block_mig_state.read_done) * BLOCK_SIZE <
  593:                qemu_file_get_rate_limit(f)) {
  594:             if (block_mig_state.bulk_completed == 0) {
  595:                 /* first finish the bulk phase */
  596:                 if (blk_mig_save_bulked_block(mon, f) == 0) {
  597:                     /* finished saving bulk on all devices */
  598:                     block_mig_state.bulk_completed = 1;
  599:                 }
  600:             } else {
  601:                 if (blk_mig_save_dirty_block(mon, f, 1) == 0) {
  602:                     /* no more dirty blocks */
  603:                     break;
  604:                 }
  605:             }
  606:         }
  607: 
  608:         flush_blks(f);
  609: 
  610:         if (qemu_file_has_error(f)) {
  611:             blk_mig_cleanup(mon);
  612:             return 0;
  613:         }
  614:     }
  615: 
  616:     if (stage == 3) {
  617:         /* we know for sure that save bulk is completed and
  618:            all async read completed */
  619:         assert(block_mig_state.submitted == 0);
  620: 
  621:         while (blk_mig_save_dirty_block(mon, f, 0) != 0);
  622:         blk_mig_cleanup(mon);
  623: 
  624:         /* report completion */
  625:         qemu_put_be64(f, (100 << BDRV_SECTOR_BITS) | BLK_MIG_FLAG_PROGRESS);
  626: 
  627:         if (qemu_file_has_error(f)) {
  628:             return 0;
  629:         }
  630: 
  631:         monitor_printf(mon, "Block migration completed\n");
  632:     }
  633: 
  634:     qemu_put_be64(f, BLK_MIG_FLAG_EOS);
  635: 
  636:     return ((stage == 2) && is_stage2_completed());
  637: }
  638: 
  639: static int block_load(QEMUFile *f, void *opaque, int version_id)
  640: {
  641:     static int banner_printed;
  642:     int len, flags;
  643:     char device_name[256];
  644:     int64_t addr;
  645:     BlockDriverState *bs, *bs_prev = NULL;
  646:     uint8_t *buf;
  647:     int64_t total_sectors = 0;
  648:     int nr_sectors;
  649: 
  650:     do {
  651:         addr = qemu_get_be64(f);
  652: 
  653:         flags = addr & ~BDRV_SECTOR_MASK;
  654:         addr >>= BDRV_SECTOR_BITS;
  655: 
  656:         if (flags & BLK_MIG_FLAG_DEVICE_BLOCK) {
  657:             int ret;
  658:             /* get device name */
  659:             len = qemu_get_byte(f);
  660:             qemu_get_buffer(f, (uint8_t *)device_name, len);
  661:             device_name[len] = '\0';
  662: 
  663:             bs = bdrv_find(device_name);
  664:             if (!bs) {
  665:                 fprintf(stderr, "Error unknown block device %s\n",
  666:                         device_name);
  667:                 return -EINVAL;
  668:             }
  669: 
  670:             if (bs != bs_prev) {
  671:                 bs_prev = bs;
  672:                 total_sectors = bdrv_getlength(bs) >> BDRV_SECTOR_BITS;
  673:                 if (total_sectors <= 0) {
  674:                     error_report("Error getting length of block device %s",
  675:                                  device_name);
  676:                     return -EINVAL;
  677:                 }
  678:             }
  679: 
  680:             if (total_sectors - addr < BDRV_SECTORS_PER_DIRTY_CHUNK) {
  681:                 nr_sectors = total_sectors - addr;
  682:             } else {
  683:                 nr_sectors = BDRV_SECTORS_PER_DIRTY_CHUNK;
  684:             }
  685: 
  686:             buf = qemu_malloc(BLOCK_SIZE);
  687: 
  688:             qemu_get_buffer(f, buf, BLOCK_SIZE);
  689:             ret = bdrv_write(bs, addr, buf, nr_sectors);
  690: 
  691:             qemu_free(buf);
  692:             if (ret < 0) {
  693:                 return ret;
  694:             }
  695:         } else if (flags & BLK_MIG_FLAG_PROGRESS) {
  696:             if (!banner_printed) {
  697:                 printf("Receiving block device images\n");
  698:                 banner_printed = 1;
  699:             }
  700:             printf("Completed %d %%%c", (int)addr,
  701:                    (addr == 100) ? '\n' : '\r');
  702:             fflush(stdout);
  703:         } else if (!(flags & BLK_MIG_FLAG_EOS)) {
  704:             fprintf(stderr, "Unknown flags\n");
  705:             return -EINVAL;
  706:         }
  707:         if (qemu_file_has_error(f)) {
  708:             return -EIO;
  709:         }
  710:     } while (!(flags & BLK_MIG_FLAG_EOS));
  711: 
  712:     return 0;
  713: }
  714: 
  715: static void block_set_params(int blk_enable, int shared_base, void *opaque)
  716: {
  717:     block_mig_state.blk_enable = blk_enable;
  718:     block_mig_state.shared_base = shared_base;
  719: 
  720:     /* shared base means that blk_enable = 1 */
  721:     block_mig_state.blk_enable |= shared_base;
  722: }
  723: 
  724: void blk_mig_init(void)
  725: {
  726:     QSIMPLEQ_INIT(&block_mig_state.bmds_list);
  727:     QSIMPLEQ_INIT(&block_mig_state.blk_list);
  728: 
  729:     register_savevm_live(NULL, "block", 0, 1, block_set_params,
  730:                          block_save_live, NULL, block_load, &block_mig_state);
  731: }

unix.superglobalmegacorp.com