Annotation of qemu/block-migration.c, revision 1.1.1.4

1.1       root        1: /*
                      2:  * QEMU live block migration
                      3:  *
                      4:  * Copyright IBM, Corp. 2009
                      5:  *
                      6:  * Authors:
                      7:  *  Liran Schour   <lirans@il.ibm.com>
                      8:  *
                      9:  * This work is licensed under the terms of the GNU GPL, version 2.  See
                     10:  * the COPYING file in the top-level directory.
                     11:  *
                     12:  */
                     13: 
                     14: #include "qemu-common.h"
                     15: #include "block_int.h"
                     16: #include "hw/hw.h"
                     17: #include "qemu-queue.h"
1.1.1.2   root       18: #include "qemu-timer.h"
1.1       root       19: #include "monitor.h"
                     20: #include "block-migration.h"
1.1.1.2   root       21: #include "migration.h"
1.1.1.3   root       22: #include "blockdev.h"
1.1       root       23: #include <assert.h>
                     24: 
                     25: #define BLOCK_SIZE (BDRV_SECTORS_PER_DIRTY_CHUNK << BDRV_SECTOR_BITS)
                     26: 
                     27: #define BLK_MIG_FLAG_DEVICE_BLOCK       0x01
                     28: #define BLK_MIG_FLAG_EOS                0x02
                     29: #define BLK_MIG_FLAG_PROGRESS           0x04
                     30: 
                     31: #define MAX_IS_ALLOCATED_SEARCH 65536
                     32: 
                     33: //#define DEBUG_BLK_MIGRATION
                     34: 
                     35: #ifdef DEBUG_BLK_MIGRATION
1.1.1.2   root       36: #define DPRINTF(fmt, ...) \
1.1       root       37:     do { printf("blk_migration: " fmt, ## __VA_ARGS__); } while (0)
                     38: #else
1.1.1.2   root       39: #define DPRINTF(fmt, ...) \
1.1       root       40:     do { } while (0)
                     41: #endif
                     42: 
                     43: typedef struct BlkMigDevState {
                     44:     BlockDriverState *bs;
                     45:     int bulk_completed;
                     46:     int shared_base;
                     47:     int64_t cur_sector;
1.1.1.2   root       48:     int64_t cur_dirty;
1.1       root       49:     int64_t completed_sectors;
                     50:     int64_t total_sectors;
                     51:     int64_t dirty;
                     52:     QSIMPLEQ_ENTRY(BlkMigDevState) entry;
1.1.1.3   root       53:     unsigned long *aio_bitmap;
1.1       root       54: } BlkMigDevState;
                     55: 
                     56: typedef struct BlkMigBlock {
                     57:     uint8_t *buf;
                     58:     BlkMigDevState *bmds;
                     59:     int64_t sector;
1.1.1.3   root       60:     int nr_sectors;
1.1       root       61:     struct iovec iov;
                     62:     QEMUIOVector qiov;
                     63:     BlockDriverAIOCB *aiocb;
                     64:     int ret;
                     65:     QSIMPLEQ_ENTRY(BlkMigBlock) entry;
                     66: } BlkMigBlock;
                     67: 
                     68: typedef struct BlkMigState {
                     69:     int blk_enable;
                     70:     int shared_base;
                     71:     QSIMPLEQ_HEAD(bmds_list, BlkMigDevState) bmds_list;
                     72:     QSIMPLEQ_HEAD(blk_list, BlkMigBlock) blk_list;
                     73:     int submitted;
                     74:     int read_done;
                     75:     int transferred;
                     76:     int64_t total_sector_sum;
                     77:     int prev_progress;
1.1.1.2   root       78:     int bulk_completed;
                     79:     long double total_time;
1.1.1.4 ! root       80:     long double prev_time_offset;
1.1.1.2   root       81:     int reads;
1.1       root       82: } BlkMigState;
                     83: 
                     84: static BlkMigState block_mig_state;
                     85: 
                     86: static void blk_send(QEMUFile *f, BlkMigBlock * blk)
                     87: {
                     88:     int len;
                     89: 
                     90:     /* sector number and flags */
                     91:     qemu_put_be64(f, (blk->sector << BDRV_SECTOR_BITS)
                     92:                      | BLK_MIG_FLAG_DEVICE_BLOCK);
                     93: 
                     94:     /* device name */
                     95:     len = strlen(blk->bmds->bs->device_name);
                     96:     qemu_put_byte(f, len);
                     97:     qemu_put_buffer(f, (uint8_t *)blk->bmds->bs->device_name, len);
                     98: 
                     99:     qemu_put_buffer(f, blk->buf, BLOCK_SIZE);
                    100: }
                    101: 
                    102: int blk_mig_active(void)
                    103: {
                    104:     return !QSIMPLEQ_EMPTY(&block_mig_state.bmds_list);
                    105: }
                    106: 
                    107: uint64_t blk_mig_bytes_transferred(void)
                    108: {
                    109:     BlkMigDevState *bmds;
                    110:     uint64_t sum = 0;
                    111: 
                    112:     QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
                    113:         sum += bmds->completed_sectors;
                    114:     }
                    115:     return sum << BDRV_SECTOR_BITS;
                    116: }
                    117: 
                    118: uint64_t blk_mig_bytes_remaining(void)
                    119: {
                    120:     return blk_mig_bytes_total() - blk_mig_bytes_transferred();
                    121: }
                    122: 
                    123: uint64_t blk_mig_bytes_total(void)
                    124: {
                    125:     BlkMigDevState *bmds;
                    126:     uint64_t sum = 0;
                    127: 
                    128:     QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
                    129:         sum += bmds->total_sectors;
                    130:     }
                    131:     return sum << BDRV_SECTOR_BITS;
                    132: }
                    133: 
1.1.1.2   root      134: static inline long double compute_read_bwidth(void)
                    135: {
                    136:     assert(block_mig_state.total_time != 0);
1.1.1.4 ! root      137:     return (block_mig_state.reads / block_mig_state.total_time) * BLOCK_SIZE;
1.1.1.2   root      138: }
                    139: 
1.1.1.3   root      140: static int bmds_aio_inflight(BlkMigDevState *bmds, int64_t sector)
                    141: {
                    142:     int64_t chunk = sector / (int64_t)BDRV_SECTORS_PER_DIRTY_CHUNK;
                    143: 
                    144:     if ((sector << BDRV_SECTOR_BITS) < bdrv_getlength(bmds->bs)) {
                    145:         return !!(bmds->aio_bitmap[chunk / (sizeof(unsigned long) * 8)] &
                    146:             (1UL << (chunk % (sizeof(unsigned long) * 8))));
                    147:     } else {
                    148:         return 0;
                    149:     }
                    150: }
                    151: 
                    152: static void bmds_set_aio_inflight(BlkMigDevState *bmds, int64_t sector_num,
                    153:                              int nb_sectors, int set)
                    154: {
                    155:     int64_t start, end;
                    156:     unsigned long val, idx, bit;
                    157: 
                    158:     start = sector_num / BDRV_SECTORS_PER_DIRTY_CHUNK;
                    159:     end = (sector_num + nb_sectors - 1) / BDRV_SECTORS_PER_DIRTY_CHUNK;
                    160: 
                    161:     for (; start <= end; start++) {
                    162:         idx = start / (sizeof(unsigned long) * 8);
                    163:         bit = start % (sizeof(unsigned long) * 8);
                    164:         val = bmds->aio_bitmap[idx];
                    165:         if (set) {
                    166:             val |= 1UL << bit;
                    167:         } else {
                    168:             val &= ~(1UL << bit);
                    169:         }
                    170:         bmds->aio_bitmap[idx] = val;
                    171:     }
                    172: }
                    173: 
                    174: static void alloc_aio_bitmap(BlkMigDevState *bmds)
                    175: {
                    176:     BlockDriverState *bs = bmds->bs;
                    177:     int64_t bitmap_size;
                    178: 
                    179:     bitmap_size = (bdrv_getlength(bs) >> BDRV_SECTOR_BITS) +
                    180:             BDRV_SECTORS_PER_DIRTY_CHUNK * 8 - 1;
                    181:     bitmap_size /= BDRV_SECTORS_PER_DIRTY_CHUNK * 8;
                    182: 
                    183:     bmds->aio_bitmap = qemu_mallocz(bitmap_size);
                    184: }
                    185: 
1.1       root      186: static void blk_mig_read_cb(void *opaque, int ret)
                    187: {
1.1.1.4 ! root      188:     long double curr_time = qemu_get_clock_ns(rt_clock);
1.1       root      189:     BlkMigBlock *blk = opaque;
                    190: 
                    191:     blk->ret = ret;
                    192: 
1.1.1.4 ! root      193:     block_mig_state.reads++;
        !           194:     block_mig_state.total_time += (curr_time - block_mig_state.prev_time_offset);
        !           195:     block_mig_state.prev_time_offset = curr_time;
1.1.1.2   root      196: 
1.1       root      197:     QSIMPLEQ_INSERT_TAIL(&block_mig_state.blk_list, blk, entry);
1.1.1.3   root      198:     bmds_set_aio_inflight(blk->bmds, blk->sector, blk->nr_sectors, 0);
1.1       root      199: 
                    200:     block_mig_state.submitted--;
                    201:     block_mig_state.read_done++;
                    202:     assert(block_mig_state.submitted >= 0);
                    203: }
                    204: 
                    205: static int mig_save_device_bulk(Monitor *mon, QEMUFile *f,
1.1.1.2   root      206:                                 BlkMigDevState *bmds)
1.1       root      207: {
                    208:     int64_t total_sectors = bmds->total_sectors;
                    209:     int64_t cur_sector = bmds->cur_sector;
                    210:     BlockDriverState *bs = bmds->bs;
                    211:     BlkMigBlock *blk;
                    212:     int nr_sectors;
                    213: 
                    214:     if (bmds->shared_base) {
                    215:         while (cur_sector < total_sectors &&
                    216:                !bdrv_is_allocated(bs, cur_sector, MAX_IS_ALLOCATED_SEARCH,
                    217:                                   &nr_sectors)) {
                    218:             cur_sector += nr_sectors;
                    219:         }
                    220:     }
                    221: 
                    222:     if (cur_sector >= total_sectors) {
                    223:         bmds->cur_sector = bmds->completed_sectors = total_sectors;
                    224:         return 1;
                    225:     }
                    226: 
                    227:     bmds->completed_sectors = cur_sector;
                    228: 
                    229:     cur_sector &= ~((int64_t)BDRV_SECTORS_PER_DIRTY_CHUNK - 1);
                    230: 
                    231:     /* we are going to transfer a full block even if it is not allocated */
                    232:     nr_sectors = BDRV_SECTORS_PER_DIRTY_CHUNK;
                    233: 
                    234:     if (total_sectors - cur_sector < BDRV_SECTORS_PER_DIRTY_CHUNK) {
                    235:         nr_sectors = total_sectors - cur_sector;
                    236:     }
                    237: 
                    238:     blk = qemu_malloc(sizeof(BlkMigBlock));
                    239:     blk->buf = qemu_malloc(BLOCK_SIZE);
                    240:     blk->bmds = bmds;
                    241:     blk->sector = cur_sector;
1.1.1.3   root      242:     blk->nr_sectors = nr_sectors;
1.1       root      243: 
1.1.1.2   root      244:     blk->iov.iov_base = blk->buf;
                    245:     blk->iov.iov_len = nr_sectors * BDRV_SECTOR_SIZE;
                    246:     qemu_iovec_init_external(&blk->qiov, &blk->iov, 1);
                    247: 
1.1.1.4 ! root      248:     if (block_mig_state.submitted == 0) {
        !           249:         block_mig_state.prev_time_offset = qemu_get_clock_ns(rt_clock);
        !           250:     }
1.1.1.2   root      251: 
                    252:     blk->aiocb = bdrv_aio_readv(bs, cur_sector, &blk->qiov,
                    253:                                 nr_sectors, blk_mig_read_cb, blk);
                    254:     if (!blk->aiocb) {
                    255:         goto error;
1.1       root      256:     }
1.1.1.2   root      257:     block_mig_state.submitted++;
1.1       root      258: 
                    259:     bdrv_reset_dirty(bs, cur_sector, nr_sectors);
                    260:     bmds->cur_sector = cur_sector + nr_sectors;
                    261: 
                    262:     return (bmds->cur_sector >= total_sectors);
                    263: 
                    264: error:
                    265:     monitor_printf(mon, "Error reading sector %" PRId64 "\n", cur_sector);
                    266:     qemu_file_set_error(f);
                    267:     qemu_free(blk->buf);
                    268:     qemu_free(blk);
                    269:     return 0;
                    270: }
                    271: 
                    272: static void set_dirty_tracking(int enable)
                    273: {
                    274:     BlkMigDevState *bmds;
                    275: 
                    276:     QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
                    277:         bdrv_set_dirty_tracking(bmds->bs, enable);
                    278:     }
                    279: }
                    280: 
1.1.1.2   root      281: static void init_blk_migration_it(void *opaque, BlockDriverState *bs)
1.1       root      282: {
1.1.1.2   root      283:     Monitor *mon = opaque;
1.1       root      284:     BlkMigDevState *bmds;
                    285:     int64_t sectors;
                    286: 
1.1.1.2   root      287:     if (!bdrv_is_read_only(bs)) {
                    288:         sectors = bdrv_getlength(bs) >> BDRV_SECTOR_BITS;
                    289:         if (sectors <= 0) {
                    290:             return;
                    291:         }
                    292: 
                    293:         bmds = qemu_mallocz(sizeof(BlkMigDevState));
                    294:         bmds->bs = bs;
                    295:         bmds->bulk_completed = 0;
                    296:         bmds->total_sectors = sectors;
                    297:         bmds->completed_sectors = 0;
                    298:         bmds->shared_base = block_mig_state.shared_base;
1.1.1.3   root      299:         alloc_aio_bitmap(bmds);
                    300:         drive_get_ref(drive_get_by_blockdev(bs));
                    301:         bdrv_set_in_use(bs, 1);
1.1.1.2   root      302: 
                    303:         block_mig_state.total_sector_sum += sectors;
                    304: 
                    305:         if (bmds->shared_base) {
                    306:             monitor_printf(mon, "Start migration for %s with shared base "
                    307:                                 "image\n",
                    308:                            bs->device_name);
                    309:         } else {
                    310:             monitor_printf(mon, "Start full migration for %s\n",
                    311:                            bs->device_name);
                    312:         }
                    313: 
                    314:         QSIMPLEQ_INSERT_TAIL(&block_mig_state.bmds_list, bmds, entry);
                    315:     }
                    316: }
                    317: 
                    318: static void init_blk_migration(Monitor *mon, QEMUFile *f)
                    319: {
1.1       root      320:     block_mig_state.submitted = 0;
                    321:     block_mig_state.read_done = 0;
                    322:     block_mig_state.transferred = 0;
                    323:     block_mig_state.total_sector_sum = 0;
                    324:     block_mig_state.prev_progress = -1;
1.1.1.2   root      325:     block_mig_state.bulk_completed = 0;
                    326:     block_mig_state.total_time = 0;
                    327:     block_mig_state.reads = 0;
1.1       root      328: 
1.1.1.2   root      329:     bdrv_iterate(init_blk_migration_it, mon);
1.1       root      330: }
                    331: 
1.1.1.2   root      332: static int blk_mig_save_bulked_block(Monitor *mon, QEMUFile *f)
1.1       root      333: {
                    334:     int64_t completed_sector_sum = 0;
                    335:     BlkMigDevState *bmds;
                    336:     int progress;
                    337:     int ret = 0;
                    338: 
                    339:     QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
                    340:         if (bmds->bulk_completed == 0) {
1.1.1.2   root      341:             if (mig_save_device_bulk(mon, f, bmds) == 1) {
1.1       root      342:                 /* completed bulk section for this device */
                    343:                 bmds->bulk_completed = 1;
                    344:             }
                    345:             completed_sector_sum += bmds->completed_sectors;
                    346:             ret = 1;
                    347:             break;
                    348:         } else {
                    349:             completed_sector_sum += bmds->completed_sectors;
                    350:         }
                    351:     }
                    352: 
1.1.1.3   root      353:     if (block_mig_state.total_sector_sum != 0) {
                    354:         progress = completed_sector_sum * 100 /
                    355:                    block_mig_state.total_sector_sum;
                    356:     } else {
                    357:         progress = 100;
                    358:     }
1.1       root      359:     if (progress != block_mig_state.prev_progress) {
                    360:         block_mig_state.prev_progress = progress;
                    361:         qemu_put_be64(f, (progress << BDRV_SECTOR_BITS)
                    362:                          | BLK_MIG_FLAG_PROGRESS);
                    363:         monitor_printf(mon, "Completed %d %%\r", progress);
                    364:         monitor_flush(mon);
                    365:     }
                    366: 
                    367:     return ret;
                    368: }
                    369: 
1.1.1.2   root      370: static void blk_mig_reset_dirty_cursor(void)
1.1       root      371: {
                    372:     BlkMigDevState *bmds;
1.1.1.2   root      373: 
                    374:     QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
                    375:         bmds->cur_dirty = 0;
                    376:     }
                    377: }
                    378: 
                    379: static int mig_save_device_dirty(Monitor *mon, QEMUFile *f,
                    380:                                  BlkMigDevState *bmds, int is_async)
                    381: {
                    382:     BlkMigBlock *blk;
                    383:     int64_t total_sectors = bmds->total_sectors;
1.1       root      384:     int64_t sector;
1.1.1.2   root      385:     int nr_sectors;
1.1       root      386: 
1.1.1.2   root      387:     for (sector = bmds->cur_dirty; sector < bmds->total_sectors;) {
1.1.1.3   root      388:         if (bmds_aio_inflight(bmds, sector)) {
                    389:             qemu_aio_flush();
                    390:         }
1.1.1.2   root      391:         if (bdrv_get_dirty(bmds->bs, sector)) {
1.1       root      392: 
1.1.1.2   root      393:             if (total_sectors - sector < BDRV_SECTORS_PER_DIRTY_CHUNK) {
                    394:                 nr_sectors = total_sectors - sector;
                    395:             } else {
                    396:                 nr_sectors = BDRV_SECTORS_PER_DIRTY_CHUNK;
                    397:             }
                    398:             blk = qemu_malloc(sizeof(BlkMigBlock));
                    399:             blk->buf = qemu_malloc(BLOCK_SIZE);
                    400:             blk->bmds = bmds;
                    401:             blk->sector = sector;
1.1.1.3   root      402:             blk->nr_sectors = nr_sectors;
1.1.1.2   root      403: 
                    404:             if (is_async) {
                    405:                 blk->iov.iov_base = blk->buf;
                    406:                 blk->iov.iov_len = nr_sectors * BDRV_SECTOR_SIZE;
                    407:                 qemu_iovec_init_external(&blk->qiov, &blk->iov, 1);
                    408: 
1.1.1.4 ! root      409:                 if (block_mig_state.submitted == 0) {
        !           410:                     block_mig_state.prev_time_offset = qemu_get_clock_ns(rt_clock);
        !           411:                 }
1.1.1.2   root      412: 
                    413:                 blk->aiocb = bdrv_aio_readv(bmds->bs, sector, &blk->qiov,
                    414:                                             nr_sectors, blk_mig_read_cb, blk);
                    415:                 if (!blk->aiocb) {
                    416:                     goto error;
1.1       root      417:                 }
1.1.1.2   root      418:                 block_mig_state.submitted++;
1.1.1.3   root      419:                 bmds_set_aio_inflight(bmds, sector, nr_sectors, 1);
1.1.1.2   root      420:             } else {
                    421:                 if (bdrv_read(bmds->bs, sector, blk->buf,
                    422:                               nr_sectors) < 0) {
                    423:                     goto error;
                    424:                 }
                    425:                 blk_send(f, blk);
1.1       root      426: 
1.1.1.2   root      427:                 qemu_free(blk->buf);
                    428:                 qemu_free(blk);
1.1       root      429:             }
1.1.1.2   root      430: 
                    431:             bdrv_reset_dirty(bmds->bs, sector, nr_sectors);
                    432:             break;
1.1       root      433:         }
1.1.1.2   root      434:         sector += BDRV_SECTORS_PER_DIRTY_CHUNK;
                    435:         bmds->cur_dirty = sector;
1.1       root      436:     }
                    437: 
1.1.1.2   root      438:     return (bmds->cur_dirty >= bmds->total_sectors);
                    439: 
                    440: error:
                    441:     monitor_printf(mon, "Error reading sector %" PRId64 "\n", sector);
                    442:     qemu_file_set_error(f);
                    443:     qemu_free(blk->buf);
                    444:     qemu_free(blk);
                    445:     return 0;
                    446: }
                    447: 
                    448: static int blk_mig_save_dirty_block(Monitor *mon, QEMUFile *f, int is_async)
                    449: {
                    450:     BlkMigDevState *bmds;
                    451:     int ret = 0;
                    452: 
                    453:     QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
                    454:         if (mig_save_device_dirty(mon, f, bmds, is_async) == 0) {
                    455:             ret = 1;
                    456:             break;
                    457:         }
                    458:     }
                    459: 
                    460:     return ret;
1.1       root      461: }
                    462: 
                    463: static void flush_blks(QEMUFile* f)
                    464: {
                    465:     BlkMigBlock *blk;
                    466: 
1.1.1.2   root      467:     DPRINTF("%s Enter submitted %d read_done %d transferred %d\n",
1.1       root      468:             __FUNCTION__, block_mig_state.submitted, block_mig_state.read_done,
                    469:             block_mig_state.transferred);
                    470: 
                    471:     while ((blk = QSIMPLEQ_FIRST(&block_mig_state.blk_list)) != NULL) {
                    472:         if (qemu_file_rate_limit(f)) {
                    473:             break;
                    474:         }
                    475:         if (blk->ret < 0) {
                    476:             qemu_file_set_error(f);
                    477:             break;
                    478:         }
                    479:         blk_send(f, blk);
                    480: 
                    481:         QSIMPLEQ_REMOVE_HEAD(&block_mig_state.blk_list, entry);
                    482:         qemu_free(blk->buf);
                    483:         qemu_free(blk);
                    484: 
                    485:         block_mig_state.read_done--;
                    486:         block_mig_state.transferred++;
                    487:         assert(block_mig_state.read_done >= 0);
                    488:     }
                    489: 
1.1.1.2   root      490:     DPRINTF("%s Exit submitted %d read_done %d transferred %d\n", __FUNCTION__,
1.1       root      491:             block_mig_state.submitted, block_mig_state.read_done,
                    492:             block_mig_state.transferred);
                    493: }
                    494: 
1.1.1.2   root      495: static int64_t get_remaining_dirty(void)
1.1       root      496: {
                    497:     BlkMigDevState *bmds;
1.1.1.2   root      498:     int64_t dirty = 0;
1.1       root      499: 
1.1.1.2   root      500:     QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
                    501:         dirty += bdrv_get_dirty_count(bmds->bs);
1.1       root      502:     }
                    503: 
1.1.1.2   root      504:     return dirty * BLOCK_SIZE;
                    505: }
                    506: 
                    507: static int is_stage2_completed(void)
                    508: {
                    509:     int64_t remaining_dirty;
                    510:     long double bwidth;
                    511: 
                    512:     if (block_mig_state.bulk_completed == 1) {
                    513: 
                    514:         remaining_dirty = get_remaining_dirty();
1.1.1.3   root      515:         if (remaining_dirty == 0) {
                    516:             return 1;
                    517:         }
1.1.1.2   root      518: 
1.1.1.3   root      519:         bwidth = compute_read_bwidth();
1.1.1.2   root      520: 
1.1.1.3   root      521:         if ((remaining_dirty / bwidth) <=
1.1.1.2   root      522:             migrate_max_downtime()) {
                    523:             /* finish stage2 because we think that we can finish remaing work
                    524:                below max_downtime */
                    525: 
                    526:             return 1;
1.1       root      527:         }
                    528:     }
                    529: 
1.1.1.2   root      530:     return 0;
1.1       root      531: }
                    532: 
                    533: static void blk_mig_cleanup(Monitor *mon)
                    534: {
                    535:     BlkMigDevState *bmds;
                    536:     BlkMigBlock *blk;
                    537: 
1.1.1.3   root      538:     set_dirty_tracking(0);
                    539: 
1.1       root      540:     while ((bmds = QSIMPLEQ_FIRST(&block_mig_state.bmds_list)) != NULL) {
                    541:         QSIMPLEQ_REMOVE_HEAD(&block_mig_state.bmds_list, entry);
1.1.1.3   root      542:         bdrv_set_in_use(bmds->bs, 0);
                    543:         drive_put_ref(drive_get_by_blockdev(bmds->bs));
                    544:         qemu_free(bmds->aio_bitmap);
1.1       root      545:         qemu_free(bmds);
                    546:     }
                    547: 
                    548:     while ((blk = QSIMPLEQ_FIRST(&block_mig_state.blk_list)) != NULL) {
                    549:         QSIMPLEQ_REMOVE_HEAD(&block_mig_state.blk_list, entry);
                    550:         qemu_free(blk->buf);
                    551:         qemu_free(blk);
                    552:     }
                    553: 
                    554:     monitor_printf(mon, "\n");
                    555: }
                    556: 
                    557: static int block_save_live(Monitor *mon, QEMUFile *f, int stage, void *opaque)
                    558: {
1.1.1.2   root      559:     DPRINTF("Enter save live stage %d submitted %d transferred %d\n",
1.1       root      560:             stage, block_mig_state.submitted, block_mig_state.transferred);
                    561: 
                    562:     if (stage < 0) {
                    563:         blk_mig_cleanup(mon);
                    564:         return 0;
                    565:     }
                    566: 
                    567:     if (block_mig_state.blk_enable != 1) {
                    568:         /* no need to migrate storage */
                    569:         qemu_put_be64(f, BLK_MIG_FLAG_EOS);
                    570:         return 1;
                    571:     }
                    572: 
                    573:     if (stage == 1) {
                    574:         init_blk_migration(mon, f);
                    575: 
                    576:         /* start track dirty blocks */
                    577:         set_dirty_tracking(1);
                    578:     }
                    579: 
                    580:     flush_blks(f);
                    581: 
                    582:     if (qemu_file_has_error(f)) {
                    583:         blk_mig_cleanup(mon);
                    584:         return 0;
                    585:     }
                    586: 
1.1.1.2   root      587:     blk_mig_reset_dirty_cursor();
                    588: 
                    589:     if (stage == 2) {
                    590:         /* control the rate of transfer */
                    591:         while ((block_mig_state.submitted +
                    592:                 block_mig_state.read_done) * BLOCK_SIZE <
                    593:                qemu_file_get_rate_limit(f)) {
                    594:             if (block_mig_state.bulk_completed == 0) {
                    595:                 /* first finish the bulk phase */
                    596:                 if (blk_mig_save_bulked_block(mon, f) == 0) {
                    597:                     /* finished saving bulk on all devices */
                    598:                     block_mig_state.bulk_completed = 1;
                    599:                 }
                    600:             } else {
                    601:                 if (blk_mig_save_dirty_block(mon, f, 1) == 0) {
                    602:                     /* no more dirty blocks */
                    603:                     break;
                    604:                 }
                    605:             }
1.1       root      606:         }
                    607: 
1.1.1.2   root      608:         flush_blks(f);
1.1       root      609: 
1.1.1.2   root      610:         if (qemu_file_has_error(f)) {
                    611:             blk_mig_cleanup(mon);
                    612:             return 0;
                    613:         }
1.1       root      614:     }
                    615: 
                    616:     if (stage == 3) {
1.1.1.2   root      617:         /* we know for sure that save bulk is completed and
                    618:            all async read completed */
                    619:         assert(block_mig_state.submitted == 0);
1.1       root      620: 
1.1.1.2   root      621:         while (blk_mig_save_dirty_block(mon, f, 0) != 0);
1.1       root      622:         blk_mig_cleanup(mon);
                    623: 
                    624:         /* report completion */
                    625:         qemu_put_be64(f, (100 << BDRV_SECTOR_BITS) | BLK_MIG_FLAG_PROGRESS);
                    626: 
                    627:         if (qemu_file_has_error(f)) {
                    628:             return 0;
                    629:         }
                    630: 
                    631:         monitor_printf(mon, "Block migration completed\n");
                    632:     }
                    633: 
                    634:     qemu_put_be64(f, BLK_MIG_FLAG_EOS);
                    635: 
                    636:     return ((stage == 2) && is_stage2_completed());
                    637: }
                    638: 
                    639: static int block_load(QEMUFile *f, void *opaque, int version_id)
                    640: {
                    641:     static int banner_printed;
                    642:     int len, flags;
                    643:     char device_name[256];
                    644:     int64_t addr;
1.1.1.3   root      645:     BlockDriverState *bs, *bs_prev = NULL;
1.1       root      646:     uint8_t *buf;
1.1.1.3   root      647:     int64_t total_sectors = 0;
                    648:     int nr_sectors;
1.1       root      649: 
                    650:     do {
                    651:         addr = qemu_get_be64(f);
                    652: 
                    653:         flags = addr & ~BDRV_SECTOR_MASK;
                    654:         addr >>= BDRV_SECTOR_BITS;
                    655: 
                    656:         if (flags & BLK_MIG_FLAG_DEVICE_BLOCK) {
1.1.1.2   root      657:             int ret;
1.1       root      658:             /* get device name */
                    659:             len = qemu_get_byte(f);
                    660:             qemu_get_buffer(f, (uint8_t *)device_name, len);
                    661:             device_name[len] = '\0';
                    662: 
                    663:             bs = bdrv_find(device_name);
                    664:             if (!bs) {
                    665:                 fprintf(stderr, "Error unknown block device %s\n",
                    666:                         device_name);
                    667:                 return -EINVAL;
                    668:             }
                    669: 
1.1.1.3   root      670:             if (bs != bs_prev) {
                    671:                 bs_prev = bs;
                    672:                 total_sectors = bdrv_getlength(bs) >> BDRV_SECTOR_BITS;
                    673:                 if (total_sectors <= 0) {
1.1.1.4 ! root      674:                     error_report("Error getting length of block device %s",
1.1.1.3   root      675:                                  device_name);
                    676:                     return -EINVAL;
                    677:                 }
                    678:             }
                    679: 
                    680:             if (total_sectors - addr < BDRV_SECTORS_PER_DIRTY_CHUNK) {
                    681:                 nr_sectors = total_sectors - addr;
                    682:             } else {
                    683:                 nr_sectors = BDRV_SECTORS_PER_DIRTY_CHUNK;
                    684:             }
                    685: 
1.1       root      686:             buf = qemu_malloc(BLOCK_SIZE);
                    687: 
                    688:             qemu_get_buffer(f, buf, BLOCK_SIZE);
1.1.1.3   root      689:             ret = bdrv_write(bs, addr, buf, nr_sectors);
1.1       root      690: 
                    691:             qemu_free(buf);
1.1.1.2   root      692:             if (ret < 0) {
                    693:                 return ret;
                    694:             }
1.1       root      695:         } else if (flags & BLK_MIG_FLAG_PROGRESS) {
                    696:             if (!banner_printed) {
                    697:                 printf("Receiving block device images\n");
                    698:                 banner_printed = 1;
                    699:             }
                    700:             printf("Completed %d %%%c", (int)addr,
                    701:                    (addr == 100) ? '\n' : '\r');
                    702:             fflush(stdout);
                    703:         } else if (!(flags & BLK_MIG_FLAG_EOS)) {
                    704:             fprintf(stderr, "Unknown flags\n");
                    705:             return -EINVAL;
                    706:         }
                    707:         if (qemu_file_has_error(f)) {
                    708:             return -EIO;
                    709:         }
                    710:     } while (!(flags & BLK_MIG_FLAG_EOS));
                    711: 
                    712:     return 0;
                    713: }
                    714: 
                    715: static void block_set_params(int blk_enable, int shared_base, void *opaque)
                    716: {
                    717:     block_mig_state.blk_enable = blk_enable;
                    718:     block_mig_state.shared_base = shared_base;
                    719: 
                    720:     /* shared base means that blk_enable = 1 */
                    721:     block_mig_state.blk_enable |= shared_base;
                    722: }
                    723: 
                    724: void blk_mig_init(void)
                    725: {
                    726:     QSIMPLEQ_INIT(&block_mig_state.bmds_list);
                    727:     QSIMPLEQ_INIT(&block_mig_state.blk_list);
                    728: 
1.1.1.2   root      729:     register_savevm_live(NULL, "block", 0, 1, block_set_params,
                    730:                          block_save_live, NULL, block_load, &block_mig_state);
1.1       root      731: }

unix.superglobalmegacorp.com