Annotation of qemu/block-migration.c, revision 1.1.1.6

1.1       root        1: /*
                      2:  * QEMU live block migration
                      3:  *
                      4:  * Copyright IBM, Corp. 2009
                      5:  *
                      6:  * Authors:
                      7:  *  Liran Schour   <lirans@il.ibm.com>
                      8:  *
                      9:  * This work is licensed under the terms of the GNU GPL, version 2.  See
                     10:  * the COPYING file in the top-level directory.
                     11:  *
1.1.1.6 ! root       12:  * Contributions after 2012-01-13 are licensed under the terms of the
        !            13:  * GNU GPL, version 2 or (at your option) any later version.
1.1       root       14:  */
                     15: 
                     16: #include "qemu-common.h"
                     17: #include "block_int.h"
                     18: #include "hw/hw.h"
                     19: #include "qemu-queue.h"
1.1.1.2   root       20: #include "qemu-timer.h"
1.1       root       21: #include "block-migration.h"
1.1.1.2   root       22: #include "migration.h"
1.1.1.3   root       23: #include "blockdev.h"
1.1       root       24: #include <assert.h>
                     25: 
                     26: #define BLOCK_SIZE (BDRV_SECTORS_PER_DIRTY_CHUNK << BDRV_SECTOR_BITS)
                     27: 
                     28: #define BLK_MIG_FLAG_DEVICE_BLOCK       0x01
                     29: #define BLK_MIG_FLAG_EOS                0x02
                     30: #define BLK_MIG_FLAG_PROGRESS           0x04
                     31: 
                     32: #define MAX_IS_ALLOCATED_SEARCH 65536
                     33: 
                     34: //#define DEBUG_BLK_MIGRATION
                     35: 
                     36: #ifdef DEBUG_BLK_MIGRATION
1.1.1.2   root       37: #define DPRINTF(fmt, ...) \
1.1       root       38:     do { printf("blk_migration: " fmt, ## __VA_ARGS__); } while (0)
                     39: #else
1.1.1.2   root       40: #define DPRINTF(fmt, ...) \
1.1       root       41:     do { } while (0)
                     42: #endif
                     43: 
                     44: typedef struct BlkMigDevState {
                     45:     BlockDriverState *bs;
                     46:     int bulk_completed;
                     47:     int shared_base;
                     48:     int64_t cur_sector;
1.1.1.2   root       49:     int64_t cur_dirty;
1.1       root       50:     int64_t completed_sectors;
                     51:     int64_t total_sectors;
                     52:     int64_t dirty;
                     53:     QSIMPLEQ_ENTRY(BlkMigDevState) entry;
1.1.1.3   root       54:     unsigned long *aio_bitmap;
1.1       root       55: } BlkMigDevState;
                     56: 
                     57: typedef struct BlkMigBlock {
                     58:     uint8_t *buf;
                     59:     BlkMigDevState *bmds;
                     60:     int64_t sector;
1.1.1.3   root       61:     int nr_sectors;
1.1       root       62:     struct iovec iov;
                     63:     QEMUIOVector qiov;
                     64:     BlockDriverAIOCB *aiocb;
                     65:     int ret;
                     66:     QSIMPLEQ_ENTRY(BlkMigBlock) entry;
                     67: } BlkMigBlock;
                     68: 
                     69: typedef struct BlkMigState {
                     70:     int blk_enable;
                     71:     int shared_base;
                     72:     QSIMPLEQ_HEAD(bmds_list, BlkMigDevState) bmds_list;
                     73:     QSIMPLEQ_HEAD(blk_list, BlkMigBlock) blk_list;
                     74:     int submitted;
                     75:     int read_done;
                     76:     int transferred;
                     77:     int64_t total_sector_sum;
                     78:     int prev_progress;
1.1.1.2   root       79:     int bulk_completed;
                     80:     long double total_time;
1.1.1.4   root       81:     long double prev_time_offset;
1.1.1.2   root       82:     int reads;
1.1       root       83: } BlkMigState;
                     84: 
                     85: static BlkMigState block_mig_state;
                     86: 
                     87: static void blk_send(QEMUFile *f, BlkMigBlock * blk)
                     88: {
                     89:     int len;
                     90: 
                     91:     /* sector number and flags */
                     92:     qemu_put_be64(f, (blk->sector << BDRV_SECTOR_BITS)
                     93:                      | BLK_MIG_FLAG_DEVICE_BLOCK);
                     94: 
                     95:     /* device name */
                     96:     len = strlen(blk->bmds->bs->device_name);
                     97:     qemu_put_byte(f, len);
                     98:     qemu_put_buffer(f, (uint8_t *)blk->bmds->bs->device_name, len);
                     99: 
                    100:     qemu_put_buffer(f, blk->buf, BLOCK_SIZE);
                    101: }
                    102: 
                    103: int blk_mig_active(void)
                    104: {
                    105:     return !QSIMPLEQ_EMPTY(&block_mig_state.bmds_list);
                    106: }
                    107: 
                    108: uint64_t blk_mig_bytes_transferred(void)
                    109: {
                    110:     BlkMigDevState *bmds;
                    111:     uint64_t sum = 0;
                    112: 
                    113:     QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
                    114:         sum += bmds->completed_sectors;
                    115:     }
                    116:     return sum << BDRV_SECTOR_BITS;
                    117: }
                    118: 
                    119: uint64_t blk_mig_bytes_remaining(void)
                    120: {
                    121:     return blk_mig_bytes_total() - blk_mig_bytes_transferred();
                    122: }
                    123: 
                    124: uint64_t blk_mig_bytes_total(void)
                    125: {
                    126:     BlkMigDevState *bmds;
                    127:     uint64_t sum = 0;
                    128: 
                    129:     QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
                    130:         sum += bmds->total_sectors;
                    131:     }
                    132:     return sum << BDRV_SECTOR_BITS;
                    133: }
                    134: 
1.1.1.2   root      135: static inline long double compute_read_bwidth(void)
                    136: {
                    137:     assert(block_mig_state.total_time != 0);
1.1.1.4   root      138:     return (block_mig_state.reads / block_mig_state.total_time) * BLOCK_SIZE;
1.1.1.2   root      139: }
                    140: 
1.1.1.3   root      141: static int bmds_aio_inflight(BlkMigDevState *bmds, int64_t sector)
                    142: {
                    143:     int64_t chunk = sector / (int64_t)BDRV_SECTORS_PER_DIRTY_CHUNK;
                    144: 
                    145:     if ((sector << BDRV_SECTOR_BITS) < bdrv_getlength(bmds->bs)) {
                    146:         return !!(bmds->aio_bitmap[chunk / (sizeof(unsigned long) * 8)] &
                    147:             (1UL << (chunk % (sizeof(unsigned long) * 8))));
                    148:     } else {
                    149:         return 0;
                    150:     }
                    151: }
                    152: 
                    153: static void bmds_set_aio_inflight(BlkMigDevState *bmds, int64_t sector_num,
                    154:                              int nb_sectors, int set)
                    155: {
                    156:     int64_t start, end;
                    157:     unsigned long val, idx, bit;
                    158: 
                    159:     start = sector_num / BDRV_SECTORS_PER_DIRTY_CHUNK;
                    160:     end = (sector_num + nb_sectors - 1) / BDRV_SECTORS_PER_DIRTY_CHUNK;
                    161: 
                    162:     for (; start <= end; start++) {
                    163:         idx = start / (sizeof(unsigned long) * 8);
                    164:         bit = start % (sizeof(unsigned long) * 8);
                    165:         val = bmds->aio_bitmap[idx];
                    166:         if (set) {
                    167:             val |= 1UL << bit;
                    168:         } else {
                    169:             val &= ~(1UL << bit);
                    170:         }
                    171:         bmds->aio_bitmap[idx] = val;
                    172:     }
                    173: }
                    174: 
                    175: static void alloc_aio_bitmap(BlkMigDevState *bmds)
                    176: {
                    177:     BlockDriverState *bs = bmds->bs;
                    178:     int64_t bitmap_size;
                    179: 
                    180:     bitmap_size = (bdrv_getlength(bs) >> BDRV_SECTOR_BITS) +
                    181:             BDRV_SECTORS_PER_DIRTY_CHUNK * 8 - 1;
                    182:     bitmap_size /= BDRV_SECTORS_PER_DIRTY_CHUNK * 8;
                    183: 
1.1.1.5   root      184:     bmds->aio_bitmap = g_malloc0(bitmap_size);
1.1.1.3   root      185: }
                    186: 
1.1       root      187: static void blk_mig_read_cb(void *opaque, int ret)
                    188: {
1.1.1.4   root      189:     long double curr_time = qemu_get_clock_ns(rt_clock);
1.1       root      190:     BlkMigBlock *blk = opaque;
                    191: 
                    192:     blk->ret = ret;
                    193: 
1.1.1.4   root      194:     block_mig_state.reads++;
                    195:     block_mig_state.total_time += (curr_time - block_mig_state.prev_time_offset);
                    196:     block_mig_state.prev_time_offset = curr_time;
1.1.1.2   root      197: 
1.1       root      198:     QSIMPLEQ_INSERT_TAIL(&block_mig_state.blk_list, blk, entry);
1.1.1.3   root      199:     bmds_set_aio_inflight(blk->bmds, blk->sector, blk->nr_sectors, 0);
1.1       root      200: 
                    201:     block_mig_state.submitted--;
                    202:     block_mig_state.read_done++;
                    203:     assert(block_mig_state.submitted >= 0);
                    204: }
                    205: 
1.1.1.6 ! root      206: static int mig_save_device_bulk(QEMUFile *f, BlkMigDevState *bmds)
1.1       root      207: {
                    208:     int64_t total_sectors = bmds->total_sectors;
                    209:     int64_t cur_sector = bmds->cur_sector;
                    210:     BlockDriverState *bs = bmds->bs;
                    211:     BlkMigBlock *blk;
                    212:     int nr_sectors;
                    213: 
                    214:     if (bmds->shared_base) {
                    215:         while (cur_sector < total_sectors &&
                    216:                !bdrv_is_allocated(bs, cur_sector, MAX_IS_ALLOCATED_SEARCH,
                    217:                                   &nr_sectors)) {
                    218:             cur_sector += nr_sectors;
                    219:         }
                    220:     }
                    221: 
                    222:     if (cur_sector >= total_sectors) {
                    223:         bmds->cur_sector = bmds->completed_sectors = total_sectors;
                    224:         return 1;
                    225:     }
                    226: 
                    227:     bmds->completed_sectors = cur_sector;
                    228: 
                    229:     cur_sector &= ~((int64_t)BDRV_SECTORS_PER_DIRTY_CHUNK - 1);
                    230: 
                    231:     /* we are going to transfer a full block even if it is not allocated */
                    232:     nr_sectors = BDRV_SECTORS_PER_DIRTY_CHUNK;
                    233: 
                    234:     if (total_sectors - cur_sector < BDRV_SECTORS_PER_DIRTY_CHUNK) {
                    235:         nr_sectors = total_sectors - cur_sector;
                    236:     }
                    237: 
1.1.1.5   root      238:     blk = g_malloc(sizeof(BlkMigBlock));
                    239:     blk->buf = g_malloc(BLOCK_SIZE);
1.1       root      240:     blk->bmds = bmds;
                    241:     blk->sector = cur_sector;
1.1.1.3   root      242:     blk->nr_sectors = nr_sectors;
1.1       root      243: 
1.1.1.2   root      244:     blk->iov.iov_base = blk->buf;
                    245:     blk->iov.iov_len = nr_sectors * BDRV_SECTOR_SIZE;
                    246:     qemu_iovec_init_external(&blk->qiov, &blk->iov, 1);
                    247: 
1.1.1.4   root      248:     if (block_mig_state.submitted == 0) {
                    249:         block_mig_state.prev_time_offset = qemu_get_clock_ns(rt_clock);
                    250:     }
1.1.1.2   root      251: 
                    252:     blk->aiocb = bdrv_aio_readv(bs, cur_sector, &blk->qiov,
                    253:                                 nr_sectors, blk_mig_read_cb, blk);
                    254:     block_mig_state.submitted++;
1.1       root      255: 
                    256:     bdrv_reset_dirty(bs, cur_sector, nr_sectors);
                    257:     bmds->cur_sector = cur_sector + nr_sectors;
                    258: 
                    259:     return (bmds->cur_sector >= total_sectors);
                    260: }
                    261: 
                    262: static void set_dirty_tracking(int enable)
                    263: {
                    264:     BlkMigDevState *bmds;
                    265: 
                    266:     QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
                    267:         bdrv_set_dirty_tracking(bmds->bs, enable);
                    268:     }
                    269: }
                    270: 
1.1.1.2   root      271: static void init_blk_migration_it(void *opaque, BlockDriverState *bs)
1.1       root      272: {
                    273:     BlkMigDevState *bmds;
                    274:     int64_t sectors;
                    275: 
1.1.1.2   root      276:     if (!bdrv_is_read_only(bs)) {
                    277:         sectors = bdrv_getlength(bs) >> BDRV_SECTOR_BITS;
                    278:         if (sectors <= 0) {
                    279:             return;
                    280:         }
                    281: 
1.1.1.5   root      282:         bmds = g_malloc0(sizeof(BlkMigDevState));
1.1.1.2   root      283:         bmds->bs = bs;
                    284:         bmds->bulk_completed = 0;
                    285:         bmds->total_sectors = sectors;
                    286:         bmds->completed_sectors = 0;
                    287:         bmds->shared_base = block_mig_state.shared_base;
1.1.1.3   root      288:         alloc_aio_bitmap(bmds);
                    289:         drive_get_ref(drive_get_by_blockdev(bs));
                    290:         bdrv_set_in_use(bs, 1);
1.1.1.2   root      291: 
                    292:         block_mig_state.total_sector_sum += sectors;
                    293: 
                    294:         if (bmds->shared_base) {
1.1.1.6 ! root      295:             DPRINTF("Start migration for %s with shared base image\n",
        !           296:                     bs->device_name);
1.1.1.2   root      297:         } else {
1.1.1.6 ! root      298:             DPRINTF("Start full migration for %s\n", bs->device_name);
1.1.1.2   root      299:         }
                    300: 
                    301:         QSIMPLEQ_INSERT_TAIL(&block_mig_state.bmds_list, bmds, entry);
                    302:     }
                    303: }
                    304: 
1.1.1.6 ! root      305: static void init_blk_migration(QEMUFile *f)
1.1.1.2   root      306: {
1.1       root      307:     block_mig_state.submitted = 0;
                    308:     block_mig_state.read_done = 0;
                    309:     block_mig_state.transferred = 0;
                    310:     block_mig_state.total_sector_sum = 0;
                    311:     block_mig_state.prev_progress = -1;
1.1.1.2   root      312:     block_mig_state.bulk_completed = 0;
                    313:     block_mig_state.total_time = 0;
                    314:     block_mig_state.reads = 0;
1.1       root      315: 
1.1.1.6 ! root      316:     bdrv_iterate(init_blk_migration_it, NULL);
1.1       root      317: }
                    318: 
1.1.1.6 ! root      319: static int blk_mig_save_bulked_block(QEMUFile *f)
1.1       root      320: {
                    321:     int64_t completed_sector_sum = 0;
                    322:     BlkMigDevState *bmds;
                    323:     int progress;
                    324:     int ret = 0;
                    325: 
                    326:     QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
                    327:         if (bmds->bulk_completed == 0) {
1.1.1.6 ! root      328:             if (mig_save_device_bulk(f, bmds) == 1) {
1.1       root      329:                 /* completed bulk section for this device */
                    330:                 bmds->bulk_completed = 1;
                    331:             }
                    332:             completed_sector_sum += bmds->completed_sectors;
                    333:             ret = 1;
                    334:             break;
                    335:         } else {
                    336:             completed_sector_sum += bmds->completed_sectors;
                    337:         }
                    338:     }
                    339: 
1.1.1.3   root      340:     if (block_mig_state.total_sector_sum != 0) {
                    341:         progress = completed_sector_sum * 100 /
                    342:                    block_mig_state.total_sector_sum;
                    343:     } else {
                    344:         progress = 100;
                    345:     }
1.1       root      346:     if (progress != block_mig_state.prev_progress) {
                    347:         block_mig_state.prev_progress = progress;
                    348:         qemu_put_be64(f, (progress << BDRV_SECTOR_BITS)
                    349:                          | BLK_MIG_FLAG_PROGRESS);
1.1.1.6 ! root      350:         DPRINTF("Completed %d %%\r", progress);
1.1       root      351:     }
                    352: 
                    353:     return ret;
                    354: }
                    355: 
1.1.1.2   root      356: static void blk_mig_reset_dirty_cursor(void)
1.1       root      357: {
                    358:     BlkMigDevState *bmds;
1.1.1.2   root      359: 
                    360:     QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
                    361:         bmds->cur_dirty = 0;
                    362:     }
                    363: }
                    364: 
1.1.1.6 ! root      365: static int mig_save_device_dirty(QEMUFile *f, BlkMigDevState *bmds,
        !           366:                                  int is_async)
1.1.1.2   root      367: {
                    368:     BlkMigBlock *blk;
                    369:     int64_t total_sectors = bmds->total_sectors;
1.1       root      370:     int64_t sector;
1.1.1.2   root      371:     int nr_sectors;
1.1.1.5   root      372:     int ret = -EIO;
1.1       root      373: 
1.1.1.2   root      374:     for (sector = bmds->cur_dirty; sector < bmds->total_sectors;) {
1.1.1.3   root      375:         if (bmds_aio_inflight(bmds, sector)) {
1.1.1.6 ! root      376:             bdrv_drain_all();
1.1.1.3   root      377:         }
1.1.1.2   root      378:         if (bdrv_get_dirty(bmds->bs, sector)) {
1.1       root      379: 
1.1.1.2   root      380:             if (total_sectors - sector < BDRV_SECTORS_PER_DIRTY_CHUNK) {
                    381:                 nr_sectors = total_sectors - sector;
                    382:             } else {
                    383:                 nr_sectors = BDRV_SECTORS_PER_DIRTY_CHUNK;
                    384:             }
1.1.1.5   root      385:             blk = g_malloc(sizeof(BlkMigBlock));
                    386:             blk->buf = g_malloc(BLOCK_SIZE);
1.1.1.2   root      387:             blk->bmds = bmds;
                    388:             blk->sector = sector;
1.1.1.3   root      389:             blk->nr_sectors = nr_sectors;
1.1.1.2   root      390: 
                    391:             if (is_async) {
                    392:                 blk->iov.iov_base = blk->buf;
                    393:                 blk->iov.iov_len = nr_sectors * BDRV_SECTOR_SIZE;
                    394:                 qemu_iovec_init_external(&blk->qiov, &blk->iov, 1);
                    395: 
1.1.1.4   root      396:                 if (block_mig_state.submitted == 0) {
                    397:                     block_mig_state.prev_time_offset = qemu_get_clock_ns(rt_clock);
                    398:                 }
1.1.1.2   root      399: 
                    400:                 blk->aiocb = bdrv_aio_readv(bmds->bs, sector, &blk->qiov,
                    401:                                             nr_sectors, blk_mig_read_cb, blk);
                    402:                 block_mig_state.submitted++;
1.1.1.3   root      403:                 bmds_set_aio_inflight(bmds, sector, nr_sectors, 1);
1.1.1.2   root      404:             } else {
1.1.1.5   root      405:                 ret = bdrv_read(bmds->bs, sector, blk->buf, nr_sectors);
                    406:                 if (ret < 0) {
1.1.1.2   root      407:                     goto error;
                    408:                 }
                    409:                 blk_send(f, blk);
1.1       root      410: 
1.1.1.5   root      411:                 g_free(blk->buf);
                    412:                 g_free(blk);
1.1       root      413:             }
1.1.1.2   root      414: 
                    415:             bdrv_reset_dirty(bmds->bs, sector, nr_sectors);
                    416:             break;
1.1       root      417:         }
1.1.1.2   root      418:         sector += BDRV_SECTORS_PER_DIRTY_CHUNK;
                    419:         bmds->cur_dirty = sector;
1.1       root      420:     }
                    421: 
1.1.1.2   root      422:     return (bmds->cur_dirty >= bmds->total_sectors);
                    423: 
                    424: error:
1.1.1.6 ! root      425:     DPRINTF("Error reading sector %" PRId64 "\n", sector);
1.1.1.5   root      426:     qemu_file_set_error(f, ret);
                    427:     g_free(blk->buf);
                    428:     g_free(blk);
1.1.1.2   root      429:     return 0;
                    430: }
                    431: 
1.1.1.6 ! root      432: static int blk_mig_save_dirty_block(QEMUFile *f, int is_async)
1.1.1.2   root      433: {
                    434:     BlkMigDevState *bmds;
                    435:     int ret = 0;
                    436: 
                    437:     QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
1.1.1.6 ! root      438:         if (mig_save_device_dirty(f, bmds, is_async) == 0) {
1.1.1.2   root      439:             ret = 1;
                    440:             break;
                    441:         }
                    442:     }
                    443: 
                    444:     return ret;
1.1       root      445: }
                    446: 
                    447: static void flush_blks(QEMUFile* f)
                    448: {
                    449:     BlkMigBlock *blk;
                    450: 
1.1.1.2   root      451:     DPRINTF("%s Enter submitted %d read_done %d transferred %d\n",
1.1       root      452:             __FUNCTION__, block_mig_state.submitted, block_mig_state.read_done,
                    453:             block_mig_state.transferred);
                    454: 
                    455:     while ((blk = QSIMPLEQ_FIRST(&block_mig_state.blk_list)) != NULL) {
                    456:         if (qemu_file_rate_limit(f)) {
                    457:             break;
                    458:         }
                    459:         if (blk->ret < 0) {
1.1.1.5   root      460:             qemu_file_set_error(f, blk->ret);
1.1       root      461:             break;
                    462:         }
                    463:         blk_send(f, blk);
                    464: 
                    465:         QSIMPLEQ_REMOVE_HEAD(&block_mig_state.blk_list, entry);
1.1.1.5   root      466:         g_free(blk->buf);
                    467:         g_free(blk);
1.1       root      468: 
                    469:         block_mig_state.read_done--;
                    470:         block_mig_state.transferred++;
                    471:         assert(block_mig_state.read_done >= 0);
                    472:     }
                    473: 
1.1.1.2   root      474:     DPRINTF("%s Exit submitted %d read_done %d transferred %d\n", __FUNCTION__,
1.1       root      475:             block_mig_state.submitted, block_mig_state.read_done,
                    476:             block_mig_state.transferred);
                    477: }
                    478: 
1.1.1.2   root      479: static int64_t get_remaining_dirty(void)
1.1       root      480: {
                    481:     BlkMigDevState *bmds;
1.1.1.2   root      482:     int64_t dirty = 0;
1.1       root      483: 
1.1.1.2   root      484:     QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
                    485:         dirty += bdrv_get_dirty_count(bmds->bs);
1.1       root      486:     }
                    487: 
1.1.1.2   root      488:     return dirty * BLOCK_SIZE;
                    489: }
                    490: 
                    491: static int is_stage2_completed(void)
                    492: {
                    493:     int64_t remaining_dirty;
                    494:     long double bwidth;
                    495: 
                    496:     if (block_mig_state.bulk_completed == 1) {
                    497: 
                    498:         remaining_dirty = get_remaining_dirty();
1.1.1.3   root      499:         if (remaining_dirty == 0) {
                    500:             return 1;
                    501:         }
1.1.1.2   root      502: 
1.1.1.3   root      503:         bwidth = compute_read_bwidth();
1.1.1.2   root      504: 
1.1.1.3   root      505:         if ((remaining_dirty / bwidth) <=
1.1.1.2   root      506:             migrate_max_downtime()) {
1.1.1.5   root      507:             /* finish stage2 because we think that we can finish remaining work
1.1.1.2   root      508:                below max_downtime */
                    509: 
                    510:             return 1;
1.1       root      511:         }
                    512:     }
                    513: 
1.1.1.2   root      514:     return 0;
1.1       root      515: }
                    516: 
1.1.1.6 ! root      517: static void blk_mig_cleanup(void)
1.1       root      518: {
                    519:     BlkMigDevState *bmds;
                    520:     BlkMigBlock *blk;
                    521: 
1.1.1.3   root      522:     set_dirty_tracking(0);
                    523: 
1.1       root      524:     while ((bmds = QSIMPLEQ_FIRST(&block_mig_state.bmds_list)) != NULL) {
                    525:         QSIMPLEQ_REMOVE_HEAD(&block_mig_state.bmds_list, entry);
1.1.1.3   root      526:         bdrv_set_in_use(bmds->bs, 0);
                    527:         drive_put_ref(drive_get_by_blockdev(bmds->bs));
1.1.1.5   root      528:         g_free(bmds->aio_bitmap);
                    529:         g_free(bmds);
1.1       root      530:     }
                    531: 
                    532:     while ((blk = QSIMPLEQ_FIRST(&block_mig_state.blk_list)) != NULL) {
                    533:         QSIMPLEQ_REMOVE_HEAD(&block_mig_state.blk_list, entry);
1.1.1.5   root      534:         g_free(blk->buf);
                    535:         g_free(blk);
1.1       root      536:     }
                    537: }
                    538: 
1.1.1.6 ! root      539: static int block_save_live(QEMUFile *f, int stage, void *opaque)
1.1       root      540: {
1.1.1.5   root      541:     int ret;
                    542: 
1.1.1.2   root      543:     DPRINTF("Enter save live stage %d submitted %d transferred %d\n",
1.1       root      544:             stage, block_mig_state.submitted, block_mig_state.transferred);
                    545: 
                    546:     if (stage < 0) {
1.1.1.6 ! root      547:         blk_mig_cleanup();
1.1       root      548:         return 0;
                    549:     }
                    550: 
                    551:     if (block_mig_state.blk_enable != 1) {
                    552:         /* no need to migrate storage */
                    553:         qemu_put_be64(f, BLK_MIG_FLAG_EOS);
                    554:         return 1;
                    555:     }
                    556: 
                    557:     if (stage == 1) {
1.1.1.6 ! root      558:         init_blk_migration(f);
1.1       root      559: 
                    560:         /* start track dirty blocks */
                    561:         set_dirty_tracking(1);
                    562:     }
                    563: 
                    564:     flush_blks(f);
                    565: 
1.1.1.5   root      566:     ret = qemu_file_get_error(f);
                    567:     if (ret) {
1.1.1.6 ! root      568:         blk_mig_cleanup();
1.1.1.5   root      569:         return ret;
1.1       root      570:     }
                    571: 
1.1.1.2   root      572:     blk_mig_reset_dirty_cursor();
                    573: 
                    574:     if (stage == 2) {
                    575:         /* control the rate of transfer */
                    576:         while ((block_mig_state.submitted +
                    577:                 block_mig_state.read_done) * BLOCK_SIZE <
                    578:                qemu_file_get_rate_limit(f)) {
                    579:             if (block_mig_state.bulk_completed == 0) {
                    580:                 /* first finish the bulk phase */
1.1.1.6 ! root      581:                 if (blk_mig_save_bulked_block(f) == 0) {
1.1.1.2   root      582:                     /* finished saving bulk on all devices */
                    583:                     block_mig_state.bulk_completed = 1;
                    584:                 }
                    585:             } else {
1.1.1.6 ! root      586:                 if (blk_mig_save_dirty_block(f, 1) == 0) {
1.1.1.2   root      587:                     /* no more dirty blocks */
                    588:                     break;
                    589:                 }
                    590:             }
1.1       root      591:         }
                    592: 
1.1.1.2   root      593:         flush_blks(f);
1.1       root      594: 
1.1.1.5   root      595:         ret = qemu_file_get_error(f);
                    596:         if (ret) {
1.1.1.6 ! root      597:             blk_mig_cleanup();
1.1.1.5   root      598:             return ret;
1.1.1.2   root      599:         }
1.1       root      600:     }
                    601: 
                    602:     if (stage == 3) {
1.1.1.2   root      603:         /* we know for sure that save bulk is completed and
                    604:            all async read completed */
                    605:         assert(block_mig_state.submitted == 0);
1.1       root      606: 
1.1.1.6 ! root      607:         while (blk_mig_save_dirty_block(f, 0) != 0);
        !           608:         blk_mig_cleanup();
1.1       root      609: 
                    610:         /* report completion */
                    611:         qemu_put_be64(f, (100 << BDRV_SECTOR_BITS) | BLK_MIG_FLAG_PROGRESS);
                    612: 
1.1.1.5   root      613:         ret = qemu_file_get_error(f);
                    614:         if (ret) {
                    615:             return ret;
1.1       root      616:         }
                    617: 
1.1.1.6 ! root      618:         DPRINTF("Block migration completed\n");
1.1       root      619:     }
                    620: 
                    621:     qemu_put_be64(f, BLK_MIG_FLAG_EOS);
                    622: 
                    623:     return ((stage == 2) && is_stage2_completed());
                    624: }
                    625: 
                    626: static int block_load(QEMUFile *f, void *opaque, int version_id)
                    627: {
                    628:     static int banner_printed;
                    629:     int len, flags;
                    630:     char device_name[256];
                    631:     int64_t addr;
1.1.1.3   root      632:     BlockDriverState *bs, *bs_prev = NULL;
1.1       root      633:     uint8_t *buf;
1.1.1.3   root      634:     int64_t total_sectors = 0;
                    635:     int nr_sectors;
1.1.1.5   root      636:     int ret;
1.1       root      637: 
                    638:     do {
                    639:         addr = qemu_get_be64(f);
                    640: 
                    641:         flags = addr & ~BDRV_SECTOR_MASK;
                    642:         addr >>= BDRV_SECTOR_BITS;
                    643: 
                    644:         if (flags & BLK_MIG_FLAG_DEVICE_BLOCK) {
                    645:             /* get device name */
                    646:             len = qemu_get_byte(f);
                    647:             qemu_get_buffer(f, (uint8_t *)device_name, len);
                    648:             device_name[len] = '\0';
                    649: 
                    650:             bs = bdrv_find(device_name);
                    651:             if (!bs) {
                    652:                 fprintf(stderr, "Error unknown block device %s\n",
                    653:                         device_name);
                    654:                 return -EINVAL;
                    655:             }
                    656: 
1.1.1.3   root      657:             if (bs != bs_prev) {
                    658:                 bs_prev = bs;
                    659:                 total_sectors = bdrv_getlength(bs) >> BDRV_SECTOR_BITS;
                    660:                 if (total_sectors <= 0) {
1.1.1.4   root      661:                     error_report("Error getting length of block device %s",
1.1.1.3   root      662:                                  device_name);
                    663:                     return -EINVAL;
                    664:                 }
                    665:             }
                    666: 
                    667:             if (total_sectors - addr < BDRV_SECTORS_PER_DIRTY_CHUNK) {
                    668:                 nr_sectors = total_sectors - addr;
                    669:             } else {
                    670:                 nr_sectors = BDRV_SECTORS_PER_DIRTY_CHUNK;
                    671:             }
                    672: 
1.1.1.5   root      673:             buf = g_malloc(BLOCK_SIZE);
1.1       root      674: 
                    675:             qemu_get_buffer(f, buf, BLOCK_SIZE);
1.1.1.3   root      676:             ret = bdrv_write(bs, addr, buf, nr_sectors);
1.1       root      677: 
1.1.1.5   root      678:             g_free(buf);
1.1.1.2   root      679:             if (ret < 0) {
                    680:                 return ret;
                    681:             }
1.1       root      682:         } else if (flags & BLK_MIG_FLAG_PROGRESS) {
                    683:             if (!banner_printed) {
                    684:                 printf("Receiving block device images\n");
                    685:                 banner_printed = 1;
                    686:             }
                    687:             printf("Completed %d %%%c", (int)addr,
                    688:                    (addr == 100) ? '\n' : '\r');
                    689:             fflush(stdout);
                    690:         } else if (!(flags & BLK_MIG_FLAG_EOS)) {
                    691:             fprintf(stderr, "Unknown flags\n");
                    692:             return -EINVAL;
                    693:         }
1.1.1.5   root      694:         ret = qemu_file_get_error(f);
                    695:         if (ret != 0) {
                    696:             return ret;
1.1       root      697:         }
                    698:     } while (!(flags & BLK_MIG_FLAG_EOS));
                    699: 
                    700:     return 0;
                    701: }
                    702: 
                    703: static void block_set_params(int blk_enable, int shared_base, void *opaque)
                    704: {
                    705:     block_mig_state.blk_enable = blk_enable;
                    706:     block_mig_state.shared_base = shared_base;
                    707: 
                    708:     /* shared base means that blk_enable = 1 */
                    709:     block_mig_state.blk_enable |= shared_base;
                    710: }
                    711: 
                    712: void blk_mig_init(void)
                    713: {
                    714:     QSIMPLEQ_INIT(&block_mig_state.bmds_list);
                    715:     QSIMPLEQ_INIT(&block_mig_state.blk_list);
                    716: 
1.1.1.2   root      717:     register_savevm_live(NULL, "block", 0, 1, block_set_params,
                    718:                          block_save_live, NULL, block_load, &block_mig_state);
1.1       root      719: }

unix.superglobalmegacorp.com