Annotation of qemu/block-migration.c, revision 1.1.1.3

1.1       root        1: /*
                      2:  * QEMU live block migration
                      3:  *
                      4:  * Copyright IBM, Corp. 2009
                      5:  *
                      6:  * Authors:
                      7:  *  Liran Schour   <lirans@il.ibm.com>
                      8:  *
                      9:  * This work is licensed under the terms of the GNU GPL, version 2.  See
                     10:  * the COPYING file in the top-level directory.
                     11:  *
                     12:  */
                     13: 
                     14: #include "qemu-common.h"
                     15: #include "block_int.h"
                     16: #include "hw/hw.h"
                     17: #include "qemu-queue.h"
1.1.1.2   root       18: #include "qemu-timer.h"
1.1       root       19: #include "monitor.h"
                     20: #include "block-migration.h"
1.1.1.2   root       21: #include "migration.h"
1.1.1.3 ! root       22: #include "blockdev.h"
1.1       root       23: #include <assert.h>
                     24: 
                     25: #define BLOCK_SIZE (BDRV_SECTORS_PER_DIRTY_CHUNK << BDRV_SECTOR_BITS)
                     26: 
                     27: #define BLK_MIG_FLAG_DEVICE_BLOCK       0x01
                     28: #define BLK_MIG_FLAG_EOS                0x02
                     29: #define BLK_MIG_FLAG_PROGRESS           0x04
                     30: 
                     31: #define MAX_IS_ALLOCATED_SEARCH 65536
                     32: 
                     33: //#define DEBUG_BLK_MIGRATION
                     34: 
                     35: #ifdef DEBUG_BLK_MIGRATION
1.1.1.2   root       36: #define DPRINTF(fmt, ...) \
1.1       root       37:     do { printf("blk_migration: " fmt, ## __VA_ARGS__); } while (0)
                     38: #else
1.1.1.2   root       39: #define DPRINTF(fmt, ...) \
1.1       root       40:     do { } while (0)
                     41: #endif
                     42: 
                     43: typedef struct BlkMigDevState {
                     44:     BlockDriverState *bs;
                     45:     int bulk_completed;
                     46:     int shared_base;
                     47:     int64_t cur_sector;
1.1.1.2   root       48:     int64_t cur_dirty;
1.1       root       49:     int64_t completed_sectors;
                     50:     int64_t total_sectors;
                     51:     int64_t dirty;
                     52:     QSIMPLEQ_ENTRY(BlkMigDevState) entry;
1.1.1.3 ! root       53:     unsigned long *aio_bitmap;
1.1       root       54: } BlkMigDevState;
                     55: 
                     56: typedef struct BlkMigBlock {
                     57:     uint8_t *buf;
                     58:     BlkMigDevState *bmds;
                     59:     int64_t sector;
1.1.1.3 ! root       60:     int nr_sectors;
1.1       root       61:     struct iovec iov;
                     62:     QEMUIOVector qiov;
                     63:     BlockDriverAIOCB *aiocb;
                     64:     int ret;
1.1.1.2   root       65:     int64_t time;
1.1       root       66:     QSIMPLEQ_ENTRY(BlkMigBlock) entry;
                     67: } BlkMigBlock;
                     68: 
                     69: typedef struct BlkMigState {
                     70:     int blk_enable;
                     71:     int shared_base;
                     72:     QSIMPLEQ_HEAD(bmds_list, BlkMigDevState) bmds_list;
                     73:     QSIMPLEQ_HEAD(blk_list, BlkMigBlock) blk_list;
                     74:     int submitted;
                     75:     int read_done;
                     76:     int transferred;
                     77:     int64_t total_sector_sum;
                     78:     int prev_progress;
1.1.1.2   root       79:     int bulk_completed;
                     80:     long double total_time;
                     81:     int reads;
1.1       root       82: } BlkMigState;
                     83: 
                     84: static BlkMigState block_mig_state;
                     85: 
                     86: static void blk_send(QEMUFile *f, BlkMigBlock * blk)
                     87: {
                     88:     int len;
                     89: 
                     90:     /* sector number and flags */
                     91:     qemu_put_be64(f, (blk->sector << BDRV_SECTOR_BITS)
                     92:                      | BLK_MIG_FLAG_DEVICE_BLOCK);
                     93: 
                     94:     /* device name */
                     95:     len = strlen(blk->bmds->bs->device_name);
                     96:     qemu_put_byte(f, len);
                     97:     qemu_put_buffer(f, (uint8_t *)blk->bmds->bs->device_name, len);
                     98: 
                     99:     qemu_put_buffer(f, blk->buf, BLOCK_SIZE);
                    100: }
                    101: 
                    102: int blk_mig_active(void)
                    103: {
                    104:     return !QSIMPLEQ_EMPTY(&block_mig_state.bmds_list);
                    105: }
                    106: 
                    107: uint64_t blk_mig_bytes_transferred(void)
                    108: {
                    109:     BlkMigDevState *bmds;
                    110:     uint64_t sum = 0;
                    111: 
                    112:     QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
                    113:         sum += bmds->completed_sectors;
                    114:     }
                    115:     return sum << BDRV_SECTOR_BITS;
                    116: }
                    117: 
                    118: uint64_t blk_mig_bytes_remaining(void)
                    119: {
                    120:     return blk_mig_bytes_total() - blk_mig_bytes_transferred();
                    121: }
                    122: 
                    123: uint64_t blk_mig_bytes_total(void)
                    124: {
                    125:     BlkMigDevState *bmds;
                    126:     uint64_t sum = 0;
                    127: 
                    128:     QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
                    129:         sum += bmds->total_sectors;
                    130:     }
                    131:     return sum << BDRV_SECTOR_BITS;
                    132: }
                    133: 
1.1.1.2   root      134: static inline void add_avg_read_time(int64_t time)
                    135: {
                    136:     block_mig_state.reads++;
                    137:     block_mig_state.total_time += time;
                    138: }
                    139: 
                    140: static inline long double compute_read_bwidth(void)
                    141: {
                    142:     assert(block_mig_state.total_time != 0);
                    143:     return  (block_mig_state.reads * BLOCK_SIZE)/ block_mig_state.total_time;
                    144: }
                    145: 
1.1.1.3 ! root      146: static int bmds_aio_inflight(BlkMigDevState *bmds, int64_t sector)
        !           147: {
        !           148:     int64_t chunk = sector / (int64_t)BDRV_SECTORS_PER_DIRTY_CHUNK;
        !           149: 
        !           150:     if ((sector << BDRV_SECTOR_BITS) < bdrv_getlength(bmds->bs)) {
        !           151:         return !!(bmds->aio_bitmap[chunk / (sizeof(unsigned long) * 8)] &
        !           152:             (1UL << (chunk % (sizeof(unsigned long) * 8))));
        !           153:     } else {
        !           154:         return 0;
        !           155:     }
        !           156: }
        !           157: 
        !           158: static void bmds_set_aio_inflight(BlkMigDevState *bmds, int64_t sector_num,
        !           159:                              int nb_sectors, int set)
        !           160: {
        !           161:     int64_t start, end;
        !           162:     unsigned long val, idx, bit;
        !           163: 
        !           164:     start = sector_num / BDRV_SECTORS_PER_DIRTY_CHUNK;
        !           165:     end = (sector_num + nb_sectors - 1) / BDRV_SECTORS_PER_DIRTY_CHUNK;
        !           166: 
        !           167:     for (; start <= end; start++) {
        !           168:         idx = start / (sizeof(unsigned long) * 8);
        !           169:         bit = start % (sizeof(unsigned long) * 8);
        !           170:         val = bmds->aio_bitmap[idx];
        !           171:         if (set) {
        !           172:             val |= 1UL << bit;
        !           173:         } else {
        !           174:             val &= ~(1UL << bit);
        !           175:         }
        !           176:         bmds->aio_bitmap[idx] = val;
        !           177:     }
        !           178: }
        !           179: 
        !           180: static void alloc_aio_bitmap(BlkMigDevState *bmds)
        !           181: {
        !           182:     BlockDriverState *bs = bmds->bs;
        !           183:     int64_t bitmap_size;
        !           184: 
        !           185:     bitmap_size = (bdrv_getlength(bs) >> BDRV_SECTOR_BITS) +
        !           186:             BDRV_SECTORS_PER_DIRTY_CHUNK * 8 - 1;
        !           187:     bitmap_size /= BDRV_SECTORS_PER_DIRTY_CHUNK * 8;
        !           188: 
        !           189:     bmds->aio_bitmap = qemu_mallocz(bitmap_size);
        !           190: }
        !           191: 
1.1       root      192: static void blk_mig_read_cb(void *opaque, int ret)
                    193: {
                    194:     BlkMigBlock *blk = opaque;
                    195: 
                    196:     blk->ret = ret;
                    197: 
1.1.1.2   root      198:     blk->time = qemu_get_clock_ns(rt_clock) - blk->time;
                    199: 
                    200:     add_avg_read_time(blk->time);
                    201: 
1.1       root      202:     QSIMPLEQ_INSERT_TAIL(&block_mig_state.blk_list, blk, entry);
1.1.1.3 ! root      203:     bmds_set_aio_inflight(blk->bmds, blk->sector, blk->nr_sectors, 0);
1.1       root      204: 
                    205:     block_mig_state.submitted--;
                    206:     block_mig_state.read_done++;
                    207:     assert(block_mig_state.submitted >= 0);
                    208: }
                    209: 
                    210: static int mig_save_device_bulk(Monitor *mon, QEMUFile *f,
1.1.1.2   root      211:                                 BlkMigDevState *bmds)
1.1       root      212: {
                    213:     int64_t total_sectors = bmds->total_sectors;
                    214:     int64_t cur_sector = bmds->cur_sector;
                    215:     BlockDriverState *bs = bmds->bs;
                    216:     BlkMigBlock *blk;
                    217:     int nr_sectors;
                    218: 
                    219:     if (bmds->shared_base) {
                    220:         while (cur_sector < total_sectors &&
                    221:                !bdrv_is_allocated(bs, cur_sector, MAX_IS_ALLOCATED_SEARCH,
                    222:                                   &nr_sectors)) {
                    223:             cur_sector += nr_sectors;
                    224:         }
                    225:     }
                    226: 
                    227:     if (cur_sector >= total_sectors) {
                    228:         bmds->cur_sector = bmds->completed_sectors = total_sectors;
                    229:         return 1;
                    230:     }
                    231: 
                    232:     bmds->completed_sectors = cur_sector;
                    233: 
                    234:     cur_sector &= ~((int64_t)BDRV_SECTORS_PER_DIRTY_CHUNK - 1);
                    235: 
                    236:     /* we are going to transfer a full block even if it is not allocated */
                    237:     nr_sectors = BDRV_SECTORS_PER_DIRTY_CHUNK;
                    238: 
                    239:     if (total_sectors - cur_sector < BDRV_SECTORS_PER_DIRTY_CHUNK) {
                    240:         nr_sectors = total_sectors - cur_sector;
                    241:     }
                    242: 
                    243:     blk = qemu_malloc(sizeof(BlkMigBlock));
                    244:     blk->buf = qemu_malloc(BLOCK_SIZE);
                    245:     blk->bmds = bmds;
                    246:     blk->sector = cur_sector;
1.1.1.3 ! root      247:     blk->nr_sectors = nr_sectors;
1.1       root      248: 
1.1.1.2   root      249:     blk->iov.iov_base = blk->buf;
                    250:     blk->iov.iov_len = nr_sectors * BDRV_SECTOR_SIZE;
                    251:     qemu_iovec_init_external(&blk->qiov, &blk->iov, 1);
                    252: 
                    253:     blk->time = qemu_get_clock_ns(rt_clock);
                    254: 
                    255:     blk->aiocb = bdrv_aio_readv(bs, cur_sector, &blk->qiov,
                    256:                                 nr_sectors, blk_mig_read_cb, blk);
                    257:     if (!blk->aiocb) {
                    258:         goto error;
1.1       root      259:     }
1.1.1.2   root      260:     block_mig_state.submitted++;
1.1       root      261: 
                    262:     bdrv_reset_dirty(bs, cur_sector, nr_sectors);
                    263:     bmds->cur_sector = cur_sector + nr_sectors;
                    264: 
                    265:     return (bmds->cur_sector >= total_sectors);
                    266: 
                    267: error:
                    268:     monitor_printf(mon, "Error reading sector %" PRId64 "\n", cur_sector);
                    269:     qemu_file_set_error(f);
                    270:     qemu_free(blk->buf);
                    271:     qemu_free(blk);
                    272:     return 0;
                    273: }
                    274: 
                    275: static void set_dirty_tracking(int enable)
                    276: {
                    277:     BlkMigDevState *bmds;
                    278: 
                    279:     QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
                    280:         bdrv_set_dirty_tracking(bmds->bs, enable);
                    281:     }
                    282: }
                    283: 
1.1.1.2   root      284: static void init_blk_migration_it(void *opaque, BlockDriverState *bs)
1.1       root      285: {
1.1.1.2   root      286:     Monitor *mon = opaque;
1.1       root      287:     BlkMigDevState *bmds;
                    288:     int64_t sectors;
                    289: 
1.1.1.2   root      290:     if (!bdrv_is_read_only(bs)) {
                    291:         sectors = bdrv_getlength(bs) >> BDRV_SECTOR_BITS;
                    292:         if (sectors <= 0) {
                    293:             return;
                    294:         }
                    295: 
                    296:         bmds = qemu_mallocz(sizeof(BlkMigDevState));
                    297:         bmds->bs = bs;
                    298:         bmds->bulk_completed = 0;
                    299:         bmds->total_sectors = sectors;
                    300:         bmds->completed_sectors = 0;
                    301:         bmds->shared_base = block_mig_state.shared_base;
1.1.1.3 ! root      302:         alloc_aio_bitmap(bmds);
        !           303:         drive_get_ref(drive_get_by_blockdev(bs));
        !           304:         bdrv_set_in_use(bs, 1);
1.1.1.2   root      305: 
                    306:         block_mig_state.total_sector_sum += sectors;
                    307: 
                    308:         if (bmds->shared_base) {
                    309:             monitor_printf(mon, "Start migration for %s with shared base "
                    310:                                 "image\n",
                    311:                            bs->device_name);
                    312:         } else {
                    313:             monitor_printf(mon, "Start full migration for %s\n",
                    314:                            bs->device_name);
                    315:         }
                    316: 
                    317:         QSIMPLEQ_INSERT_TAIL(&block_mig_state.bmds_list, bmds, entry);
                    318:     }
                    319: }
                    320: 
                    321: static void init_blk_migration(Monitor *mon, QEMUFile *f)
                    322: {
1.1       root      323:     block_mig_state.submitted = 0;
                    324:     block_mig_state.read_done = 0;
                    325:     block_mig_state.transferred = 0;
                    326:     block_mig_state.total_sector_sum = 0;
                    327:     block_mig_state.prev_progress = -1;
1.1.1.2   root      328:     block_mig_state.bulk_completed = 0;
                    329:     block_mig_state.total_time = 0;
                    330:     block_mig_state.reads = 0;
1.1       root      331: 
1.1.1.2   root      332:     bdrv_iterate(init_blk_migration_it, mon);
1.1       root      333: }
                    334: 
1.1.1.2   root      335: static int blk_mig_save_bulked_block(Monitor *mon, QEMUFile *f)
1.1       root      336: {
                    337:     int64_t completed_sector_sum = 0;
                    338:     BlkMigDevState *bmds;
                    339:     int progress;
                    340:     int ret = 0;
                    341: 
                    342:     QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
                    343:         if (bmds->bulk_completed == 0) {
1.1.1.2   root      344:             if (mig_save_device_bulk(mon, f, bmds) == 1) {
1.1       root      345:                 /* completed bulk section for this device */
                    346:                 bmds->bulk_completed = 1;
                    347:             }
                    348:             completed_sector_sum += bmds->completed_sectors;
                    349:             ret = 1;
                    350:             break;
                    351:         } else {
                    352:             completed_sector_sum += bmds->completed_sectors;
                    353:         }
                    354:     }
                    355: 
1.1.1.3 ! root      356:     if (block_mig_state.total_sector_sum != 0) {
        !           357:         progress = completed_sector_sum * 100 /
        !           358:                    block_mig_state.total_sector_sum;
        !           359:     } else {
        !           360:         progress = 100;
        !           361:     }
1.1       root      362:     if (progress != block_mig_state.prev_progress) {
                    363:         block_mig_state.prev_progress = progress;
                    364:         qemu_put_be64(f, (progress << BDRV_SECTOR_BITS)
                    365:                          | BLK_MIG_FLAG_PROGRESS);
                    366:         monitor_printf(mon, "Completed %d %%\r", progress);
                    367:         monitor_flush(mon);
                    368:     }
                    369: 
                    370:     return ret;
                    371: }
                    372: 
1.1.1.2   root      373: static void blk_mig_reset_dirty_cursor(void)
1.1       root      374: {
                    375:     BlkMigDevState *bmds;
1.1.1.2   root      376: 
                    377:     QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
                    378:         bmds->cur_dirty = 0;
                    379:     }
                    380: }
                    381: 
                    382: static int mig_save_device_dirty(Monitor *mon, QEMUFile *f,
                    383:                                  BlkMigDevState *bmds, int is_async)
                    384: {
                    385:     BlkMigBlock *blk;
                    386:     int64_t total_sectors = bmds->total_sectors;
1.1       root      387:     int64_t sector;
1.1.1.2   root      388:     int nr_sectors;
1.1       root      389: 
1.1.1.2   root      390:     for (sector = bmds->cur_dirty; sector < bmds->total_sectors;) {
1.1.1.3 ! root      391:         if (bmds_aio_inflight(bmds, sector)) {
        !           392:             qemu_aio_flush();
        !           393:         }
1.1.1.2   root      394:         if (bdrv_get_dirty(bmds->bs, sector)) {
1.1       root      395: 
1.1.1.2   root      396:             if (total_sectors - sector < BDRV_SECTORS_PER_DIRTY_CHUNK) {
                    397:                 nr_sectors = total_sectors - sector;
                    398:             } else {
                    399:                 nr_sectors = BDRV_SECTORS_PER_DIRTY_CHUNK;
                    400:             }
                    401:             blk = qemu_malloc(sizeof(BlkMigBlock));
                    402:             blk->buf = qemu_malloc(BLOCK_SIZE);
                    403:             blk->bmds = bmds;
                    404:             blk->sector = sector;
1.1.1.3 ! root      405:             blk->nr_sectors = nr_sectors;
1.1.1.2   root      406: 
                    407:             if (is_async) {
                    408:                 blk->iov.iov_base = blk->buf;
                    409:                 blk->iov.iov_len = nr_sectors * BDRV_SECTOR_SIZE;
                    410:                 qemu_iovec_init_external(&blk->qiov, &blk->iov, 1);
                    411: 
1.1.1.3 ! root      412:                 blk->time = qemu_get_clock_ns(rt_clock);
1.1.1.2   root      413: 
                    414:                 blk->aiocb = bdrv_aio_readv(bmds->bs, sector, &blk->qiov,
                    415:                                             nr_sectors, blk_mig_read_cb, blk);
                    416:                 if (!blk->aiocb) {
                    417:                     goto error;
1.1       root      418:                 }
1.1.1.2   root      419:                 block_mig_state.submitted++;
1.1.1.3 ! root      420:                 bmds_set_aio_inflight(bmds, sector, nr_sectors, 1);
1.1.1.2   root      421:             } else {
                    422:                 if (bdrv_read(bmds->bs, sector, blk->buf,
                    423:                               nr_sectors) < 0) {
                    424:                     goto error;
                    425:                 }
                    426:                 blk_send(f, blk);
1.1       root      427: 
1.1.1.2   root      428:                 qemu_free(blk->buf);
                    429:                 qemu_free(blk);
1.1       root      430:             }
1.1.1.2   root      431: 
                    432:             bdrv_reset_dirty(bmds->bs, sector, nr_sectors);
                    433:             break;
1.1       root      434:         }
1.1.1.2   root      435:         sector += BDRV_SECTORS_PER_DIRTY_CHUNK;
                    436:         bmds->cur_dirty = sector;
1.1       root      437:     }
                    438: 
1.1.1.2   root      439:     return (bmds->cur_dirty >= bmds->total_sectors);
                    440: 
                    441: error:
                    442:     monitor_printf(mon, "Error reading sector %" PRId64 "\n", sector);
                    443:     qemu_file_set_error(f);
                    444:     qemu_free(blk->buf);
                    445:     qemu_free(blk);
                    446:     return 0;
                    447: }
                    448: 
                    449: static int blk_mig_save_dirty_block(Monitor *mon, QEMUFile *f, int is_async)
                    450: {
                    451:     BlkMigDevState *bmds;
                    452:     int ret = 0;
                    453: 
                    454:     QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
                    455:         if (mig_save_device_dirty(mon, f, bmds, is_async) == 0) {
                    456:             ret = 1;
                    457:             break;
                    458:         }
                    459:     }
                    460: 
                    461:     return ret;
1.1       root      462: }
                    463: 
                    464: static void flush_blks(QEMUFile* f)
                    465: {
                    466:     BlkMigBlock *blk;
                    467: 
1.1.1.2   root      468:     DPRINTF("%s Enter submitted %d read_done %d transferred %d\n",
1.1       root      469:             __FUNCTION__, block_mig_state.submitted, block_mig_state.read_done,
                    470:             block_mig_state.transferred);
                    471: 
                    472:     while ((blk = QSIMPLEQ_FIRST(&block_mig_state.blk_list)) != NULL) {
                    473:         if (qemu_file_rate_limit(f)) {
                    474:             break;
                    475:         }
                    476:         if (blk->ret < 0) {
                    477:             qemu_file_set_error(f);
                    478:             break;
                    479:         }
                    480:         blk_send(f, blk);
                    481: 
                    482:         QSIMPLEQ_REMOVE_HEAD(&block_mig_state.blk_list, entry);
                    483:         qemu_free(blk->buf);
                    484:         qemu_free(blk);
                    485: 
                    486:         block_mig_state.read_done--;
                    487:         block_mig_state.transferred++;
                    488:         assert(block_mig_state.read_done >= 0);
                    489:     }
                    490: 
1.1.1.2   root      491:     DPRINTF("%s Exit submitted %d read_done %d transferred %d\n", __FUNCTION__,
1.1       root      492:             block_mig_state.submitted, block_mig_state.read_done,
                    493:             block_mig_state.transferred);
                    494: }
                    495: 
1.1.1.2   root      496: static int64_t get_remaining_dirty(void)
1.1       root      497: {
                    498:     BlkMigDevState *bmds;
1.1.1.2   root      499:     int64_t dirty = 0;
1.1       root      500: 
1.1.1.2   root      501:     QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
                    502:         dirty += bdrv_get_dirty_count(bmds->bs);
1.1       root      503:     }
                    504: 
1.1.1.2   root      505:     return dirty * BLOCK_SIZE;
                    506: }
                    507: 
                    508: static int is_stage2_completed(void)
                    509: {
                    510:     int64_t remaining_dirty;
                    511:     long double bwidth;
                    512: 
                    513:     if (block_mig_state.bulk_completed == 1) {
                    514: 
                    515:         remaining_dirty = get_remaining_dirty();
1.1.1.3 ! root      516:         if (remaining_dirty == 0) {
        !           517:             return 1;
        !           518:         }
1.1.1.2   root      519: 
1.1.1.3 ! root      520:         bwidth = compute_read_bwidth();
1.1.1.2   root      521: 
1.1.1.3 ! root      522:         if ((remaining_dirty / bwidth) <=
1.1.1.2   root      523:             migrate_max_downtime()) {
                    524:             /* finish stage2 because we think that we can finish remaing work
                    525:                below max_downtime */
                    526: 
                    527:             return 1;
1.1       root      528:         }
                    529:     }
                    530: 
1.1.1.2   root      531:     return 0;
1.1       root      532: }
                    533: 
                    534: static void blk_mig_cleanup(Monitor *mon)
                    535: {
                    536:     BlkMigDevState *bmds;
                    537:     BlkMigBlock *blk;
                    538: 
1.1.1.3 ! root      539:     set_dirty_tracking(0);
        !           540: 
1.1       root      541:     while ((bmds = QSIMPLEQ_FIRST(&block_mig_state.bmds_list)) != NULL) {
                    542:         QSIMPLEQ_REMOVE_HEAD(&block_mig_state.bmds_list, entry);
1.1.1.3 ! root      543:         bdrv_set_in_use(bmds->bs, 0);
        !           544:         drive_put_ref(drive_get_by_blockdev(bmds->bs));
        !           545:         qemu_free(bmds->aio_bitmap);
1.1       root      546:         qemu_free(bmds);
                    547:     }
                    548: 
                    549:     while ((blk = QSIMPLEQ_FIRST(&block_mig_state.blk_list)) != NULL) {
                    550:         QSIMPLEQ_REMOVE_HEAD(&block_mig_state.blk_list, entry);
                    551:         qemu_free(blk->buf);
                    552:         qemu_free(blk);
                    553:     }
                    554: 
                    555:     monitor_printf(mon, "\n");
                    556: }
                    557: 
                    558: static int block_save_live(Monitor *mon, QEMUFile *f, int stage, void *opaque)
                    559: {
1.1.1.2   root      560:     DPRINTF("Enter save live stage %d submitted %d transferred %d\n",
1.1       root      561:             stage, block_mig_state.submitted, block_mig_state.transferred);
                    562: 
                    563:     if (stage < 0) {
                    564:         blk_mig_cleanup(mon);
                    565:         return 0;
                    566:     }
                    567: 
                    568:     if (block_mig_state.blk_enable != 1) {
                    569:         /* no need to migrate storage */
                    570:         qemu_put_be64(f, BLK_MIG_FLAG_EOS);
                    571:         return 1;
                    572:     }
                    573: 
                    574:     if (stage == 1) {
                    575:         init_blk_migration(mon, f);
                    576: 
                    577:         /* start track dirty blocks */
                    578:         set_dirty_tracking(1);
                    579:     }
                    580: 
                    581:     flush_blks(f);
                    582: 
                    583:     if (qemu_file_has_error(f)) {
                    584:         blk_mig_cleanup(mon);
                    585:         return 0;
                    586:     }
                    587: 
1.1.1.2   root      588:     blk_mig_reset_dirty_cursor();
                    589: 
                    590:     if (stage == 2) {
                    591:         /* control the rate of transfer */
                    592:         while ((block_mig_state.submitted +
                    593:                 block_mig_state.read_done) * BLOCK_SIZE <
                    594:                qemu_file_get_rate_limit(f)) {
                    595:             if (block_mig_state.bulk_completed == 0) {
                    596:                 /* first finish the bulk phase */
                    597:                 if (blk_mig_save_bulked_block(mon, f) == 0) {
                    598:                     /* finished saving bulk on all devices */
                    599:                     block_mig_state.bulk_completed = 1;
                    600:                 }
                    601:             } else {
                    602:                 if (blk_mig_save_dirty_block(mon, f, 1) == 0) {
                    603:                     /* no more dirty blocks */
                    604:                     break;
                    605:                 }
                    606:             }
1.1       root      607:         }
                    608: 
1.1.1.2   root      609:         flush_blks(f);
1.1       root      610: 
1.1.1.2   root      611:         if (qemu_file_has_error(f)) {
                    612:             blk_mig_cleanup(mon);
                    613:             return 0;
                    614:         }
1.1       root      615:     }
                    616: 
                    617:     if (stage == 3) {
1.1.1.2   root      618:         /* we know for sure that save bulk is completed and
                    619:            all async read completed */
                    620:         assert(block_mig_state.submitted == 0);
1.1       root      621: 
1.1.1.2   root      622:         while (blk_mig_save_dirty_block(mon, f, 0) != 0);
1.1       root      623:         blk_mig_cleanup(mon);
                    624: 
                    625:         /* report completion */
                    626:         qemu_put_be64(f, (100 << BDRV_SECTOR_BITS) | BLK_MIG_FLAG_PROGRESS);
                    627: 
                    628:         if (qemu_file_has_error(f)) {
                    629:             return 0;
                    630:         }
                    631: 
                    632:         monitor_printf(mon, "Block migration completed\n");
                    633:     }
                    634: 
                    635:     qemu_put_be64(f, BLK_MIG_FLAG_EOS);
                    636: 
                    637:     return ((stage == 2) && is_stage2_completed());
                    638: }
                    639: 
                    640: static int block_load(QEMUFile *f, void *opaque, int version_id)
                    641: {
                    642:     static int banner_printed;
                    643:     int len, flags;
                    644:     char device_name[256];
                    645:     int64_t addr;
1.1.1.3 ! root      646:     BlockDriverState *bs, *bs_prev = NULL;
1.1       root      647:     uint8_t *buf;
1.1.1.3 ! root      648:     int64_t total_sectors = 0;
        !           649:     int nr_sectors;
1.1       root      650: 
                    651:     do {
                    652:         addr = qemu_get_be64(f);
                    653: 
                    654:         flags = addr & ~BDRV_SECTOR_MASK;
                    655:         addr >>= BDRV_SECTOR_BITS;
                    656: 
                    657:         if (flags & BLK_MIG_FLAG_DEVICE_BLOCK) {
1.1.1.2   root      658:             int ret;
1.1       root      659:             /* get device name */
                    660:             len = qemu_get_byte(f);
                    661:             qemu_get_buffer(f, (uint8_t *)device_name, len);
                    662:             device_name[len] = '\0';
                    663: 
                    664:             bs = bdrv_find(device_name);
                    665:             if (!bs) {
                    666:                 fprintf(stderr, "Error unknown block device %s\n",
                    667:                         device_name);
                    668:                 return -EINVAL;
                    669:             }
                    670: 
1.1.1.3 ! root      671:             if (bs != bs_prev) {
        !           672:                 bs_prev = bs;
        !           673:                 total_sectors = bdrv_getlength(bs) >> BDRV_SECTOR_BITS;
        !           674:                 if (total_sectors <= 0) {
        !           675:                     error_report("Error getting length of block device %s\n",
        !           676:                                  device_name);
        !           677:                     return -EINVAL;
        !           678:                 }
        !           679:             }
        !           680: 
        !           681:             if (total_sectors - addr < BDRV_SECTORS_PER_DIRTY_CHUNK) {
        !           682:                 nr_sectors = total_sectors - addr;
        !           683:             } else {
        !           684:                 nr_sectors = BDRV_SECTORS_PER_DIRTY_CHUNK;
        !           685:             }
        !           686: 
1.1       root      687:             buf = qemu_malloc(BLOCK_SIZE);
                    688: 
                    689:             qemu_get_buffer(f, buf, BLOCK_SIZE);
1.1.1.3 ! root      690:             ret = bdrv_write(bs, addr, buf, nr_sectors);
1.1       root      691: 
                    692:             qemu_free(buf);
1.1.1.2   root      693:             if (ret < 0) {
                    694:                 return ret;
                    695:             }
1.1       root      696:         } else if (flags & BLK_MIG_FLAG_PROGRESS) {
                    697:             if (!banner_printed) {
                    698:                 printf("Receiving block device images\n");
                    699:                 banner_printed = 1;
                    700:             }
                    701:             printf("Completed %d %%%c", (int)addr,
                    702:                    (addr == 100) ? '\n' : '\r');
                    703:             fflush(stdout);
                    704:         } else if (!(flags & BLK_MIG_FLAG_EOS)) {
                    705:             fprintf(stderr, "Unknown flags\n");
                    706:             return -EINVAL;
                    707:         }
                    708:         if (qemu_file_has_error(f)) {
                    709:             return -EIO;
                    710:         }
                    711:     } while (!(flags & BLK_MIG_FLAG_EOS));
                    712: 
                    713:     return 0;
                    714: }
                    715: 
                    716: static void block_set_params(int blk_enable, int shared_base, void *opaque)
                    717: {
                    718:     block_mig_state.blk_enable = blk_enable;
                    719:     block_mig_state.shared_base = shared_base;
                    720: 
                    721:     /* shared base means that blk_enable = 1 */
                    722:     block_mig_state.blk_enable |= shared_base;
                    723: }
                    724: 
                    725: void blk_mig_init(void)
                    726: {
                    727:     QSIMPLEQ_INIT(&block_mig_state.bmds_list);
                    728:     QSIMPLEQ_INIT(&block_mig_state.blk_list);
                    729: 
1.1.1.2   root      730:     register_savevm_live(NULL, "block", 0, 1, block_set_params,
                    731:                          block_save_live, NULL, block_load, &block_mig_state);
1.1       root      732: }

unix.superglobalmegacorp.com