File:  [Qemu by Fabrice Bellard] / qemu / kvm-all.c
Revision 1.1.1.1 (vendor branch): download - view: text, annotated - select for diffs
Tue Apr 24 16:50:24 2018 UTC (3 years, 2 months ago) by root
Branches: qemu, MAIN
CVS tags: qemu0100, HEAD
qemu 0.10.0

    1: /*
    2:  * QEMU KVM support
    3:  *
    4:  * Copyright IBM, Corp. 2008
    5:  *           Red Hat, Inc. 2008
    6:  *
    7:  * Authors:
    8:  *  Anthony Liguori   <aliguori@us.ibm.com>
    9:  *  Glauber Costa     <gcosta@redhat.com>
   10:  *
   11:  * This work is licensed under the terms of the GNU GPL, version 2 or later.
   12:  * See the COPYING file in the top-level directory.
   13:  *
   14:  */
   15: 
   16: #include <sys/types.h>
   17: #include <sys/ioctl.h>
   18: #include <sys/mman.h>
   19: #include <stdarg.h>
   20: 
   21: #include <linux/kvm.h>
   22: 
   23: #include "qemu-common.h"
   24: #include "sysemu.h"
   25: #include "kvm.h"
   26: 
/* KVM uses PAGE_SIZE in its definition of COALESCED_MMIO_MAX, so provide
 * one here, aliased to the emulated target's page size. */
#define PAGE_SIZE TARGET_PAGE_SIZE
   29: 
/* Uncomment to enable the dprintf() trace output used throughout this file. */
//#define DEBUG_KVM

/*
 * dprintf(): debug tracing helper.
 *
 * With DEBUG_KVM defined the arguments are forwarded to fprintf(stderr, ...).
 * Without it the macro expands to an empty statement, so the arguments are
 * neither evaluated nor type-checked.
 */
#ifdef DEBUG_KVM
#define dprintf(fmt, ...) \
    do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
#else
#define dprintf(fmt, ...) \
    do { } while (0)
#endif
   39: 
/*
 * Bookkeeping for one KVM memory slot.  The fields are copied into a
 * struct kvm_userspace_memory_region by kvm_set_user_memory_region().
 */
typedef struct KVMSlot
{
    target_phys_addr_t start_addr;  /* guest physical address of the region */
    ram_addr_t memory_size;         /* size of the region; 0 marks the slot as free */
    ram_addr_t phys_offset;         /* offset added to phys_ram_base to get the host address */
    int slot;                       /* slot number handed to the kernel (set in kvm_init) */
    int flags;                      /* slot flags passed to the kernel, e.g. KVM_MEM_LOG_DIRTY_PAGES */
} KVMSlot;
   48: 
/* Short alias for the kernel's dirty-log ioctl argument structure. */
typedef struct kvm_dirty_log KVMDirtyLog;

/* Global flag, initialised to 0 and not modified in this file.
 * NOTE(review): presumably set elsewhere when KVM use is requested -- confirm. */
int kvm_allowed = 0;
   52: 
/* Process-wide KVM state, created by kvm_init(). */
struct KVMState
{
    KVMSlot slots[32];      /* memory slot table; slots[i].slot == i after kvm_init() */
    int fd;                 /* file descriptor of /dev/kvm, used by kvm_ioctl() */
    int vmfd;               /* file descriptor returned by KVM_CREATE_VM, used by kvm_vm_ioctl() */
    int coalesced_mmio;     /* result of the KVM_CAP_COALESCED_MMIO check; 0 when unsupported */
};

/* The single KVMState instance; assigned at the end of a successful kvm_init(). */
static KVMState *kvm_state;
   62: 
   63: static KVMSlot *kvm_alloc_slot(KVMState *s)
   64: {
   65:     int i;
   66: 
   67:     for (i = 0; i < ARRAY_SIZE(s->slots); i++) {
   68:         /* KVM private memory slots */
   69:         if (i >= 8 && i < 12)
   70:             continue;
   71:         if (s->slots[i].memory_size == 0)
   72:             return &s->slots[i];
   73:     }
   74: 
   75:     return NULL;
   76: }
   77: 
   78: static KVMSlot *kvm_lookup_slot(KVMState *s, target_phys_addr_t start_addr)
   79: {
   80:     int i;
   81: 
   82:     for (i = 0; i < ARRAY_SIZE(s->slots); i++) {
   83:         KVMSlot *mem = &s->slots[i];
   84: 
   85:         if (start_addr >= mem->start_addr &&
   86:             start_addr < (mem->start_addr + mem->memory_size))
   87:             return mem;
   88:     }
   89: 
   90:     return NULL;
   91: }
   92: 
   93: static int kvm_set_user_memory_region(KVMState *s, KVMSlot *slot)
   94: {
   95:     struct kvm_userspace_memory_region mem;
   96: 
   97:     mem.slot = slot->slot;
   98:     mem.guest_phys_addr = slot->start_addr;
   99:     mem.memory_size = slot->memory_size;
  100:     mem.userspace_addr = (unsigned long)phys_ram_base + slot->phys_offset;
  101:     mem.flags = slot->flags;
  102: 
  103:     return kvm_vm_ioctl(s, KVM_SET_USER_MEMORY_REGION, &mem);
  104: }
  105: 
  106: 
  107: int kvm_init_vcpu(CPUState *env)
  108: {
  109:     KVMState *s = kvm_state;
  110:     long mmap_size;
  111:     int ret;
  112: 
  113:     dprintf("kvm_init_vcpu\n");
  114: 
  115:     ret = kvm_vm_ioctl(s, KVM_CREATE_VCPU, env->cpu_index);
  116:     if (ret < 0) {
  117:         dprintf("kvm_create_vcpu failed\n");
  118:         goto err;
  119:     }
  120: 
  121:     env->kvm_fd = ret;
  122:     env->kvm_state = s;
  123: 
  124:     mmap_size = kvm_ioctl(s, KVM_GET_VCPU_MMAP_SIZE, 0);
  125:     if (mmap_size < 0) {
  126:         dprintf("KVM_GET_VCPU_MMAP_SIZE failed\n");
  127:         goto err;
  128:     }
  129: 
  130:     env->kvm_run = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED,
  131:                         env->kvm_fd, 0);
  132:     if (env->kvm_run == MAP_FAILED) {
  133:         ret = -errno;
  134:         dprintf("mmap'ing vcpu state failed\n");
  135:         goto err;
  136:     }
  137: 
  138:     ret = kvm_arch_init_vcpu(env);
  139: 
  140: err:
  141:     return ret;
  142: }
  143: 
  144: int kvm_sync_vcpus(void)
  145: {
  146:     CPUState *env;
  147: 
  148:     for (env = first_cpu; env != NULL; env = env->next_cpu) {
  149:         int ret;
  150: 
  151:         ret = kvm_arch_put_registers(env);
  152:         if (ret)
  153:             return ret;
  154:     }
  155: 
  156:     return 0;
  157: }
  158: 
  159: /*
  160:  * dirty pages logging control
  161:  */
  162: static int kvm_dirty_pages_log_change(target_phys_addr_t phys_addr, target_phys_addr_t end_addr,
  163:                                       unsigned flags,
  164:                                       unsigned mask)
  165: {
  166:     KVMState *s = kvm_state;
  167:     KVMSlot *mem = kvm_lookup_slot(s, phys_addr);
  168:     if (mem == NULL)  {
  169:             dprintf("invalid parameters %llx-%llx\n", phys_addr, end_addr);
  170:             return -EINVAL;
  171:     }
  172: 
  173:     flags = (mem->flags & ~mask) | flags;
  174:     /* Nothing changed, no need to issue ioctl */
  175:     if (flags == mem->flags)
  176:             return 0;
  177: 
  178:     mem->flags = flags;
  179: 
  180:     return kvm_set_user_memory_region(s, mem);
  181: }
  182: 
  183: int kvm_log_start(target_phys_addr_t phys_addr, target_phys_addr_t end_addr)
  184: {
  185:         return kvm_dirty_pages_log_change(phys_addr, end_addr,
  186:                                           KVM_MEM_LOG_DIRTY_PAGES,
  187:                                           KVM_MEM_LOG_DIRTY_PAGES);
  188: }
  189: 
  190: int kvm_log_stop(target_phys_addr_t phys_addr, target_phys_addr_t end_addr)
  191: {
  192:         return kvm_dirty_pages_log_change(phys_addr, end_addr,
  193:                                           0,
  194:                                           KVM_MEM_LOG_DIRTY_PAGES);
  195: }
  196: 
  197: /**
  198:  * kvm_physical_sync_dirty_bitmap - Grab dirty bitmap from kernel space
  199:  * This function updates qemu's dirty bitmap using cpu_physical_memory_set_dirty().
  200:  * This means all bits are set to dirty.
  201:  *
  202:  * @start_add: start of logged region. This is what we use to search the memslot
  203:  * @end_addr: end of logged region.
  204:  */
  205: void kvm_physical_sync_dirty_bitmap(target_phys_addr_t start_addr, target_phys_addr_t end_addr)
  206: {
  207:     KVMState *s = kvm_state;
  208:     KVMDirtyLog d;
  209:     KVMSlot *mem = kvm_lookup_slot(s, start_addr);
  210:     unsigned long alloc_size;
  211:     ram_addr_t addr;
  212:     target_phys_addr_t phys_addr = start_addr;
  213: 
  214:     dprintf("sync addr: %llx into %lx\n", start_addr, mem->phys_offset);
  215:     if (mem == NULL) {
  216:             fprintf(stderr, "BUG: %s: invalid parameters\n", __func__);
  217:             return;
  218:     }
  219: 
  220:     alloc_size = mem->memory_size >> TARGET_PAGE_BITS / sizeof(d.dirty_bitmap);
  221:     d.dirty_bitmap = qemu_mallocz(alloc_size);
  222: 
  223:     d.slot = mem->slot;
  224:     dprintf("slot %d, phys_addr %llx, uaddr: %llx\n",
  225:             d.slot, mem->start_addr, mem->phys_offset);
  226: 
  227:     if (kvm_vm_ioctl(s, KVM_GET_DIRTY_LOG, &d) == -1) {
  228:         dprintf("ioctl failed %d\n", errno);
  229:         goto out;
  230:     }
  231: 
  232:     phys_addr = start_addr;
  233:     for (addr = mem->phys_offset; phys_addr < end_addr; phys_addr+= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
  234:         unsigned long *bitmap = (unsigned long *)d.dirty_bitmap;
  235:         unsigned nr = (phys_addr - start_addr) >> TARGET_PAGE_BITS;
  236:         unsigned word = nr / (sizeof(*bitmap) * 8);
  237:         unsigned bit = nr % (sizeof(*bitmap) * 8);
  238:         if ((bitmap[word] >> bit) & 1)
  239:             cpu_physical_memory_set_dirty(addr);
  240:     }
  241: out:
  242:     qemu_free(d.dirty_bitmap);
  243: }
  244: 
  245: int kvm_coalesce_mmio_region(target_phys_addr_t start, ram_addr_t size)
  246: {
  247:     int ret = -ENOSYS;
  248: #ifdef KVM_CAP_COALESCED_MMIO
  249:     KVMState *s = kvm_state;
  250: 
  251:     if (s->coalesced_mmio) {
  252:         struct kvm_coalesced_mmio_zone zone;
  253: 
  254:         zone.addr = start;
  255:         zone.size = size;
  256: 
  257:         ret = kvm_vm_ioctl(s, KVM_REGISTER_COALESCED_MMIO, &zone);
  258:     }
  259: #endif
  260: 
  261:     return ret;
  262: }
  263: 
  264: int kvm_uncoalesce_mmio_region(target_phys_addr_t start, ram_addr_t size)
  265: {
  266:     int ret = -ENOSYS;
  267: #ifdef KVM_CAP_COALESCED_MMIO
  268:     KVMState *s = kvm_state;
  269: 
  270:     if (s->coalesced_mmio) {
  271:         struct kvm_coalesced_mmio_zone zone;
  272: 
  273:         zone.addr = start;
  274:         zone.size = size;
  275: 
  276:         ret = kvm_vm_ioctl(s, KVM_UNREGISTER_COALESCED_MMIO, &zone);
  277:     }
  278: #endif
  279: 
  280:     return ret;
  281: }
  282: 
  283: int kvm_init(int smp_cpus)
  284: {
  285:     KVMState *s;
  286:     int ret;
  287:     int i;
  288: 
  289:     if (smp_cpus > 1)
  290:         return -EINVAL;
  291: 
  292:     s = qemu_mallocz(sizeof(KVMState));
  293: 
  294:     for (i = 0; i < ARRAY_SIZE(s->slots); i++)
  295:         s->slots[i].slot = i;
  296: 
  297:     s->vmfd = -1;
  298:     s->fd = open("/dev/kvm", O_RDWR);
  299:     if (s->fd == -1) {
  300:         fprintf(stderr, "Could not access KVM kernel module: %m\n");
  301:         ret = -errno;
  302:         goto err;
  303:     }
  304: 
  305:     ret = kvm_ioctl(s, KVM_GET_API_VERSION, 0);
  306:     if (ret < KVM_API_VERSION) {
  307:         if (ret > 0)
  308:             ret = -EINVAL;
  309:         fprintf(stderr, "kvm version too old\n");
  310:         goto err;
  311:     }
  312: 
  313:     if (ret > KVM_API_VERSION) {
  314:         ret = -EINVAL;
  315:         fprintf(stderr, "kvm version not supported\n");
  316:         goto err;
  317:     }
  318: 
  319:     s->vmfd = kvm_ioctl(s, KVM_CREATE_VM, 0);
  320:     if (s->vmfd < 0)
  321:         goto err;
  322: 
  323:     /* initially, KVM allocated its own memory and we had to jump through
  324:      * hooks to make phys_ram_base point to this.  Modern versions of KVM
  325:      * just use a user allocated buffer so we can use phys_ram_base
  326:      * unmodified.  Make sure we have a sufficiently modern version of KVM.
  327:      */
  328:     ret = kvm_ioctl(s, KVM_CHECK_EXTENSION, KVM_CAP_USER_MEMORY);
  329:     if (ret <= 0) {
  330:         if (ret == 0)
  331:             ret = -EINVAL;
  332:         fprintf(stderr, "kvm does not support KVM_CAP_USER_MEMORY\n");
  333:         goto err;
  334:     }
  335: 
  336:     /* There was a nasty bug in < kvm-80 that prevents memory slots from being
  337:      * destroyed properly.  Since we rely on this capability, refuse to work
  338:      * with any kernel without this capability. */
  339:     ret = kvm_ioctl(s, KVM_CHECK_EXTENSION,
  340:                     KVM_CAP_DESTROY_MEMORY_REGION_WORKS);
  341:     if (ret <= 0) {
  342:         if (ret == 0)
  343:             ret = -EINVAL;
  344: 
  345:         fprintf(stderr,
  346:                 "KVM kernel module broken (DESTROY_MEMORY_REGION)\n"
  347:                 "Please upgrade to at least kvm-81.\n");
  348:         goto err;
  349:     }
  350: 
  351:     s->coalesced_mmio = 0;
  352: #ifdef KVM_CAP_COALESCED_MMIO
  353:     ret = kvm_ioctl(s, KVM_CHECK_EXTENSION, KVM_CAP_COALESCED_MMIO);
  354:     if (ret > 0)
  355:         s->coalesced_mmio = ret;
  356: #endif
  357: 
  358:     ret = kvm_arch_init(s, smp_cpus);
  359:     if (ret < 0)
  360:         goto err;
  361: 
  362:     kvm_state = s;
  363: 
  364:     return 0;
  365: 
  366: err:
  367:     if (s) {
  368:         if (s->vmfd != -1)
  369:             close(s->vmfd);
  370:         if (s->fd != -1)
  371:             close(s->fd);
  372:     }
  373:     qemu_free(s);
  374: 
  375:     return ret;
  376: }
  377: 
  378: static int kvm_handle_io(CPUState *env, uint16_t port, void *data,
  379:                          int direction, int size, uint32_t count)
  380: {
  381:     int i;
  382:     uint8_t *ptr = data;
  383: 
  384:     for (i = 0; i < count; i++) {
  385:         if (direction == KVM_EXIT_IO_IN) {
  386:             switch (size) {
  387:             case 1:
  388:                 stb_p(ptr, cpu_inb(env, port));
  389:                 break;
  390:             case 2:
  391:                 stw_p(ptr, cpu_inw(env, port));
  392:                 break;
  393:             case 4:
  394:                 stl_p(ptr, cpu_inl(env, port));
  395:                 break;
  396:             }
  397:         } else {
  398:             switch (size) {
  399:             case 1:
  400:                 cpu_outb(env, port, ldub_p(ptr));
  401:                 break;
  402:             case 2:
  403:                 cpu_outw(env, port, lduw_p(ptr));
  404:                 break;
  405:             case 4:
  406:                 cpu_outl(env, port, ldl_p(ptr));
  407:                 break;
  408:             }
  409:         }
  410: 
  411:         ptr += size;
  412:     }
  413: 
  414:     return 1;
  415: }
  416: 
  417: static void kvm_run_coalesced_mmio(CPUState *env, struct kvm_run *run)
  418: {
  419: #ifdef KVM_CAP_COALESCED_MMIO
  420:     KVMState *s = kvm_state;
  421:     if (s->coalesced_mmio) {
  422:         struct kvm_coalesced_mmio_ring *ring;
  423: 
  424:         ring = (void *)run + (s->coalesced_mmio * TARGET_PAGE_SIZE);
  425:         while (ring->first != ring->last) {
  426:             struct kvm_coalesced_mmio *ent;
  427: 
  428:             ent = &ring->coalesced_mmio[ring->first];
  429: 
  430:             cpu_physical_memory_write(ent->phys_addr, ent->data, ent->len);
  431:             /* FIXME smp_wmb() */
  432:             ring->first = (ring->first + 1) % KVM_COALESCED_MMIO_MAX;
  433:         }
  434:     }
  435: #endif
  436: }
  437: 
/*
 * Run the vcpu in KVM until an exit needs attention from the caller.
 *
 * Each iteration issues KVM_RUN and then dispatches on run->exit_reason.
 * A handler result greater than 0 means "handled, enter the guest again";
 * 0 or a negative value leaves the loop and is returned to the caller.
 * A KVM_RUN failure other than -EINTR/-EAGAIN aborts the process.
 */
int kvm_cpu_exec(CPUState *env)
{
    struct kvm_run *run = env->kvm_run;
    int ret;

    dprintf("kvm_cpu_exec()\n");

    do {
        kvm_arch_pre_run(env, run);

        /* an exit was requested: leave without entering the guest */
        if ((env->interrupt_request & CPU_INTERRUPT_EXIT)) {
            dprintf("interrupt exit requested\n");
            ret = 0;
            break;
        }

        ret = kvm_vcpu_ioctl(env, KVM_RUN, 0);
        kvm_arch_post_run(env, run);

        /* interrupted or asked to retry: not an error, just return */
        if (ret == -EINTR || ret == -EAGAIN) {
            dprintf("io window exit\n");
            ret = 0;
            break;
        }

        if (ret < 0) {
            dprintf("kvm run failed %s\n", strerror(-ret));
            abort();
        }

        /* flush queued coalesced MMIO writes before handling this exit */
        kvm_run_coalesced_mmio(env, run);

        ret = 0; /* exit loop */
        switch (run->exit_reason) {
        case KVM_EXIT_IO:
            dprintf("handle_io\n");
            /* the I/O data lives inside the kvm_run mapping at data_offset */
            ret = kvm_handle_io(env, run->io.port,
                                (uint8_t *)run + run->io.data_offset,
                                run->io.direction,
                                run->io.size,
                                run->io.count);
            break;
        case KVM_EXIT_MMIO:
            dprintf("handle_mmio\n");
            cpu_physical_memory_rw(run->mmio.phys_addr,
                                   run->mmio.data,
                                   run->mmio.len,
                                   run->mmio.is_write);
            ret = 1;
            break;
        case KVM_EXIT_IRQ_WINDOW_OPEN:
            dprintf("irq_window_open\n");
            break;
        case KVM_EXIT_SHUTDOWN:
            dprintf("shutdown\n");
            qemu_system_reset_request();
            ret = 1;
            break;
        /* the following exits are only traced; ret stays 0, ending the loop */
        case KVM_EXIT_UNKNOWN:
            dprintf("kvm_exit_unknown\n");
            break;
        case KVM_EXIT_FAIL_ENTRY:
            dprintf("kvm_exit_fail_entry\n");
            break;
        case KVM_EXIT_EXCEPTION:
            dprintf("kvm_exit_exception\n");
            break;
        case KVM_EXIT_DEBUG:
            dprintf("kvm_exit_debug\n");
            break;
        default:
            /* anything else is left to the architecture-specific code */
            dprintf("kvm_arch_handle_exit\n");
            ret = kvm_arch_handle_exit(env, run);
            break;
        }
    } while (ret > 0);

    /* acknowledge a pending exit request and report it as EXCP_INTERRUPT */
    if ((env->interrupt_request & CPU_INTERRUPT_EXIT)) {
        env->interrupt_request &= ~CPU_INTERRUPT_EXIT;
        env->exception_index = EXCP_INTERRUPT;
    }

    return ret;
}
  522: 
  523: void kvm_set_phys_mem(target_phys_addr_t start_addr,
  524:                       ram_addr_t size,
  525:                       ram_addr_t phys_offset)
  526: {
  527:     KVMState *s = kvm_state;
  528:     ram_addr_t flags = phys_offset & ~TARGET_PAGE_MASK;
  529:     KVMSlot *mem;
  530: 
  531:     /* KVM does not support read-only slots */
  532:     phys_offset &= ~IO_MEM_ROM;
  533: 
  534:     mem = kvm_lookup_slot(s, start_addr);
  535:     if (mem) {
  536:         if ((flags == IO_MEM_UNASSIGNED) || (flags >= TLB_MMIO)) {
  537:             mem->memory_size = 0;
  538:             mem->start_addr = start_addr;
  539:             mem->phys_offset = 0;
  540:             mem->flags = 0;
  541: 
  542:             kvm_set_user_memory_region(s, mem);
  543:         } else if (start_addr >= mem->start_addr &&
  544:                    (start_addr + size) <= (mem->start_addr +
  545:                                            mem->memory_size)) {
  546:             KVMSlot slot;
  547:             target_phys_addr_t mem_start;
  548:             ram_addr_t mem_size, mem_offset;
  549: 
  550:             /* Not splitting */
  551:             if ((phys_offset - (start_addr - mem->start_addr)) == 
  552:                 mem->phys_offset)
  553:                 return;
  554: 
  555:             /* unregister whole slot */
  556:             memcpy(&slot, mem, sizeof(slot));
  557:             mem->memory_size = 0;
  558:             kvm_set_user_memory_region(s, mem);
  559: 
  560:             /* register prefix slot */
  561:             mem_start = slot.start_addr;
  562:             mem_size = start_addr - slot.start_addr;
  563:             mem_offset = slot.phys_offset;
  564:             if (mem_size)
  565:                 kvm_set_phys_mem(mem_start, mem_size, mem_offset);
  566: 
  567:             /* register new slot */
  568:             kvm_set_phys_mem(start_addr, size, phys_offset);
  569: 
  570:             /* register suffix slot */
  571:             mem_start = start_addr + size;
  572:             mem_offset += mem_size + size;
  573:             mem_size = slot.memory_size - mem_size - size;
  574:             if (mem_size)
  575:                 kvm_set_phys_mem(mem_start, mem_size, mem_offset);
  576: 
  577:             return;
  578:         } else {
  579:             printf("Registering overlapping slot\n");
  580:             abort();
  581:         }
  582:     }
  583:     /* KVM does not need to know about this memory */
  584:     if (flags >= IO_MEM_UNASSIGNED)
  585:         return;
  586: 
  587:     mem = kvm_alloc_slot(s);
  588:     mem->memory_size = size;
  589:     mem->start_addr = start_addr;
  590:     mem->phys_offset = phys_offset;
  591:     mem->flags = 0;
  592: 
  593:     kvm_set_user_memory_region(s, mem);
  594:     /* FIXME deal with errors */
  595: }
  596: 
  597: int kvm_ioctl(KVMState *s, int type, ...)
  598: {
  599:     int ret;
  600:     void *arg;
  601:     va_list ap;
  602: 
  603:     va_start(ap, type);
  604:     arg = va_arg(ap, void *);
  605:     va_end(ap);
  606: 
  607:     ret = ioctl(s->fd, type, arg);
  608:     if (ret == -1)
  609:         ret = -errno;
  610: 
  611:     return ret;
  612: }
  613: 
  614: int kvm_vm_ioctl(KVMState *s, int type, ...)
  615: {
  616:     int ret;
  617:     void *arg;
  618:     va_list ap;
  619: 
  620:     va_start(ap, type);
  621:     arg = va_arg(ap, void *);
  622:     va_end(ap);
  623: 
  624:     ret = ioctl(s->vmfd, type, arg);
  625:     if (ret == -1)
  626:         ret = -errno;
  627: 
  628:     return ret;
  629: }
  630: 
  631: int kvm_vcpu_ioctl(CPUState *env, int type, ...)
  632: {
  633:     int ret;
  634:     void *arg;
  635:     va_list ap;
  636: 
  637:     va_start(ap, type);
  638:     arg = va_arg(ap, void *);
  639:     va_end(ap);
  640: 
  641:     ret = ioctl(env->kvm_fd, type, arg);
  642:     if (ret == -1)
  643:         ret = -errno;
  644: 
  645:     return ret;
  646: }
  647: 
  648: int kvm_has_sync_mmu(void)
  649: {
  650: #ifdef KVM_CAP_SYNC_MMU
  651:     KVMState *s = kvm_state;
  652: 
  653:     if (kvm_ioctl(s, KVM_CHECK_EXTENSION, KVM_CAP_SYNC_MMU) > 0)
  654:         return 1;
  655: #endif
  656: 
  657:     return 0;
  658: }

unix.superglobalmegacorp.com