File:  [Qemu by Fabrice Bellard] / qemu / kvm-all.c
Revision 1.1.1.7 (vendor branch): download - view: text, annotated - select for diffs
Tue Apr 24 17:33:59 2018 UTC (2 years, 1 month ago) by root
Branches: qemu, MAIN
CVS tags: qemu0125, qemu0124, qemu0123, qemu0122, qemu0121, qemu0120, HEAD
qemu 0.12.0

    1: /*
    2:  * QEMU KVM support
    3:  *
    4:  * Copyright IBM, Corp. 2008
    5:  *           Red Hat, Inc. 2008
    6:  *
    7:  * Authors:
    8:  *  Anthony Liguori   <aliguori@us.ibm.com>
    9:  *  Glauber Costa     <gcosta@redhat.com>
   10:  *
   11:  * This work is licensed under the terms of the GNU GPL, version 2 or later.
   12:  * See the COPYING file in the top-level directory.
   13:  *
   14:  */
   15: 
   16: #include <sys/types.h>
   17: #include <sys/ioctl.h>
   18: #include <sys/mman.h>
   19: #include <stdarg.h>
   20: 
   21: #include <linux/kvm.h>
   22: 
   23: #include "qemu-common.h"
   24: #include "sysemu.h"
   25: #include "hw/hw.h"
   26: #include "gdbstub.h"
   27: #include "kvm.h"
   28: 
   29: /* KVM uses PAGE_SIZE in its definition of COALESCED_MMIO_MAX */
   30: #define PAGE_SIZE TARGET_PAGE_SIZE
   31: 
   32: //#define DEBUG_KVM
   33: 
   34: #ifdef DEBUG_KVM
   35: #define dprintf(fmt, ...) \
   36:     do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
   37: #else
   38: #define dprintf(fmt, ...) \
   39:     do { } while (0)
   40: #endif
   41: 
   42: typedef struct KVMSlot
   43: {
   44:     target_phys_addr_t start_addr;
   45:     ram_addr_t memory_size;
   46:     ram_addr_t phys_offset;
   47:     int slot;
   48:     int flags;
   49: } KVMSlot;
   50: 
   51: typedef struct kvm_dirty_log KVMDirtyLog;
   52: 
   53: int kvm_allowed = 0;
   54: 
   55: struct KVMState
   56: {
   57:     KVMSlot slots[32];
   58:     int fd;
   59:     int vmfd;
   60:     int regs_modified;
   61:     int coalesced_mmio;
   62:     int broken_set_mem_region;
   63:     int migration_log;
   64:     int vcpu_events;
   65: #ifdef KVM_CAP_SET_GUEST_DEBUG
   66:     struct kvm_sw_breakpoint_head kvm_sw_breakpoints;
   67: #endif
   68:     int irqchip_in_kernel;
   69:     int pit_in_kernel;
   70: };
   71: 
   72: static KVMState *kvm_state;
   73: 
   74: static KVMSlot *kvm_alloc_slot(KVMState *s)
   75: {
   76:     int i;
   77: 
   78:     for (i = 0; i < ARRAY_SIZE(s->slots); i++) {
   79:         /* KVM private memory slots */
   80:         if (i >= 8 && i < 12)
   81:             continue;
   82:         if (s->slots[i].memory_size == 0)
   83:             return &s->slots[i];
   84:     }
   85: 
   86:     fprintf(stderr, "%s: no free slot available\n", __func__);
   87:     abort();
   88: }
   89: 
   90: static KVMSlot *kvm_lookup_matching_slot(KVMState *s,
   91:                                          target_phys_addr_t start_addr,
   92:                                          target_phys_addr_t end_addr)
   93: {
   94:     int i;
   95: 
   96:     for (i = 0; i < ARRAY_SIZE(s->slots); i++) {
   97:         KVMSlot *mem = &s->slots[i];
   98: 
   99:         if (start_addr == mem->start_addr &&
  100:             end_addr == mem->start_addr + mem->memory_size) {
  101:             return mem;
  102:         }
  103:     }
  104: 
  105:     return NULL;
  106: }
  107: 
  108: /*
  109:  * Find overlapping slot with lowest start address
  110:  */
  111: static KVMSlot *kvm_lookup_overlapping_slot(KVMState *s,
  112:                                             target_phys_addr_t start_addr,
  113:                                             target_phys_addr_t end_addr)
  114: {
  115:     KVMSlot *found = NULL;
  116:     int i;
  117: 
  118:     for (i = 0; i < ARRAY_SIZE(s->slots); i++) {
  119:         KVMSlot *mem = &s->slots[i];
  120: 
  121:         if (mem->memory_size == 0 ||
  122:             (found && found->start_addr < mem->start_addr)) {
  123:             continue;
  124:         }
  125: 
  126:         if (end_addr > mem->start_addr &&
  127:             start_addr < mem->start_addr + mem->memory_size) {
  128:             found = mem;
  129:         }
  130:     }
  131: 
  132:     return found;
  133: }
  134: 
  135: static int kvm_set_user_memory_region(KVMState *s, KVMSlot *slot)
  136: {
  137:     struct kvm_userspace_memory_region mem;
  138: 
  139:     mem.slot = slot->slot;
  140:     mem.guest_phys_addr = slot->start_addr;
  141:     mem.memory_size = slot->memory_size;
  142:     mem.userspace_addr = (unsigned long)qemu_get_ram_ptr(slot->phys_offset);
  143:     mem.flags = slot->flags;
  144:     if (s->migration_log) {
  145:         mem.flags |= KVM_MEM_LOG_DIRTY_PAGES;
  146:     }
  147:     return kvm_vm_ioctl(s, KVM_SET_USER_MEMORY_REGION, &mem);
  148: }
  149: 
  150: static void kvm_reset_vcpu(void *opaque)
  151: {
  152:     CPUState *env = opaque;
  153: 
  154:     kvm_arch_reset_vcpu(env);
  155:     if (kvm_arch_put_registers(env)) {
  156:         fprintf(stderr, "Fatal: kvm vcpu reset failed\n");
  157:         abort();
  158:     }
  159: }
  160: 
  161: int kvm_irqchip_in_kernel(void)
  162: {
  163:     return kvm_state->irqchip_in_kernel;
  164: }
  165: 
  166: int kvm_pit_in_kernel(void)
  167: {
  168:     return kvm_state->pit_in_kernel;
  169: }
  170: 
  171: 
  172: int kvm_init_vcpu(CPUState *env)
  173: {
  174:     KVMState *s = kvm_state;
  175:     long mmap_size;
  176:     int ret;
  177: 
  178:     dprintf("kvm_init_vcpu\n");
  179: 
  180:     ret = kvm_vm_ioctl(s, KVM_CREATE_VCPU, env->cpu_index);
  181:     if (ret < 0) {
  182:         dprintf("kvm_create_vcpu failed\n");
  183:         goto err;
  184:     }
  185: 
  186:     env->kvm_fd = ret;
  187:     env->kvm_state = s;
  188: 
  189:     mmap_size = kvm_ioctl(s, KVM_GET_VCPU_MMAP_SIZE, 0);
  190:     if (mmap_size < 0) {
  191:         dprintf("KVM_GET_VCPU_MMAP_SIZE failed\n");
  192:         goto err;
  193:     }
  194: 
  195:     env->kvm_run = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED,
  196:                         env->kvm_fd, 0);
  197:     if (env->kvm_run == MAP_FAILED) {
  198:         ret = -errno;
  199:         dprintf("mmap'ing vcpu state failed\n");
  200:         goto err;
  201:     }
  202: 
  203:     ret = kvm_arch_init_vcpu(env);
  204:     if (ret == 0) {
  205:         qemu_register_reset(kvm_reset_vcpu, env);
  206:         kvm_arch_reset_vcpu(env);
  207:         ret = kvm_arch_put_registers(env);
  208:     }
  209: err:
  210:     return ret;
  211: }
  212: 
  213: /*
  214:  * dirty pages logging control
  215:  */
  216: static int kvm_dirty_pages_log_change(target_phys_addr_t phys_addr,
  217:                                       ram_addr_t size, int flags, int mask)
  218: {
  219:     KVMState *s = kvm_state;
  220:     KVMSlot *mem = kvm_lookup_matching_slot(s, phys_addr, phys_addr + size);
  221:     int old_flags;
  222: 
  223:     if (mem == NULL)  {
  224:             fprintf(stderr, "BUG: %s: invalid parameters " TARGET_FMT_plx "-"
  225:                     TARGET_FMT_plx "\n", __func__, phys_addr,
  226:                     (target_phys_addr_t)(phys_addr + size - 1));
  227:             return -EINVAL;
  228:     }
  229: 
  230:     old_flags = mem->flags;
  231: 
  232:     flags = (mem->flags & ~mask) | flags;
  233:     mem->flags = flags;
  234: 
  235:     /* If nothing changed effectively, no need to issue ioctl */
  236:     if (s->migration_log) {
  237:         flags |= KVM_MEM_LOG_DIRTY_PAGES;
  238:     }
  239:     if (flags == old_flags) {
  240:             return 0;
  241:     }
  242: 
  243:     return kvm_set_user_memory_region(s, mem);
  244: }
  245: 
  246: int kvm_log_start(target_phys_addr_t phys_addr, ram_addr_t size)
  247: {
  248:         return kvm_dirty_pages_log_change(phys_addr, size,
  249:                                           KVM_MEM_LOG_DIRTY_PAGES,
  250:                                           KVM_MEM_LOG_DIRTY_PAGES);
  251: }
  252: 
  253: int kvm_log_stop(target_phys_addr_t phys_addr, ram_addr_t size)
  254: {
  255:         return kvm_dirty_pages_log_change(phys_addr, size,
  256:                                           0,
  257:                                           KVM_MEM_LOG_DIRTY_PAGES);
  258: }
  259: 
  260: int kvm_set_migration_log(int enable)
  261: {
  262:     KVMState *s = kvm_state;
  263:     KVMSlot *mem;
  264:     int i, err;
  265: 
  266:     s->migration_log = enable;
  267: 
  268:     for (i = 0; i < ARRAY_SIZE(s->slots); i++) {
  269:         mem = &s->slots[i];
  270: 
  271:         if (!!(mem->flags & KVM_MEM_LOG_DIRTY_PAGES) == enable) {
  272:             continue;
  273:         }
  274:         err = kvm_set_user_memory_region(s, mem);
  275:         if (err) {
  276:             return err;
  277:         }
  278:     }
  279:     return 0;
  280: }
  281: 
  282: static int test_le_bit(unsigned long nr, unsigned char *addr)
  283: {
  284:     return (addr[nr >> 3] >> (nr & 7)) & 1;
  285: }
  286: 
  287: /**
  288:  * kvm_physical_sync_dirty_bitmap - Grab dirty bitmap from kernel space
  289:  * This function updates qemu's dirty bitmap using cpu_physical_memory_set_dirty().
  290:  * This means all bits are set to dirty.
  291:  *
  292:  * @start_add: start of logged region.
  293:  * @end_addr: end of logged region.
  294:  */
  295: int kvm_physical_sync_dirty_bitmap(target_phys_addr_t start_addr,
  296:                                    target_phys_addr_t end_addr)
  297: {
  298:     KVMState *s = kvm_state;
  299:     unsigned long size, allocated_size = 0;
  300:     target_phys_addr_t phys_addr;
  301:     ram_addr_t addr;
  302:     KVMDirtyLog d;
  303:     KVMSlot *mem;
  304:     int ret = 0;
  305: 
  306:     d.dirty_bitmap = NULL;
  307:     while (start_addr < end_addr) {
  308:         mem = kvm_lookup_overlapping_slot(s, start_addr, end_addr);
  309:         if (mem == NULL) {
  310:             break;
  311:         }
  312: 
  313:         size = ((mem->memory_size >> TARGET_PAGE_BITS) + 7) / 8;
  314:         if (!d.dirty_bitmap) {
  315:             d.dirty_bitmap = qemu_malloc(size);
  316:         } else if (size > allocated_size) {
  317:             d.dirty_bitmap = qemu_realloc(d.dirty_bitmap, size);
  318:         }
  319:         allocated_size = size;
  320:         memset(d.dirty_bitmap, 0, allocated_size);
  321: 
  322:         d.slot = mem->slot;
  323: 
  324:         if (kvm_vm_ioctl(s, KVM_GET_DIRTY_LOG, &d) == -1) {
  325:             dprintf("ioctl failed %d\n", errno);
  326:             ret = -1;
  327:             break;
  328:         }
  329: 
  330:         for (phys_addr = mem->start_addr, addr = mem->phys_offset;
  331:              phys_addr < mem->start_addr + mem->memory_size;
  332:              phys_addr += TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
  333:             unsigned char *bitmap = (unsigned char *)d.dirty_bitmap;
  334:             unsigned nr = (phys_addr - mem->start_addr) >> TARGET_PAGE_BITS;
  335: 
  336:             if (test_le_bit(nr, bitmap)) {
  337:                 cpu_physical_memory_set_dirty(addr);
  338:             }
  339:         }
  340:         start_addr = phys_addr;
  341:     }
  342:     qemu_free(d.dirty_bitmap);
  343: 
  344:     return ret;
  345: }
  346: 
  347: int kvm_coalesce_mmio_region(target_phys_addr_t start, ram_addr_t size)
  348: {
  349:     int ret = -ENOSYS;
  350: #ifdef KVM_CAP_COALESCED_MMIO
  351:     KVMState *s = kvm_state;
  352: 
  353:     if (s->coalesced_mmio) {
  354:         struct kvm_coalesced_mmio_zone zone;
  355: 
  356:         zone.addr = start;
  357:         zone.size = size;
  358: 
  359:         ret = kvm_vm_ioctl(s, KVM_REGISTER_COALESCED_MMIO, &zone);
  360:     }
  361: #endif
  362: 
  363:     return ret;
  364: }
  365: 
  366: int kvm_uncoalesce_mmio_region(target_phys_addr_t start, ram_addr_t size)
  367: {
  368:     int ret = -ENOSYS;
  369: #ifdef KVM_CAP_COALESCED_MMIO
  370:     KVMState *s = kvm_state;
  371: 
  372:     if (s->coalesced_mmio) {
  373:         struct kvm_coalesced_mmio_zone zone;
  374: 
  375:         zone.addr = start;
  376:         zone.size = size;
  377: 
  378:         ret = kvm_vm_ioctl(s, KVM_UNREGISTER_COALESCED_MMIO, &zone);
  379:     }
  380: #endif
  381: 
  382:     return ret;
  383: }
  384: 
  385: int kvm_check_extension(KVMState *s, unsigned int extension)
  386: {
  387:     int ret;
  388: 
  389:     ret = kvm_ioctl(s, KVM_CHECK_EXTENSION, extension);
  390:     if (ret < 0) {
  391:         ret = 0;
  392:     }
  393: 
  394:     return ret;
  395: }
  396: 
  397: int kvm_init(int smp_cpus)
  398: {
  399:     static const char upgrade_note[] =
  400:         "Please upgrade to at least kernel 2.6.29 or recent kvm-kmod\n"
  401:         "(see http://sourceforge.net/projects/kvm).\n";
  402:     KVMState *s;
  403:     int ret;
  404:     int i;
  405: 
  406:     if (smp_cpus > 1) {
  407:         fprintf(stderr, "No SMP KVM support, use '-smp 1'\n");
  408:         return -EINVAL;
  409:     }
  410: 
  411:     s = qemu_mallocz(sizeof(KVMState));
  412: 
  413: #ifdef KVM_CAP_SET_GUEST_DEBUG
  414:     QTAILQ_INIT(&s->kvm_sw_breakpoints);
  415: #endif
  416:     for (i = 0; i < ARRAY_SIZE(s->slots); i++)
  417:         s->slots[i].slot = i;
  418: 
  419:     s->vmfd = -1;
  420:     s->fd = qemu_open("/dev/kvm", O_RDWR);
  421:     if (s->fd == -1) {
  422:         fprintf(stderr, "Could not access KVM kernel module: %m\n");
  423:         ret = -errno;
  424:         goto err;
  425:     }
  426: 
  427:     ret = kvm_ioctl(s, KVM_GET_API_VERSION, 0);
  428:     if (ret < KVM_API_VERSION) {
  429:         if (ret > 0)
  430:             ret = -EINVAL;
  431:         fprintf(stderr, "kvm version too old\n");
  432:         goto err;
  433:     }
  434: 
  435:     if (ret > KVM_API_VERSION) {
  436:         ret = -EINVAL;
  437:         fprintf(stderr, "kvm version not supported\n");
  438:         goto err;
  439:     }
  440: 
  441:     s->vmfd = kvm_ioctl(s, KVM_CREATE_VM, 0);
  442:     if (s->vmfd < 0)
  443:         goto err;
  444: 
  445:     /* initially, KVM allocated its own memory and we had to jump through
  446:      * hooks to make phys_ram_base point to this.  Modern versions of KVM
  447:      * just use a user allocated buffer so we can use regular pages
  448:      * unmodified.  Make sure we have a sufficiently modern version of KVM.
  449:      */
  450:     if (!kvm_check_extension(s, KVM_CAP_USER_MEMORY)) {
  451:         ret = -EINVAL;
  452:         fprintf(stderr, "kvm does not support KVM_CAP_USER_MEMORY\n%s",
  453:                 upgrade_note);
  454:         goto err;
  455:     }
  456: 
  457:     /* There was a nasty bug in < kvm-80 that prevents memory slots from being
  458:      * destroyed properly.  Since we rely on this capability, refuse to work
  459:      * with any kernel without this capability. */
  460:     if (!kvm_check_extension(s, KVM_CAP_DESTROY_MEMORY_REGION_WORKS)) {
  461:         ret = -EINVAL;
  462: 
  463:         fprintf(stderr,
  464:                 "KVM kernel module broken (DESTROY_MEMORY_REGION).\n%s",
  465:                 upgrade_note);
  466:         goto err;
  467:     }
  468: 
  469: #ifdef KVM_CAP_COALESCED_MMIO
  470:     s->coalesced_mmio = kvm_check_extension(s, KVM_CAP_COALESCED_MMIO);
  471: #else
  472:     s->coalesced_mmio = 0;
  473: #endif
  474: 
  475:     s->broken_set_mem_region = 1;
  476: #ifdef KVM_CAP_JOIN_MEMORY_REGIONS_WORKS
  477:     ret = kvm_ioctl(s, KVM_CHECK_EXTENSION, KVM_CAP_JOIN_MEMORY_REGIONS_WORKS);
  478:     if (ret > 0) {
  479:         s->broken_set_mem_region = 0;
  480:     }
  481: #endif
  482: 
  483:     s->vcpu_events = 0;
  484: #ifdef KVM_CAP_VCPU_EVENTS
  485:     s->vcpu_events = kvm_check_extension(s, KVM_CAP_VCPU_EVENTS);
  486: #endif
  487: 
  488:     ret = kvm_arch_init(s, smp_cpus);
  489:     if (ret < 0)
  490:         goto err;
  491: 
  492:     kvm_state = s;
  493: 
  494:     return 0;
  495: 
  496: err:
  497:     if (s) {
  498:         if (s->vmfd != -1)
  499:             close(s->vmfd);
  500:         if (s->fd != -1)
  501:             close(s->fd);
  502:     }
  503:     qemu_free(s);
  504: 
  505:     return ret;
  506: }
  507: 
  508: static int kvm_handle_io(uint16_t port, void *data, int direction, int size,
  509:                          uint32_t count)
  510: {
  511:     int i;
  512:     uint8_t *ptr = data;
  513: 
  514:     for (i = 0; i < count; i++) {
  515:         if (direction == KVM_EXIT_IO_IN) {
  516:             switch (size) {
  517:             case 1:
  518:                 stb_p(ptr, cpu_inb(port));
  519:                 break;
  520:             case 2:
  521:                 stw_p(ptr, cpu_inw(port));
  522:                 break;
  523:             case 4:
  524:                 stl_p(ptr, cpu_inl(port));
  525:                 break;
  526:             }
  527:         } else {
  528:             switch (size) {
  529:             case 1:
  530:                 cpu_outb(port, ldub_p(ptr));
  531:                 break;
  532:             case 2:
  533:                 cpu_outw(port, lduw_p(ptr));
  534:                 break;
  535:             case 4:
  536:                 cpu_outl(port, ldl_p(ptr));
  537:                 break;
  538:             }
  539:         }
  540: 
  541:         ptr += size;
  542:     }
  543: 
  544:     return 1;
  545: }
  546: 
  547: static void kvm_run_coalesced_mmio(CPUState *env, struct kvm_run *run)
  548: {
  549: #ifdef KVM_CAP_COALESCED_MMIO
  550:     KVMState *s = kvm_state;
  551:     if (s->coalesced_mmio) {
  552:         struct kvm_coalesced_mmio_ring *ring;
  553: 
  554:         ring = (void *)run + (s->coalesced_mmio * TARGET_PAGE_SIZE);
  555:         while (ring->first != ring->last) {
  556:             struct kvm_coalesced_mmio *ent;
  557: 
  558:             ent = &ring->coalesced_mmio[ring->first];
  559: 
  560:             cpu_physical_memory_write(ent->phys_addr, ent->data, ent->len);
  561:             /* FIXME smp_wmb() */
  562:             ring->first = (ring->first + 1) % KVM_COALESCED_MMIO_MAX;
  563:         }
  564:     }
  565: #endif
  566: }
  567: 
  568: void kvm_cpu_synchronize_state(CPUState *env)
  569: {
  570:     if (!env->kvm_state->regs_modified) {
  571:         kvm_arch_get_registers(env);
  572:         env->kvm_state->regs_modified = 1;
  573:     }
  574: }
  575: 
  576: int kvm_cpu_exec(CPUState *env)
  577: {
  578:     struct kvm_run *run = env->kvm_run;
  579:     int ret;
  580: 
  581:     dprintf("kvm_cpu_exec()\n");
  582: 
  583:     do {
  584:         if (env->exit_request) {
  585:             dprintf("interrupt exit requested\n");
  586:             ret = 0;
  587:             break;
  588:         }
  589: 
  590:         if (env->kvm_state->regs_modified) {
  591:             kvm_arch_put_registers(env);
  592:             env->kvm_state->regs_modified = 0;
  593:         }
  594: 
  595:         kvm_arch_pre_run(env, run);
  596:         qemu_mutex_unlock_iothread();
  597:         ret = kvm_vcpu_ioctl(env, KVM_RUN, 0);
  598:         qemu_mutex_lock_iothread();
  599:         kvm_arch_post_run(env, run);
  600: 
  601:         if (ret == -EINTR || ret == -EAGAIN) {
  602:             dprintf("io window exit\n");
  603:             ret = 0;
  604:             break;
  605:         }
  606: 
  607:         if (ret < 0) {
  608:             dprintf("kvm run failed %s\n", strerror(-ret));
  609:             abort();
  610:         }
  611: 
  612:         kvm_run_coalesced_mmio(env, run);
  613: 
  614:         ret = 0; /* exit loop */
  615:         switch (run->exit_reason) {
  616:         case KVM_EXIT_IO:
  617:             dprintf("handle_io\n");
  618:             ret = kvm_handle_io(run->io.port,
  619:                                 (uint8_t *)run + run->io.data_offset,
  620:                                 run->io.direction,
  621:                                 run->io.size,
  622:                                 run->io.count);
  623:             break;
  624:         case KVM_EXIT_MMIO:
  625:             dprintf("handle_mmio\n");
  626:             cpu_physical_memory_rw(run->mmio.phys_addr,
  627:                                    run->mmio.data,
  628:                                    run->mmio.len,
  629:                                    run->mmio.is_write);
  630:             ret = 1;
  631:             break;
  632:         case KVM_EXIT_IRQ_WINDOW_OPEN:
  633:             dprintf("irq_window_open\n");
  634:             break;
  635:         case KVM_EXIT_SHUTDOWN:
  636:             dprintf("shutdown\n");
  637:             qemu_system_reset_request();
  638:             ret = 1;
  639:             break;
  640:         case KVM_EXIT_UNKNOWN:
  641:             dprintf("kvm_exit_unknown\n");
  642:             break;
  643:         case KVM_EXIT_FAIL_ENTRY:
  644:             dprintf("kvm_exit_fail_entry\n");
  645:             break;
  646:         case KVM_EXIT_EXCEPTION:
  647:             dprintf("kvm_exit_exception\n");
  648:             break;
  649:         case KVM_EXIT_DEBUG:
  650:             dprintf("kvm_exit_debug\n");
  651: #ifdef KVM_CAP_SET_GUEST_DEBUG
  652:             if (kvm_arch_debug(&run->debug.arch)) {
  653:                 gdb_set_stop_cpu(env);
  654:                 vm_stop(EXCP_DEBUG);
  655:                 env->exception_index = EXCP_DEBUG;
  656:                 return 0;
  657:             }
  658:             /* re-enter, this exception was guest-internal */
  659:             ret = 1;
  660: #endif /* KVM_CAP_SET_GUEST_DEBUG */
  661:             break;
  662:         default:
  663:             dprintf("kvm_arch_handle_exit\n");
  664:             ret = kvm_arch_handle_exit(env, run);
  665:             break;
  666:         }
  667:     } while (ret > 0);
  668: 
  669:     if (env->exit_request) {
  670:         env->exit_request = 0;
  671:         env->exception_index = EXCP_INTERRUPT;
  672:     }
  673: 
  674:     return ret;
  675: }
  676: 
  677: void kvm_set_phys_mem(target_phys_addr_t start_addr,
  678:                       ram_addr_t size,
  679:                       ram_addr_t phys_offset)
  680: {
  681:     KVMState *s = kvm_state;
  682:     ram_addr_t flags = phys_offset & ~TARGET_PAGE_MASK;
  683:     KVMSlot *mem, old;
  684:     int err;
  685: 
  686:     if (start_addr & ~TARGET_PAGE_MASK) {
  687:         if (flags >= IO_MEM_UNASSIGNED) {
  688:             if (!kvm_lookup_overlapping_slot(s, start_addr,
  689:                                              start_addr + size)) {
  690:                 return;
  691:             }
  692:             fprintf(stderr, "Unaligned split of a KVM memory slot\n");
  693:         } else {
  694:             fprintf(stderr, "Only page-aligned memory slots supported\n");
  695:         }
  696:         abort();
  697:     }
  698: 
  699:     /* KVM does not support read-only slots */
  700:     phys_offset &= ~IO_MEM_ROM;
  701: 
  702:     while (1) {
  703:         mem = kvm_lookup_overlapping_slot(s, start_addr, start_addr + size);
  704:         if (!mem) {
  705:             break;
  706:         }
  707: 
  708:         if (flags < IO_MEM_UNASSIGNED && start_addr >= mem->start_addr &&
  709:             (start_addr + size <= mem->start_addr + mem->memory_size) &&
  710:             (phys_offset - start_addr == mem->phys_offset - mem->start_addr)) {
  711:             /* The new slot fits into the existing one and comes with
  712:              * identical parameters - nothing to be done. */
  713:             return;
  714:         }
  715: 
  716:         old = *mem;
  717: 
  718:         /* unregister the overlapping slot */
  719:         mem->memory_size = 0;
  720:         err = kvm_set_user_memory_region(s, mem);
  721:         if (err) {
  722:             fprintf(stderr, "%s: error unregistering overlapping slot: %s\n",
  723:                     __func__, strerror(-err));
  724:             abort();
  725:         }
  726: 
  727:         /* Workaround for older KVM versions: we can't join slots, even not by
  728:          * unregistering the previous ones and then registering the larger
  729:          * slot. We have to maintain the existing fragmentation. Sigh.
  730:          *
  731:          * This workaround assumes that the new slot starts at the same
  732:          * address as the first existing one. If not or if some overlapping
  733:          * slot comes around later, we will fail (not seen in practice so far)
  734:          * - and actually require a recent KVM version. */
  735:         if (s->broken_set_mem_region &&
  736:             old.start_addr == start_addr && old.memory_size < size &&
  737:             flags < IO_MEM_UNASSIGNED) {
  738:             mem = kvm_alloc_slot(s);
  739:             mem->memory_size = old.memory_size;
  740:             mem->start_addr = old.start_addr;
  741:             mem->phys_offset = old.phys_offset;
  742:             mem->flags = 0;
  743: 
  744:             err = kvm_set_user_memory_region(s, mem);
  745:             if (err) {
  746:                 fprintf(stderr, "%s: error updating slot: %s\n", __func__,
  747:                         strerror(-err));
  748:                 abort();
  749:             }
  750: 
  751:             start_addr += old.memory_size;
  752:             phys_offset += old.memory_size;
  753:             size -= old.memory_size;
  754:             continue;
  755:         }
  756: 
  757:         /* register prefix slot */
  758:         if (old.start_addr < start_addr) {
  759:             mem = kvm_alloc_slot(s);
  760:             mem->memory_size = start_addr - old.start_addr;
  761:             mem->start_addr = old.start_addr;
  762:             mem->phys_offset = old.phys_offset;
  763:             mem->flags = 0;
  764: 
  765:             err = kvm_set_user_memory_region(s, mem);
  766:             if (err) {
  767:                 fprintf(stderr, "%s: error registering prefix slot: %s\n",
  768:                         __func__, strerror(-err));
  769:                 abort();
  770:             }
  771:         }
  772: 
  773:         /* register suffix slot */
  774:         if (old.start_addr + old.memory_size > start_addr + size) {
  775:             ram_addr_t size_delta;
  776: 
  777:             mem = kvm_alloc_slot(s);
  778:             mem->start_addr = start_addr + size;
  779:             size_delta = mem->start_addr - old.start_addr;
  780:             mem->memory_size = old.memory_size - size_delta;
  781:             mem->phys_offset = old.phys_offset + size_delta;
  782:             mem->flags = 0;
  783: 
  784:             err = kvm_set_user_memory_region(s, mem);
  785:             if (err) {
  786:                 fprintf(stderr, "%s: error registering suffix slot: %s\n",
  787:                         __func__, strerror(-err));
  788:                 abort();
  789:             }
  790:         }
  791:     }
  792: 
  793:     /* in case the KVM bug workaround already "consumed" the new slot */
  794:     if (!size)
  795:         return;
  796: 
  797:     /* KVM does not need to know about this memory */
  798:     if (flags >= IO_MEM_UNASSIGNED)
  799:         return;
  800: 
  801:     mem = kvm_alloc_slot(s);
  802:     mem->memory_size = size;
  803:     mem->start_addr = start_addr;
  804:     mem->phys_offset = phys_offset;
  805:     mem->flags = 0;
  806: 
  807:     err = kvm_set_user_memory_region(s, mem);
  808:     if (err) {
  809:         fprintf(stderr, "%s: error registering slot: %s\n", __func__,
  810:                 strerror(-err));
  811:         abort();
  812:     }
  813: }
  814: 
  815: int kvm_ioctl(KVMState *s, int type, ...)
  816: {
  817:     int ret;
  818:     void *arg;
  819:     va_list ap;
  820: 
  821:     va_start(ap, type);
  822:     arg = va_arg(ap, void *);
  823:     va_end(ap);
  824: 
  825:     ret = ioctl(s->fd, type, arg);
  826:     if (ret == -1)
  827:         ret = -errno;
  828: 
  829:     return ret;
  830: }
  831: 
  832: int kvm_vm_ioctl(KVMState *s, int type, ...)
  833: {
  834:     int ret;
  835:     void *arg;
  836:     va_list ap;
  837: 
  838:     va_start(ap, type);
  839:     arg = va_arg(ap, void *);
  840:     va_end(ap);
  841: 
  842:     ret = ioctl(s->vmfd, type, arg);
  843:     if (ret == -1)
  844:         ret = -errno;
  845: 
  846:     return ret;
  847: }
  848: 
  849: int kvm_vcpu_ioctl(CPUState *env, int type, ...)
  850: {
  851:     int ret;
  852:     void *arg;
  853:     va_list ap;
  854: 
  855:     va_start(ap, type);
  856:     arg = va_arg(ap, void *);
  857:     va_end(ap);
  858: 
  859:     ret = ioctl(env->kvm_fd, type, arg);
  860:     if (ret == -1)
  861:         ret = -errno;
  862: 
  863:     return ret;
  864: }
  865: 
  866: int kvm_has_sync_mmu(void)
  867: {
  868: #ifdef KVM_CAP_SYNC_MMU
  869:     KVMState *s = kvm_state;
  870: 
  871:     return kvm_check_extension(s, KVM_CAP_SYNC_MMU);
  872: #else
  873:     return 0;
  874: #endif
  875: }
  876: 
  877: int kvm_has_vcpu_events(void)
  878: {
  879:     return kvm_state->vcpu_events;
  880: }
  881: 
  882: void kvm_setup_guest_memory(void *start, size_t size)
  883: {
  884:     if (!kvm_has_sync_mmu()) {
  885: #ifdef MADV_DONTFORK
  886:         int ret = madvise(start, size, MADV_DONTFORK);
  887: 
  888:         if (ret) {
  889:             perror("madvice");
  890:             exit(1);
  891:         }
  892: #else
  893:         fprintf(stderr,
  894:                 "Need MADV_DONTFORK in absence of synchronous KVM MMU\n");
  895:         exit(1);
  896: #endif
  897:     }
  898: }
  899: 
  900: #ifdef KVM_CAP_SET_GUEST_DEBUG
  901: static void on_vcpu(CPUState *env, void (*func)(void *data), void *data)
  902: {
  903: #ifdef CONFIG_IOTHREAD
  904:     if (env == cpu_single_env) {
  905:         func(data);
  906:         return;
  907:     }
  908:     abort();
  909: #else
  910:     func(data);
  911: #endif
  912: }
  913: 
  914: struct kvm_sw_breakpoint *kvm_find_sw_breakpoint(CPUState *env,
  915:                                                  target_ulong pc)
  916: {
  917:     struct kvm_sw_breakpoint *bp;
  918: 
  919:     QTAILQ_FOREACH(bp, &env->kvm_state->kvm_sw_breakpoints, entry) {
  920:         if (bp->pc == pc)
  921:             return bp;
  922:     }
  923:     return NULL;
  924: }
  925: 
  926: int kvm_sw_breakpoints_active(CPUState *env)
  927: {
  928:     return !QTAILQ_EMPTY(&env->kvm_state->kvm_sw_breakpoints);
  929: }
  930: 
  931: struct kvm_set_guest_debug_data {
  932:     struct kvm_guest_debug dbg;
  933:     CPUState *env;
  934:     int err;
  935: };
  936: 
  937: static void kvm_invoke_set_guest_debug(void *data)
  938: {
  939:     struct kvm_set_guest_debug_data *dbg_data = data;
  940:     CPUState *env = dbg_data->env;
  941: 
  942:     if (env->kvm_state->regs_modified) {
  943:         kvm_arch_put_registers(env);
  944:         env->kvm_state->regs_modified = 0;
  945:     }
  946:     dbg_data->err = kvm_vcpu_ioctl(env, KVM_SET_GUEST_DEBUG, &dbg_data->dbg);
  947: }
  948: 
  949: int kvm_update_guest_debug(CPUState *env, unsigned long reinject_trap)
  950: {
  951:     struct kvm_set_guest_debug_data data;
  952: 
  953:     data.dbg.control = 0;
  954:     if (env->singlestep_enabled)
  955:         data.dbg.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP;
  956: 
  957:     kvm_arch_update_guest_debug(env, &data.dbg);
  958:     data.dbg.control |= reinject_trap;
  959:     data.env = env;
  960: 
  961:     on_vcpu(env, kvm_invoke_set_guest_debug, &data);
  962:     return data.err;
  963: }
  964: 
  965: int kvm_insert_breakpoint(CPUState *current_env, target_ulong addr,
  966:                           target_ulong len, int type)
  967: {
  968:     struct kvm_sw_breakpoint *bp;
  969:     CPUState *env;
  970:     int err;
  971: 
  972:     if (type == GDB_BREAKPOINT_SW) {
  973:         bp = kvm_find_sw_breakpoint(current_env, addr);
  974:         if (bp) {
  975:             bp->use_count++;
  976:             return 0;
  977:         }
  978: 
  979:         bp = qemu_malloc(sizeof(struct kvm_sw_breakpoint));
  980:         if (!bp)
  981:             return -ENOMEM;
  982: 
  983:         bp->pc = addr;
  984:         bp->use_count = 1;
  985:         err = kvm_arch_insert_sw_breakpoint(current_env, bp);
  986:         if (err) {
  987:             free(bp);
  988:             return err;
  989:         }
  990: 
  991:         QTAILQ_INSERT_HEAD(&current_env->kvm_state->kvm_sw_breakpoints,
  992:                           bp, entry);
  993:     } else {
  994:         err = kvm_arch_insert_hw_breakpoint(addr, len, type);
  995:         if (err)
  996:             return err;
  997:     }
  998: 
  999:     for (env = first_cpu; env != NULL; env = env->next_cpu) {
 1000:         err = kvm_update_guest_debug(env, 0);
 1001:         if (err)
 1002:             return err;
 1003:     }
 1004:     return 0;
 1005: }
 1006: 
 1007: int kvm_remove_breakpoint(CPUState *current_env, target_ulong addr,
 1008:                           target_ulong len, int type)
 1009: {
 1010:     struct kvm_sw_breakpoint *bp;
 1011:     CPUState *env;
 1012:     int err;
 1013: 
 1014:     if (type == GDB_BREAKPOINT_SW) {
 1015:         bp = kvm_find_sw_breakpoint(current_env, addr);
 1016:         if (!bp)
 1017:             return -ENOENT;
 1018: 
 1019:         if (bp->use_count > 1) {
 1020:             bp->use_count--;
 1021:             return 0;
 1022:         }
 1023: 
 1024:         err = kvm_arch_remove_sw_breakpoint(current_env, bp);
 1025:         if (err)
 1026:             return err;
 1027: 
 1028:         QTAILQ_REMOVE(&current_env->kvm_state->kvm_sw_breakpoints, bp, entry);
 1029:         qemu_free(bp);
 1030:     } else {
 1031:         err = kvm_arch_remove_hw_breakpoint(addr, len, type);
 1032:         if (err)
 1033:             return err;
 1034:     }
 1035: 
 1036:     for (env = first_cpu; env != NULL; env = env->next_cpu) {
 1037:         err = kvm_update_guest_debug(env, 0);
 1038:         if (err)
 1039:             return err;
 1040:     }
 1041:     return 0;
 1042: }
 1043: 
 1044: void kvm_remove_all_breakpoints(CPUState *current_env)
 1045: {
 1046:     struct kvm_sw_breakpoint *bp, *next;
 1047:     KVMState *s = current_env->kvm_state;
 1048:     CPUState *env;
 1049: 
 1050:     QTAILQ_FOREACH_SAFE(bp, &s->kvm_sw_breakpoints, entry, next) {
 1051:         if (kvm_arch_remove_sw_breakpoint(current_env, bp) != 0) {
 1052:             /* Try harder to find a CPU that currently sees the breakpoint. */
 1053:             for (env = first_cpu; env != NULL; env = env->next_cpu) {
 1054:                 if (kvm_arch_remove_sw_breakpoint(env, bp) == 0)
 1055:                     break;
 1056:             }
 1057:         }
 1058:     }
 1059:     kvm_arch_remove_all_hw_breakpoints();
 1060: 
 1061:     for (env = first_cpu; env != NULL; env = env->next_cpu)
 1062:         kvm_update_guest_debug(env, 0);
 1063: }
 1064: 
 1065: #else /* !KVM_CAP_SET_GUEST_DEBUG */
 1066: 
 1067: int kvm_update_guest_debug(CPUState *env, unsigned long reinject_trap)
 1068: {
 1069:     return -EINVAL;
 1070: }
 1071: 
 1072: int kvm_insert_breakpoint(CPUState *current_env, target_ulong addr,
 1073:                           target_ulong len, int type)
 1074: {
 1075:     return -EINVAL;
 1076: }
 1077: 
 1078: int kvm_remove_breakpoint(CPUState *current_env, target_ulong addr,
 1079:                           target_ulong len, int type)
 1080: {
 1081:     return -EINVAL;
 1082: }
 1083: 
 1084: void kvm_remove_all_breakpoints(CPUState *current_env)
 1085: {
 1086: }
 1087: #endif /* !KVM_CAP_SET_GUEST_DEBUG */

unix.superglobalmegacorp.com