File:  [Qemu by Fabrice Bellard] / qemu / hw / spapr.c
Revision 1.1.1.3 (vendor branch): download - view: text, annotated - select for diffs
Tue Apr 24 19:50:12 2018 UTC (2 years, 7 months ago) by root
Branches: qemu, MAIN
CVS tags: qemu1101, HEAD
qemu 1.1.1

    1: /*
    2:  * QEMU PowerPC pSeries Logical Partition (aka sPAPR) hardware System Emulator
    3:  *
    4:  * Copyright (c) 2004-2007 Fabrice Bellard
    5:  * Copyright (c) 2007 Jocelyn Mayer
    6:  * Copyright (c) 2010 David Gibson, IBM Corporation.
    7:  *
    8:  * Permission is hereby granted, free of charge, to any person obtaining a copy
    9:  * of this software and associated documentation files (the "Software"), to deal
   10:  * in the Software without restriction, including without limitation the rights
   11:  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
   12:  * copies of the Software, and to permit persons to whom the Software is
   13:  * furnished to do so, subject to the following conditions:
   14:  *
   15:  * The above copyright notice and this permission notice shall be included in
   16:  * all copies or substantial portions of the Software.
   17:  *
   18:  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
   19:  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
   20:  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
   21:  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
   22:  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
   23:  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
   24:  * THE SOFTWARE.
   25:  *
   26:  */
   27: #include "sysemu.h"
   28: #include "hw.h"
   29: #include "elf.h"
   30: #include "net.h"
   31: #include "blockdev.h"
   32: #include "cpus.h"
   33: #include "kvm.h"
   34: #include "kvm_ppc.h"
   35: 
   36: #include "hw/boards.h"
   37: #include "hw/ppc.h"
   38: #include "hw/loader.h"
   39: 
   40: #include "hw/spapr.h"
   41: #include "hw/spapr_vio.h"
   42: #include "hw/spapr_pci.h"
   43: #include "hw/xics.h"
   44: 
   45: #include "kvm.h"
   46: #include "kvm_ppc.h"
   47: #include "pci.h"
   48: 
   49: #include "exec-memory.h"
   50: 
   51: #include <libfdt.h>
   52: 
/* SLOF memory layout:
 *
 * The SLOF raw image is loaded at 0. It copies its romfs right below the
 * flat device-tree, then positions SLOF itself 31M below that.
 *
 * So we set FW_OVERHEAD to 40MB, which should account for all of that
 * and more.
 *
 * We load our kernel at 4M, leaving space for the SLOF initial image.
 */
/* Size of the buffers in which the flattened device tree is built (64KB) */
#define FDT_MAX_SIZE            0x10000
/* Space reserved for the RTAS blob just below rtas_limit (64KB) */
#define RTAS_MAX_SIZE           0x10000
/* Maximum size of the firmware image loaded at address 0 (4MB) */
#define FW_MAX_SIZE             0x400000
/* Firmware image looked up through qemu_find_file() */
#define FW_FILE_NAME            "slof.bin"
/* Room left for the firmware below the device tree (40MB, see above) */
#define FW_OVERHEAD             0x2800000
/* The kernel is loaded right after the area reserved for the firmware image */
#define KERNEL_LOAD_ADDR        FW_MAX_SIZE

/* Minimum RMA size accepted by ppc_spapr_init(), in megabytes */
#define MIN_RMA_SLOF            128UL

/* Time-base frequency used when KVM does not provide one (512 MHz) */
#define TIMEBASE_FREQ           512000000ULL

/* Value of .max_cpus in the machine description */
#define MAX_CPUS                256
/* Number of interrupts requested from xics_system_init() */
#define XICS_IRQS               1024

/* Parameters handed to spapr_create_phb() for the single PCI host bridge */
#define SPAPR_PCI_BUID          0x800000020000001ULL
#define SPAPR_PCI_MEM_WIN_ADDR  (0x10000000000ULL + 0xA0000000)
#define SPAPR_PCI_MEM_WIN_SIZE  0x20000000
#define SPAPR_PCI_IO_WIN_ADDR   (0x10000000000ULL + 0x80000000)

/* phandle given to the "interrupt-controller" node in the device tree */
#define PHANDLE_XICP            0x00001111

/* Global machine state; allocated and filled in by ppc_spapr_init() */
sPAPREnvironment *spapr;
   85: 
   86: qemu_irq spapr_allocate_irq(uint32_t hint, uint32_t *irq_num,
   87:                             enum xics_irq_type type)
   88: {
   89:     uint32_t irq;
   90:     qemu_irq qirq;
   91: 
   92:     if (hint) {
   93:         irq = hint;
   94:         /* FIXME: we should probably check for collisions somehow */
   95:     } else {
   96:         irq = spapr->next_irq++;
   97:     }
   98: 
   99:     qirq = xics_assign_irq(spapr->icp, irq, type);
  100:     if (!qirq) {
  101:         return NULL;
  102:     }
  103: 
  104:     if (irq_num) {
  105:         *irq_num = irq;
  106:     }
  107: 
  108:     return qirq;
  109: }
  110: 
  111: static int spapr_set_associativity(void *fdt, sPAPREnvironment *spapr)
  112: {
  113:     int ret = 0, offset;
  114:     CPUPPCState *env;
  115:     char cpu_model[32];
  116:     int smt = kvmppc_smt_threads();
  117: 
  118:     assert(spapr->cpu_model);
  119: 
  120:     for (env = first_cpu; env != NULL; env = env->next_cpu) {
  121:         uint32_t associativity[] = {cpu_to_be32(0x5),
  122:                                     cpu_to_be32(0x0),
  123:                                     cpu_to_be32(0x0),
  124:                                     cpu_to_be32(0x0),
  125:                                     cpu_to_be32(env->numa_node),
  126:                                     cpu_to_be32(env->cpu_index)};
  127: 
  128:         if ((env->cpu_index % smt) != 0) {
  129:             continue;
  130:         }
  131: 
  132:         snprintf(cpu_model, 32, "/cpus/%s@%x", spapr->cpu_model,
  133:                  env->cpu_index);
  134: 
  135:         offset = fdt_path_offset(fdt, cpu_model);
  136:         if (offset < 0) {
  137:             return offset;
  138:         }
  139: 
  140:         ret = fdt_setprop(fdt, offset, "ibm,associativity", associativity,
  141:                           sizeof(associativity));
  142:         if (ret < 0) {
  143:             return ret;
  144:         }
  145:     }
  146:     return ret;
  147: }
  148: 
  149: static void *spapr_create_fdt_skel(const char *cpu_model,
  150:                                    target_phys_addr_t rma_size,
  151:                                    target_phys_addr_t initrd_base,
  152:                                    target_phys_addr_t initrd_size,
  153:                                    target_phys_addr_t kernel_size,
  154:                                    const char *boot_device,
  155:                                    const char *kernel_cmdline,
  156:                                    long hash_shift)
  157: {
  158:     void *fdt;
  159:     CPUPPCState *env;
  160:     uint64_t mem_reg_property[2];
  161:     uint32_t start_prop = cpu_to_be32(initrd_base);
  162:     uint32_t end_prop = cpu_to_be32(initrd_base + initrd_size);
  163:     uint32_t pft_size_prop[] = {0, cpu_to_be32(hash_shift)};
  164:     char hypertas_prop[] = "hcall-pft\0hcall-term\0hcall-dabr\0hcall-interrupt"
  165:         "\0hcall-tce\0hcall-vio\0hcall-splpar\0hcall-bulk";
  166:     uint32_t interrupt_server_ranges_prop[] = {0, cpu_to_be32(smp_cpus)};
  167:     int i;
  168:     char *modelname;
  169:     int smt = kvmppc_smt_threads();
  170:     unsigned char vec5[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x80};
  171:     uint32_t refpoints[] = {cpu_to_be32(0x4), cpu_to_be32(0x4)};
  172:     uint32_t associativity[] = {cpu_to_be32(0x4), cpu_to_be32(0x0),
  173:                                 cpu_to_be32(0x0), cpu_to_be32(0x0),
  174:                                 cpu_to_be32(0x0)};
  175:     char mem_name[32];
  176:     target_phys_addr_t node0_size, mem_start;
  177: 
  178: #define _FDT(exp) \
  179:     do { \
  180:         int ret = (exp);                                           \
  181:         if (ret < 0) {                                             \
  182:             fprintf(stderr, "qemu: error creating device tree: %s: %s\n", \
  183:                     #exp, fdt_strerror(ret));                      \
  184:             exit(1);                                               \
  185:         }                                                          \
  186:     } while (0)
  187: 
  188:     fdt = g_malloc0(FDT_MAX_SIZE);
  189:     _FDT((fdt_create(fdt, FDT_MAX_SIZE)));
  190: 
  191:     if (kernel_size) {
  192:         _FDT((fdt_add_reservemap_entry(fdt, KERNEL_LOAD_ADDR, kernel_size)));
  193:     }
  194:     if (initrd_size) {
  195:         _FDT((fdt_add_reservemap_entry(fdt, initrd_base, initrd_size)));
  196:     }
  197:     _FDT((fdt_finish_reservemap(fdt)));
  198: 
  199:     /* Root node */
  200:     _FDT((fdt_begin_node(fdt, "")));
  201:     _FDT((fdt_property_string(fdt, "device_type", "chrp")));
  202:     _FDT((fdt_property_string(fdt, "model", "IBM pSeries (emulated by qemu)")));
  203: 
  204:     _FDT((fdt_property_cell(fdt, "#address-cells", 0x2)));
  205:     _FDT((fdt_property_cell(fdt, "#size-cells", 0x2)));
  206: 
  207:     /* /chosen */
  208:     _FDT((fdt_begin_node(fdt, "chosen")));
  209: 
  210:     /* Set Form1_affinity */
  211:     _FDT((fdt_property(fdt, "ibm,architecture-vec-5", vec5, sizeof(vec5))));
  212: 
  213:     _FDT((fdt_property_string(fdt, "bootargs", kernel_cmdline)));
  214:     _FDT((fdt_property(fdt, "linux,initrd-start",
  215:                        &start_prop, sizeof(start_prop))));
  216:     _FDT((fdt_property(fdt, "linux,initrd-end",
  217:                        &end_prop, sizeof(end_prop))));
  218:     if (kernel_size) {
  219:         uint64_t kprop[2] = { cpu_to_be64(KERNEL_LOAD_ADDR),
  220:                               cpu_to_be64(kernel_size) };
  221: 
  222:         _FDT((fdt_property(fdt, "qemu,boot-kernel", &kprop, sizeof(kprop))));
  223:     }
  224:     _FDT((fdt_property_string(fdt, "qemu,boot-device", boot_device)));
  225: 
  226:     _FDT((fdt_end_node(fdt)));
  227: 
  228:     /* memory node(s) */
  229:     node0_size = (nb_numa_nodes > 1) ? node_mem[0] : ram_size;
  230:     if (rma_size > node0_size) {
  231:         rma_size = node0_size;
  232:     }
  233: 
  234:     /* RMA */
  235:     mem_reg_property[0] = 0;
  236:     mem_reg_property[1] = cpu_to_be64(rma_size);
  237:     _FDT((fdt_begin_node(fdt, "memory@0")));
  238:     _FDT((fdt_property_string(fdt, "device_type", "memory")));
  239:     _FDT((fdt_property(fdt, "reg", mem_reg_property,
  240:         sizeof(mem_reg_property))));
  241:     _FDT((fdt_property(fdt, "ibm,associativity", associativity,
  242:         sizeof(associativity))));
  243:     _FDT((fdt_end_node(fdt)));
  244: 
  245:     /* RAM: Node 0 */
  246:     if (node0_size > rma_size) {
  247:         mem_reg_property[0] = cpu_to_be64(rma_size);
  248:         mem_reg_property[1] = cpu_to_be64(node0_size - rma_size);
  249: 
  250:         sprintf(mem_name, "memory@" TARGET_FMT_lx, rma_size);
  251:         _FDT((fdt_begin_node(fdt, mem_name)));
  252:         _FDT((fdt_property_string(fdt, "device_type", "memory")));
  253:         _FDT((fdt_property(fdt, "reg", mem_reg_property,
  254:                            sizeof(mem_reg_property))));
  255:         _FDT((fdt_property(fdt, "ibm,associativity", associativity,
  256:                            sizeof(associativity))));
  257:         _FDT((fdt_end_node(fdt)));
  258:     }
  259: 
  260:     /* RAM: Node 1 and beyond */
  261:     mem_start = node0_size;
  262:     for (i = 1; i < nb_numa_nodes; i++) {
  263:         mem_reg_property[0] = cpu_to_be64(mem_start);
  264:         mem_reg_property[1] = cpu_to_be64(node_mem[i]);
  265:         associativity[3] = associativity[4] = cpu_to_be32(i);
  266:         sprintf(mem_name, "memory@" TARGET_FMT_lx, mem_start);
  267:         _FDT((fdt_begin_node(fdt, mem_name)));
  268:         _FDT((fdt_property_string(fdt, "device_type", "memory")));
  269:         _FDT((fdt_property(fdt, "reg", mem_reg_property,
  270:             sizeof(mem_reg_property))));
  271:         _FDT((fdt_property(fdt, "ibm,associativity", associativity,
  272:             sizeof(associativity))));
  273:         _FDT((fdt_end_node(fdt)));
  274:         mem_start += node_mem[i];
  275:     }
  276: 
  277:     /* cpus */
  278:     _FDT((fdt_begin_node(fdt, "cpus")));
  279: 
  280:     _FDT((fdt_property_cell(fdt, "#address-cells", 0x1)));
  281:     _FDT((fdt_property_cell(fdt, "#size-cells", 0x0)));
  282: 
  283:     modelname = g_strdup(cpu_model);
  284: 
  285:     for (i = 0; i < strlen(modelname); i++) {
  286:         modelname[i] = toupper(modelname[i]);
  287:     }
  288: 
  289:     /* This is needed during FDT finalization */
  290:     spapr->cpu_model = g_strdup(modelname);
  291: 
  292:     for (env = first_cpu; env != NULL; env = env->next_cpu) {
  293:         int index = env->cpu_index;
  294:         uint32_t servers_prop[smp_threads];
  295:         uint32_t gservers_prop[smp_threads * 2];
  296:         char *nodename;
  297:         uint32_t segs[] = {cpu_to_be32(28), cpu_to_be32(40),
  298:                            0xffffffff, 0xffffffff};
  299:         uint32_t tbfreq = kvm_enabled() ? kvmppc_get_tbfreq() : TIMEBASE_FREQ;
  300:         uint32_t cpufreq = kvm_enabled() ? kvmppc_get_clockfreq() : 1000000000;
  301: 
  302:         if ((index % smt) != 0) {
  303:             continue;
  304:         }
  305: 
  306:         if (asprintf(&nodename, "%s@%x", modelname, index) < 0) {
  307:             fprintf(stderr, "Allocation failure\n");
  308:             exit(1);
  309:         }
  310: 
  311:         _FDT((fdt_begin_node(fdt, nodename)));
  312: 
  313:         free(nodename);
  314: 
  315:         _FDT((fdt_property_cell(fdt, "reg", index)));
  316:         _FDT((fdt_property_string(fdt, "device_type", "cpu")));
  317: 
  318:         _FDT((fdt_property_cell(fdt, "cpu-version", env->spr[SPR_PVR])));
  319:         _FDT((fdt_property_cell(fdt, "dcache-block-size",
  320:                                 env->dcache_line_size)));
  321:         _FDT((fdt_property_cell(fdt, "icache-block-size",
  322:                                 env->icache_line_size)));
  323:         _FDT((fdt_property_cell(fdt, "timebase-frequency", tbfreq)));
  324:         _FDT((fdt_property_cell(fdt, "clock-frequency", cpufreq)));
  325:         _FDT((fdt_property_cell(fdt, "ibm,slb-size", env->slb_nr)));
  326:         _FDT((fdt_property(fdt, "ibm,pft-size",
  327:                            pft_size_prop, sizeof(pft_size_prop))));
  328:         _FDT((fdt_property_string(fdt, "status", "okay")));
  329:         _FDT((fdt_property(fdt, "64-bit", NULL, 0)));
  330: 
  331:         /* Build interrupt servers and gservers properties */
  332:         for (i = 0; i < smp_threads; i++) {
  333:             servers_prop[i] = cpu_to_be32(index + i);
  334:             /* Hack, direct the group queues back to cpu 0 */
  335:             gservers_prop[i*2] = cpu_to_be32(index + i);
  336:             gservers_prop[i*2 + 1] = 0;
  337:         }
  338:         _FDT((fdt_property(fdt, "ibm,ppc-interrupt-server#s",
  339:                            servers_prop, sizeof(servers_prop))));
  340:         _FDT((fdt_property(fdt, "ibm,ppc-interrupt-gserver#s",
  341:                            gservers_prop, sizeof(gservers_prop))));
  342: 
  343:         if (env->mmu_model & POWERPC_MMU_1TSEG) {
  344:             _FDT((fdt_property(fdt, "ibm,processor-segment-sizes",
  345:                                segs, sizeof(segs))));
  346:         }
  347: 
  348:         /* Advertise VMX/VSX (vector extensions) if available
  349:          *   0 / no property == no vector extensions
  350:          *   1               == VMX / Altivec available
  351:          *   2               == VSX available */
  352:         if (env->insns_flags & PPC_ALTIVEC) {
  353:             uint32_t vmx = (env->insns_flags2 & PPC2_VSX) ? 2 : 1;
  354: 
  355:             _FDT((fdt_property_cell(fdt, "ibm,vmx", vmx)));
  356:         }
  357: 
  358:         /* Advertise DFP (Decimal Floating Point) if available
  359:          *   0 / no property == no DFP
  360:          *   1               == DFP available */
  361:         if (env->insns_flags2 & PPC2_DFP) {
  362:             _FDT((fdt_property_cell(fdt, "ibm,dfp", 1)));
  363:         }
  364: 
  365:         _FDT((fdt_end_node(fdt)));
  366:     }
  367: 
  368:     g_free(modelname);
  369: 
  370:     _FDT((fdt_end_node(fdt)));
  371: 
  372:     /* RTAS */
  373:     _FDT((fdt_begin_node(fdt, "rtas")));
  374: 
  375:     _FDT((fdt_property(fdt, "ibm,hypertas-functions", hypertas_prop,
  376:                        sizeof(hypertas_prop))));
  377: 
  378:     _FDT((fdt_property(fdt, "ibm,associativity-reference-points",
  379:         refpoints, sizeof(refpoints))));
  380: 
  381:     _FDT((fdt_end_node(fdt)));
  382: 
  383:     /* interrupt controller */
  384:     _FDT((fdt_begin_node(fdt, "interrupt-controller")));
  385: 
  386:     _FDT((fdt_property_string(fdt, "device_type",
  387:                               "PowerPC-External-Interrupt-Presentation")));
  388:     _FDT((fdt_property_string(fdt, "compatible", "IBM,ppc-xicp")));
  389:     _FDT((fdt_property(fdt, "interrupt-controller", NULL, 0)));
  390:     _FDT((fdt_property(fdt, "ibm,interrupt-server-ranges",
  391:                        interrupt_server_ranges_prop,
  392:                        sizeof(interrupt_server_ranges_prop))));
  393:     _FDT((fdt_property_cell(fdt, "#interrupt-cells", 2)));
  394:     _FDT((fdt_property_cell(fdt, "linux,phandle", PHANDLE_XICP)));
  395:     _FDT((fdt_property_cell(fdt, "phandle", PHANDLE_XICP)));
  396: 
  397:     _FDT((fdt_end_node(fdt)));
  398: 
  399:     /* vdevice */
  400:     _FDT((fdt_begin_node(fdt, "vdevice")));
  401: 
  402:     _FDT((fdt_property_string(fdt, "device_type", "vdevice")));
  403:     _FDT((fdt_property_string(fdt, "compatible", "IBM,vdevice")));
  404:     _FDT((fdt_property_cell(fdt, "#address-cells", 0x1)));
  405:     _FDT((fdt_property_cell(fdt, "#size-cells", 0x0)));
  406:     _FDT((fdt_property_cell(fdt, "#interrupt-cells", 0x2)));
  407:     _FDT((fdt_property(fdt, "interrupt-controller", NULL, 0)));
  408: 
  409:     _FDT((fdt_end_node(fdt)));
  410: 
  411:     _FDT((fdt_end_node(fdt))); /* close root node */
  412:     _FDT((fdt_finish(fdt)));
  413: 
  414:     return fdt;
  415: }
  416: 
  417: static void spapr_finalize_fdt(sPAPREnvironment *spapr,
  418:                                target_phys_addr_t fdt_addr,
  419:                                target_phys_addr_t rtas_addr,
  420:                                target_phys_addr_t rtas_size)
  421: {
  422:     int ret;
  423:     void *fdt;
  424:     sPAPRPHBState *phb;
  425: 
  426:     fdt = g_malloc(FDT_MAX_SIZE);
  427: 
  428:     /* open out the base tree into a temp buffer for the final tweaks */
  429:     _FDT((fdt_open_into(spapr->fdt_skel, fdt, FDT_MAX_SIZE)));
  430: 
  431:     ret = spapr_populate_vdevice(spapr->vio_bus, fdt);
  432:     if (ret < 0) {
  433:         fprintf(stderr, "couldn't setup vio devices in fdt\n");
  434:         exit(1);
  435:     }
  436: 
  437:     QLIST_FOREACH(phb, &spapr->phbs, list) {
  438:         ret = spapr_populate_pci_devices(phb, PHANDLE_XICP, fdt);
  439:     }
  440: 
  441:     if (ret < 0) {
  442:         fprintf(stderr, "couldn't setup PCI devices in fdt\n");
  443:         exit(1);
  444:     }
  445: 
  446:     /* RTAS */
  447:     ret = spapr_rtas_device_tree_setup(fdt, rtas_addr, rtas_size);
  448:     if (ret < 0) {
  449:         fprintf(stderr, "Couldn't set up RTAS device tree properties\n");
  450:     }
  451: 
  452:     /* Advertise NUMA via ibm,associativity */
  453:     if (nb_numa_nodes > 1) {
  454:         ret = spapr_set_associativity(fdt, spapr);
  455:         if (ret < 0) {
  456:             fprintf(stderr, "Couldn't set up NUMA device tree properties\n");
  457:         }
  458:     }
  459: 
  460:     spapr_populate_chosen_stdout(fdt, spapr->vio_bus);
  461: 
  462:     _FDT((fdt_pack(fdt)));
  463: 
  464:     if (fdt_totalsize(fdt) > FDT_MAX_SIZE) {
  465:         hw_error("FDT too big ! 0x%x bytes (max is 0x%x)\n",
  466:                  fdt_totalsize(fdt), FDT_MAX_SIZE);
  467:         exit(1);
  468:     }
  469: 
  470:     cpu_physical_memory_write(fdt_addr, fdt, fdt_totalsize(fdt));
  471: 
  472:     g_free(fdt);
  473: }
  474: 
  475: static uint64_t translate_kernel_address(void *opaque, uint64_t addr)
  476: {
  477:     return (addr & 0x0fffffff) + KERNEL_LOAD_ADDR;
  478: }
  479: 
  480: static void emulate_spapr_hypercall(CPUPPCState *env)
  481: {
  482:     env->gpr[3] = spapr_hypercall(env, env->gpr[3], &env->gpr[4]);
  483: }
  484: 
  485: static void spapr_reset(void *opaque)
  486: {
  487:     sPAPREnvironment *spapr = (sPAPREnvironment *)opaque;
  488: 
  489:     fprintf(stderr, "sPAPR reset\n");
  490: 
  491:     /* flush out the hash table */
  492:     memset(spapr->htab, 0, spapr->htab_size);
  493: 
  494:     /* Load the fdt */
  495:     spapr_finalize_fdt(spapr, spapr->fdt_addr, spapr->rtas_addr,
  496:                        spapr->rtas_size);
  497: 
  498:     /* Set up the entry state */
  499:     first_cpu->gpr[3] = spapr->fdt_addr;
  500:     first_cpu->gpr[5] = 0;
  501:     first_cpu->halted = 0;
  502:     first_cpu->nip = spapr->entry_point;
  503: 
  504: }
  505: 
  506: static void spapr_cpu_reset(void *opaque)
  507: {
  508:     CPUPPCState *env = opaque;
  509: 
  510:     cpu_state_reset(env);
  511: }
  512: 
  513: /* pSeries LPAR / sPAPR hardware init */
  514: static void ppc_spapr_init(ram_addr_t ram_size,
  515:                            const char *boot_device,
  516:                            const char *kernel_filename,
  517:                            const char *kernel_cmdline,
  518:                            const char *initrd_filename,
  519:                            const char *cpu_model)
  520: {
  521:     CPUPPCState *env;
  522:     int i;
  523:     MemoryRegion *sysmem = get_system_memory();
  524:     MemoryRegion *ram = g_new(MemoryRegion, 1);
  525:     target_phys_addr_t rma_alloc_size, rma_size;
  526:     uint32_t initrd_base = 0;
  527:     long kernel_size = 0, initrd_size = 0;
  528:     long load_limit, rtas_limit, fw_size;
  529:     long pteg_shift = 17;
  530:     char *filename;
  531: 
  532:     spapr = g_malloc0(sizeof(*spapr));
  533:     QLIST_INIT(&spapr->phbs);
  534: 
  535:     cpu_ppc_hypercall = emulate_spapr_hypercall;
  536: 
  537:     /* Allocate RMA if necessary */
  538:     rma_alloc_size = kvmppc_alloc_rma("ppc_spapr.rma", sysmem);
  539: 
  540:     if (rma_alloc_size == -1) {
  541:         hw_error("qemu: Unable to create RMA\n");
  542:         exit(1);
  543:     }
  544:     if (rma_alloc_size && (rma_alloc_size < ram_size)) {
  545:         rma_size = rma_alloc_size;
  546:     } else {
  547:         rma_size = ram_size;
  548:     }
  549: 
  550:     /* We place the device tree and RTAS just below either the top of the RMA,
  551:      * or just below 2GB, whichever is lowere, so that it can be
  552:      * processed with 32-bit real mode code if necessary */
  553:     rtas_limit = MIN(rma_size, 0x80000000);
  554:     spapr->rtas_addr = rtas_limit - RTAS_MAX_SIZE;
  555:     spapr->fdt_addr = spapr->rtas_addr - FDT_MAX_SIZE;
  556:     load_limit = spapr->fdt_addr - FW_OVERHEAD;
  557: 
  558:     /* init CPUs */
  559:     if (cpu_model == NULL) {
  560:         cpu_model = kvm_enabled() ? "host" : "POWER7";
  561:     }
  562:     for (i = 0; i < smp_cpus; i++) {
  563:         env = cpu_init(cpu_model);
  564: 
  565:         if (!env) {
  566:             fprintf(stderr, "Unable to find PowerPC CPU definition\n");
  567:             exit(1);
  568:         }
  569:         /* Set time-base frequency to 512 MHz */
  570:         cpu_ppc_tb_init(env, TIMEBASE_FREQ);
  571:         qemu_register_reset(spapr_cpu_reset, env);
  572: 
  573:         env->hreset_vector = 0x60;
  574:         env->hreset_excp_prefix = 0;
  575:         env->gpr[3] = env->cpu_index;
  576:     }
  577: 
  578:     /* allocate RAM */
  579:     spapr->ram_limit = ram_size;
  580:     if (spapr->ram_limit > rma_alloc_size) {
  581:         ram_addr_t nonrma_base = rma_alloc_size;
  582:         ram_addr_t nonrma_size = spapr->ram_limit - rma_alloc_size;
  583: 
  584:         memory_region_init_ram(ram, "ppc_spapr.ram", nonrma_size);
  585:         vmstate_register_ram_global(ram);
  586:         memory_region_add_subregion(sysmem, nonrma_base, ram);
  587:     }
  588: 
  589:     /* allocate hash page table.  For now we always make this 16mb,
  590:      * later we should probably make it scale to the size of guest
  591:      * RAM */
  592:     spapr->htab_size = 1ULL << (pteg_shift + 7);
  593:     spapr->htab = qemu_memalign(spapr->htab_size, spapr->htab_size);
  594: 
  595:     for (env = first_cpu; env != NULL; env = env->next_cpu) {
  596:         env->external_htab = spapr->htab;
  597:         env->htab_base = -1;
  598:         env->htab_mask = spapr->htab_size - 1;
  599: 
  600:         /* Tell KVM that we're in PAPR mode */
  601:         env->spr[SPR_SDR1] = (unsigned long)spapr->htab |
  602:                              ((pteg_shift + 7) - 18);
  603:         env->spr[SPR_HIOR] = 0;
  604: 
  605:         if (kvm_enabled()) {
  606:             kvmppc_set_papr(env);
  607:         }
  608:     }
  609: 
  610:     filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, "spapr-rtas.bin");
  611:     spapr->rtas_size = load_image_targphys(filename, spapr->rtas_addr,
  612:                                            rtas_limit - spapr->rtas_addr);
  613:     if (spapr->rtas_size < 0) {
  614:         hw_error("qemu: could not load LPAR rtas '%s'\n", filename);
  615:         exit(1);
  616:     }
  617:     if (spapr->rtas_size > RTAS_MAX_SIZE) {
  618:         hw_error("RTAS too big ! 0x%lx bytes (max is 0x%x)\n",
  619:                  spapr->rtas_size, RTAS_MAX_SIZE);
  620:         exit(1);
  621:     }
  622:     g_free(filename);
  623: 
  624: 
  625:     /* Set up Interrupt Controller */
  626:     spapr->icp = xics_system_init(XICS_IRQS);
  627:     spapr->next_irq = 16;
  628: 
  629:     /* Set up VIO bus */
  630:     spapr->vio_bus = spapr_vio_bus_init();
  631: 
  632:     for (i = 0; i < MAX_SERIAL_PORTS; i++) {
  633:         if (serial_hds[i]) {
  634:             spapr_vty_create(spapr->vio_bus, serial_hds[i]);
  635:         }
  636:     }
  637: 
  638:     /* Set up PCI */
  639:     spapr_create_phb(spapr, "pci", SPAPR_PCI_BUID,
  640:                      SPAPR_PCI_MEM_WIN_ADDR,
  641:                      SPAPR_PCI_MEM_WIN_SIZE,
  642:                      SPAPR_PCI_IO_WIN_ADDR);
  643: 
  644:     for (i = 0; i < nb_nics; i++) {
  645:         NICInfo *nd = &nd_table[i];
  646: 
  647:         if (!nd->model) {
  648:             nd->model = g_strdup("ibmveth");
  649:         }
  650: 
  651:         if (strcmp(nd->model, "ibmveth") == 0) {
  652:             spapr_vlan_create(spapr->vio_bus, nd);
  653:         } else {
  654:             pci_nic_init_nofail(&nd_table[i], nd->model, NULL);
  655:         }
  656:     }
  657: 
  658:     for (i = 0; i <= drive_get_max_bus(IF_SCSI); i++) {
  659:         spapr_vscsi_create(spapr->vio_bus);
  660:     }
  661: 
  662:     if (rma_size < (MIN_RMA_SLOF << 20)) {
  663:         fprintf(stderr, "qemu: pSeries SLOF firmware requires >= "
  664:                 "%ldM guest RMA (Real Mode Area memory)\n", MIN_RMA_SLOF);
  665:         exit(1);
  666:     }
  667: 
  668:     fprintf(stderr, "sPAPR memory map:\n");
  669:     fprintf(stderr, "RTAS                 : 0x%08lx..%08lx\n",
  670:             (unsigned long)spapr->rtas_addr,
  671:             (unsigned long)(spapr->rtas_addr + spapr->rtas_size - 1));
  672:     fprintf(stderr, "FDT                  : 0x%08lx..%08lx\n",
  673:             (unsigned long)spapr->fdt_addr,
  674:             (unsigned long)(spapr->fdt_addr + FDT_MAX_SIZE - 1));
  675: 
  676:     if (kernel_filename) {
  677:         uint64_t lowaddr = 0;
  678: 
  679:         kernel_size = load_elf(kernel_filename, translate_kernel_address, NULL,
  680:                                NULL, &lowaddr, NULL, 1, ELF_MACHINE, 0);
  681:         if (kernel_size < 0) {
  682:             kernel_size = load_image_targphys(kernel_filename,
  683:                                               KERNEL_LOAD_ADDR,
  684:                                               load_limit - KERNEL_LOAD_ADDR);
  685:         }
  686:         if (kernel_size < 0) {
  687:             fprintf(stderr, "qemu: could not load kernel '%s'\n",
  688:                     kernel_filename);
  689:             exit(1);
  690:         }
  691:         fprintf(stderr, "Kernel               : 0x%08x..%08lx\n",
  692:                 KERNEL_LOAD_ADDR, KERNEL_LOAD_ADDR + kernel_size - 1);
  693: 
  694:         /* load initrd */
  695:         if (initrd_filename) {
  696:             /* Try to locate the initrd in the gap between the kernel
  697:              * and the firmware. Add a bit of space just in case
  698:              */
  699:             initrd_base = (KERNEL_LOAD_ADDR + kernel_size + 0x1ffff) & ~0xffff;
  700:             initrd_size = load_image_targphys(initrd_filename, initrd_base,
  701:                                               load_limit - initrd_base);
  702:             if (initrd_size < 0) {
  703:                 fprintf(stderr, "qemu: could not load initial ram disk '%s'\n",
  704:                         initrd_filename);
  705:                 exit(1);
  706:             }
  707:             fprintf(stderr, "Ramdisk              : 0x%08lx..%08lx\n",
  708:                     (long)initrd_base, (long)(initrd_base + initrd_size - 1));
  709:         } else {
  710:             initrd_base = 0;
  711:             initrd_size = 0;
  712:         }
  713:     }
  714: 
  715:     filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, FW_FILE_NAME);
  716:     fw_size = load_image_targphys(filename, 0, FW_MAX_SIZE);
  717:     if (fw_size < 0) {
  718:         hw_error("qemu: could not load LPAR rtas '%s'\n", filename);
  719:         exit(1);
  720:     }
  721:     g_free(filename);
  722:     fprintf(stderr, "Firmware load        : 0x%08x..%08lx\n",
  723:             0, fw_size);
  724:     fprintf(stderr, "Firmware runtime     : 0x%08lx..%08lx\n",
  725:             load_limit, (unsigned long)spapr->fdt_addr);
  726: 
  727:     spapr->entry_point = 0x100;
  728: 
  729:     /* SLOF will startup the secondary CPUs using RTAS */
  730:     for (env = first_cpu; env != NULL; env = env->next_cpu) {
  731:         env->halted = 1;
  732:     }
  733: 
  734:     /* Prepare the device tree */
  735:     spapr->fdt_skel = spapr_create_fdt_skel(cpu_model, rma_size,
  736:                                             initrd_base, initrd_size,
  737:                                             kernel_size,
  738:                                             boot_device, kernel_cmdline,
  739:                                             pteg_shift + 7);
  740:     assert(spapr->fdt_skel != NULL);
  741: 
  742:     qemu_register_reset(spapr_reset, spapr);
  743: }
  744: 
/*
 * Machine description for the "pseries" machine type; handed to
 * qemu_register_machine() by spapr_machine_init().
 */
static QEMUMachine spapr_machine = {
    .name = "pseries",
    .desc = "pSeries Logical Partition (PAPR compliant)",
    .init = ppc_spapr_init,      /* board init entry point */
    .max_cpus = MAX_CPUS,
    .no_parallel = 1,
    .use_scsi = 1,
};
  753: 
/* Register the pseries machine description with QEMU. */
static void spapr_machine_init(void)
{
    qemu_register_machine(&spapr_machine);
}

/*
 * Hand spapr_machine_init to the machine_init() macro (defined elsewhere;
 * presumably arranges for the function to run during startup -- confirm).
 */
machine_init(spapr_machine_init);

unix.superglobalmegacorp.com