Annotation of qemu/hw/spapr.c, revision 1.1.1.3

1.1       root        1: /*
                      2:  * QEMU PowerPC pSeries Logical Partition (aka sPAPR) hardware System Emulator
                      3:  *
                      4:  * Copyright (c) 2004-2007 Fabrice Bellard
                      5:  * Copyright (c) 2007 Jocelyn Mayer
                      6:  * Copyright (c) 2010 David Gibson, IBM Corporation.
                      7:  *
                      8:  * Permission is hereby granted, free of charge, to any person obtaining a copy
                      9:  * of this software and associated documentation files (the "Software"), to deal
                     10:  * in the Software without restriction, including without limitation the rights
                     11:  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
                     12:  * copies of the Software, and to permit persons to whom the Software is
                     13:  * furnished to do so, subject to the following conditions:
                     14:  *
                     15:  * The above copyright notice and this permission notice shall be included in
                     16:  * all copies or substantial portions of the Software.
                     17:  *
                     18:  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
                     19:  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
                     20:  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
                     21:  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
                     22:  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
                     23:  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
                     24:  * THE SOFTWARE.
                     25:  *
                     26:  */
                     27: #include "sysemu.h"
                     28: #include "hw.h"
                     29: #include "elf.h"
                     30: #include "net.h"
                     31: #include "blockdev.h"
1.1.1.2   root       32: #include "cpus.h"
                     33: #include "kvm.h"
                     34: #include "kvm_ppc.h"
1.1       root       35: 
                     36: #include "hw/boards.h"
                     37: #include "hw/ppc.h"
                     38: #include "hw/loader.h"
                     39: 
                     40: #include "hw/spapr.h"
                     41: #include "hw/spapr_vio.h"
1.1.1.2   root       42: #include "hw/spapr_pci.h"
1.1       root       43: #include "hw/xics.h"
                     44: 
1.1.1.2   root       45: #include "kvm.h"
                     46: #include "kvm_ppc.h"
                     47: #include "pci.h"
                     48: 
                     49: #include "exec-memory.h"
                     50: 
1.1       root       51: #include <libfdt.h>
                     52: 
1.1.1.3 ! root       53: /* SLOF memory layout:
        !            54:  *
        !            55:  * SLOF raw image loaded at 0, copies its romfs right below the flat
        !            56:  * device-tree, then positions SLOF itself 31M below that
        !            57:  *
        !            58:  * So we set FW_OVERHEAD to 40MB which should account for all of that
        !            59:  * and more
        !            60:  *
        !            61:  * We load our kernel at 4M, leaving space for SLOF initial image
        !            62:  */
/* Size of the buffers in which the flattened device tree is built */
#define FDT_MAX_SIZE            0x10000
/* Space set aside for RTAS immediately below the RTAS limit address */
#define RTAS_MAX_SIZE           0x10000
/* 4MB; doubles as the kernel load address via KERNEL_LOAD_ADDR below */
#define FW_MAX_SIZE             0x400000
#define FW_FILE_NAME            "slof.bin"
/* 40MB, see the SLOF memory layout comment above */
#define FW_OVERHEAD             0x2800000
#define KERNEL_LOAD_ADDR        FW_MAX_SIZE

/* NOTE(review): unit is not visible here -- presumably megabytes; confirm
 * against the code that uses it */
#define MIN_RMA_SLOF            128UL

/* 512 MHz; used as the timebase frequency when KVM does not provide one */
#define TIMEBASE_FREQ           512000000ULL

#define MAX_CPUS                256
#define XICS_IRQS               1024

#define SPAPR_PCI_BUID          0x800000020000001ULL
#define SPAPR_PCI_MEM_WIN_ADDR  (0x10000000000ULL + 0xA0000000)
#define SPAPR_PCI_MEM_WIN_SIZE  0x20000000
#define SPAPR_PCI_IO_WIN_ADDR   (0x10000000000ULL + 0x80000000)

/* phandle given to the "interrupt-controller" device tree node */
#define PHANDLE_XICP            0x00001111

/* Global machine state; allocated in ppc_spapr_init() */
sPAPREnvironment *spapr;
                     85: 
1.1.1.3 ! root       86: qemu_irq spapr_allocate_irq(uint32_t hint, uint32_t *irq_num,
        !            87:                             enum xics_irq_type type)
1.1.1.2   root       88: {
                     89:     uint32_t irq;
                     90:     qemu_irq qirq;
                     91: 
                     92:     if (hint) {
                     93:         irq = hint;
                     94:         /* FIXME: we should probably check for collisions somehow */
                     95:     } else {
                     96:         irq = spapr->next_irq++;
                     97:     }
                     98: 
1.1.1.3 ! root       99:     qirq = xics_assign_irq(spapr->icp, irq, type);
1.1.1.2   root      100:     if (!qirq) {
                    101:         return NULL;
                    102:     }
                    103: 
                    104:     if (irq_num) {
                    105:         *irq_num = irq;
                    106:     }
                    107: 
                    108:     return qirq;
                    109: }
                    110: 
1.1.1.3 ! root      111: static int spapr_set_associativity(void *fdt, sPAPREnvironment *spapr)
        !           112: {
        !           113:     int ret = 0, offset;
        !           114:     CPUPPCState *env;
        !           115:     char cpu_model[32];
        !           116:     int smt = kvmppc_smt_threads();
        !           117: 
        !           118:     assert(spapr->cpu_model);
        !           119: 
        !           120:     for (env = first_cpu; env != NULL; env = env->next_cpu) {
        !           121:         uint32_t associativity[] = {cpu_to_be32(0x5),
        !           122:                                     cpu_to_be32(0x0),
        !           123:                                     cpu_to_be32(0x0),
        !           124:                                     cpu_to_be32(0x0),
        !           125:                                     cpu_to_be32(env->numa_node),
        !           126:                                     cpu_to_be32(env->cpu_index)};
        !           127: 
        !           128:         if ((env->cpu_index % smt) != 0) {
        !           129:             continue;
        !           130:         }
        !           131: 
        !           132:         snprintf(cpu_model, 32, "/cpus/%s@%x", spapr->cpu_model,
        !           133:                  env->cpu_index);
        !           134: 
        !           135:         offset = fdt_path_offset(fdt, cpu_model);
        !           136:         if (offset < 0) {
        !           137:             return offset;
        !           138:         }
        !           139: 
        !           140:         ret = fdt_setprop(fdt, offset, "ibm,associativity", associativity,
        !           141:                           sizeof(associativity));
        !           142:         if (ret < 0) {
        !           143:             return ret;
        !           144:         }
        !           145:     }
        !           146:     return ret;
        !           147: }
        !           148: 
/*
 * Build the skeleton flattened device tree for the machine.
 *
 * The tree is written with libfdt's sequential-write calls (fdt_create(),
 * fdt_begin_node(), fdt_property*(), fdt_end_node(), fdt_finish()) into a
 * freshly allocated, zeroed buffer of FDT_MAX_SIZE bytes.  Any libfdt call
 * that returns a negative value prints a message and exits via _FDT().
 *
 * cpu_model:      upper-cased and used as the name prefix of each CPU node;
 *                 a copy of the upper-cased name is also stored in the
 *                 global spapr->cpu_model.
 * rma_size:       size of the "memory@0" node; clamped to the size of
 *                 node 0 (or all of RAM when there is at most one NUMA
 *                 node).
 * initrd_base,
 * initrd_size:    if initrd_size is non-zero the range is added to the
 *                 reserve map; base and base + size are always published
 *                 as "linux,initrd-start" / "linux,initrd-end" (32-bit
 *                 cells).
 * kernel_size:    if non-zero, the range starting at KERNEL_LOAD_ADDR is
 *                 added to the reserve map and described by
 *                 "qemu,boot-kernel".
 * boot_device:    value of "qemu,boot-device" in /chosen.
 * kernel_cmdline: value of "bootargs" in /chosen.
 * hash_shift:     second cell of each CPU node's "ibm,pft-size".
 *
 * Returns the buffer holding the finished tree; the function itself does
 * not free it.
 */
static void *spapr_create_fdt_skel(const char *cpu_model,
                                   target_phys_addr_t rma_size,
                                   target_phys_addr_t initrd_base,
                                   target_phys_addr_t initrd_size,
                                   target_phys_addr_t kernel_size,
                                   const char *boot_device,
                                   const char *kernel_cmdline,
                                   long hash_shift)
{
    void *fdt;
    CPUPPCState *env;
    uint64_t mem_reg_property[2];
    uint32_t start_prop = cpu_to_be32(initrd_base);
    uint32_t end_prop = cpu_to_be32(initrd_base + initrd_size);
    uint32_t pft_size_prop[] = {0, cpu_to_be32(hash_shift)};
    /* NUL-separated list of strings; sizeof() below includes every NUL */
    char hypertas_prop[] = "hcall-pft\0hcall-term\0hcall-dabr\0hcall-interrupt"
        "\0hcall-tce\0hcall-vio\0hcall-splpar\0hcall-bulk";
    uint32_t interrupt_server_ranges_prop[] = {0, cpu_to_be32(smp_cpus)};
    int i;
    char *modelname;
    int smt = kvmppc_smt_threads();
    unsigned char vec5[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x80};
    uint32_t refpoints[] = {cpu_to_be32(0x4), cpu_to_be32(0x4)};
    /* Associativity used for node 0 memory; cells 3 and 4 are overwritten
     * with the node number for the other NUMA nodes further down */
    uint32_t associativity[] = {cpu_to_be32(0x4), cpu_to_be32(0x0),
                                cpu_to_be32(0x0), cpu_to_be32(0x0),
                                cpu_to_be32(0x0)};
    char mem_name[32];
    target_phys_addr_t node0_size, mem_start;

/*
 * Evaluate a libfdt expression; if it returns a negative value, print the
 * expression text together with fdt_strerror() and exit(1).
 *
 * The macro is not #undef'd at the end of this function, so it remains
 * available to code further down in this file.
 */
#define _FDT(exp) \
    do { \
        int ret = (exp);                                           \
        if (ret < 0) {                                             \
            fprintf(stderr, "qemu: error creating device tree: %s: %s\n", \
                    #exp, fdt_strerror(ret));                      \
            exit(1);                                               \
        }                                                          \
    } while (0)

    fdt = g_malloc0(FDT_MAX_SIZE);
    _FDT((fdt_create(fdt, FDT_MAX_SIZE)));

    /* Reserve map: kernel and initrd images, when present */
    if (kernel_size) {
        _FDT((fdt_add_reservemap_entry(fdt, KERNEL_LOAD_ADDR, kernel_size)));
    }
    if (initrd_size) {
        _FDT((fdt_add_reservemap_entry(fdt, initrd_base, initrd_size)));
    }
    _FDT((fdt_finish_reservemap(fdt)));

    /* Root node */
    _FDT((fdt_begin_node(fdt, "")));
    _FDT((fdt_property_string(fdt, "device_type", "chrp")));
    _FDT((fdt_property_string(fdt, "model", "IBM pSeries (emulated by qemu)")));

    _FDT((fdt_property_cell(fdt, "#address-cells", 0x2)));
    _FDT((fdt_property_cell(fdt, "#size-cells", 0x2)));

    /* /chosen */
    _FDT((fdt_begin_node(fdt, "chosen")));

    /* Set Form1_affinity */
    _FDT((fdt_property(fdt, "ibm,architecture-vec-5", vec5, sizeof(vec5))));

    _FDT((fdt_property_string(fdt, "bootargs", kernel_cmdline)));
    _FDT((fdt_property(fdt, "linux,initrd-start",
                       &start_prop, sizeof(start_prop))));
    _FDT((fdt_property(fdt, "linux,initrd-end",
                       &end_prop, sizeof(end_prop))));
    if (kernel_size) {
        /* {load address, size}, both as 64-bit big-endian values */
        uint64_t kprop[2] = { cpu_to_be64(KERNEL_LOAD_ADDR),
                              cpu_to_be64(kernel_size) };

        _FDT((fdt_property(fdt, "qemu,boot-kernel", &kprop, sizeof(kprop))));
    }
    _FDT((fdt_property_string(fdt, "qemu,boot-device", boot_device)));

    _FDT((fdt_end_node(fdt)));

    /* memory node(s) */
    node0_size = (nb_numa_nodes > 1) ? node_mem[0] : ram_size;
    /* The RMA cannot extend beyond node 0 */
    if (rma_size > node0_size) {
        rma_size = node0_size;
    }

    /* RMA */
    mem_reg_property[0] = 0;
    mem_reg_property[1] = cpu_to_be64(rma_size);
    _FDT((fdt_begin_node(fdt, "memory@0")));
    _FDT((fdt_property_string(fdt, "device_type", "memory")));
    _FDT((fdt_property(fdt, "reg", mem_reg_property,
        sizeof(mem_reg_property))));
    _FDT((fdt_property(fdt, "ibm,associativity", associativity,
        sizeof(associativity))));
    _FDT((fdt_end_node(fdt)));

    /* RAM: Node 0 */
    /* Whatever part of node 0 lies above the RMA gets its own node */
    if (node0_size > rma_size) {
        mem_reg_property[0] = cpu_to_be64(rma_size);
        mem_reg_property[1] = cpu_to_be64(node0_size - rma_size);

        sprintf(mem_name, "memory@" TARGET_FMT_lx, rma_size);
        _FDT((fdt_begin_node(fdt, mem_name)));
        _FDT((fdt_property_string(fdt, "device_type", "memory")));
        _FDT((fdt_property(fdt, "reg", mem_reg_property,
                           sizeof(mem_reg_property))));
        _FDT((fdt_property(fdt, "ibm,associativity", associativity,
                           sizeof(associativity))));
        _FDT((fdt_end_node(fdt)));
    }

    /* RAM: Node 1 and beyond */
    /* Nodes are laid out back to back, starting right after node 0 */
    mem_start = node0_size;
    for (i = 1; i < nb_numa_nodes; i++) {
        mem_reg_property[0] = cpu_to_be64(mem_start);
        mem_reg_property[1] = cpu_to_be64(node_mem[i]);
        associativity[3] = associativity[4] = cpu_to_be32(i);
        sprintf(mem_name, "memory@" TARGET_FMT_lx, mem_start);
        _FDT((fdt_begin_node(fdt, mem_name)));
        _FDT((fdt_property_string(fdt, "device_type", "memory")));
        _FDT((fdt_property(fdt, "reg", mem_reg_property,
            sizeof(mem_reg_property))));
        _FDT((fdt_property(fdt, "ibm,associativity", associativity,
            sizeof(associativity))));
        _FDT((fdt_end_node(fdt)));
        mem_start += node_mem[i];
    }

    /* cpus */
    _FDT((fdt_begin_node(fdt, "cpus")));

    _FDT((fdt_property_cell(fdt, "#address-cells", 0x1)));
    _FDT((fdt_property_cell(fdt, "#size-cells", 0x0)));

    /* CPU node names use an upper-cased copy of the model name */
    modelname = g_strdup(cpu_model);

    for (i = 0; i < strlen(modelname); i++) {
        modelname[i] = toupper(modelname[i]);
    }

    /* This is needed during FDT finalization */
    spapr->cpu_model = g_strdup(modelname);

    for (env = first_cpu; env != NULL; env = env->next_cpu) {
        int index = env->cpu_index;
        /* Variable-length arrays sized by the global smp_threads */
        uint32_t servers_prop[smp_threads];
        uint32_t gservers_prop[smp_threads * 2];
        char *nodename;
        uint32_t segs[] = {cpu_to_be32(28), cpu_to_be32(40),
                           0xffffffff, 0xffffffff};
        uint32_t tbfreq = kvm_enabled() ? kvmppc_get_tbfreq() : TIMEBASE_FREQ;
        uint32_t cpufreq = kvm_enabled() ? kvmppc_get_clockfreq() : 1000000000;

        /* Only CPUs whose index is a multiple of the SMT count get a node */
        if ((index % smt) != 0) {
            continue;
        }

        if (asprintf(&nodename, "%s@%x", modelname, index) < 0) {
            fprintf(stderr, "Allocation failure\n");
            exit(1);
        }

        _FDT((fdt_begin_node(fdt, nodename)));

        free(nodename);

        _FDT((fdt_property_cell(fdt, "reg", index)));
        _FDT((fdt_property_string(fdt, "device_type", "cpu")));

        _FDT((fdt_property_cell(fdt, "cpu-version", env->spr[SPR_PVR])));
        _FDT((fdt_property_cell(fdt, "dcache-block-size",
                                env->dcache_line_size)));
        _FDT((fdt_property_cell(fdt, "icache-block-size",
                                env->icache_line_size)));
        _FDT((fdt_property_cell(fdt, "timebase-frequency", tbfreq)));
        _FDT((fdt_property_cell(fdt, "clock-frequency", cpufreq)));
        _FDT((fdt_property_cell(fdt, "ibm,slb-size", env->slb_nr)));
        _FDT((fdt_property(fdt, "ibm,pft-size",
                           pft_size_prop, sizeof(pft_size_prop))));
        _FDT((fdt_property_string(fdt, "status", "okay")));
        _FDT((fdt_property(fdt, "64-bit", NULL, 0)));

        /* Build interrupt servers and gservers properties */
        /* Note: this reuses the function-level counter i */
        for (i = 0; i < smp_threads; i++) {
            servers_prop[i] = cpu_to_be32(index + i);
            /* Hack, direct the group queues back to cpu 0 */
            gservers_prop[i*2] = cpu_to_be32(index + i);
            gservers_prop[i*2 + 1] = 0;
        }
        _FDT((fdt_property(fdt, "ibm,ppc-interrupt-server#s",
                           servers_prop, sizeof(servers_prop))));
        _FDT((fdt_property(fdt, "ibm,ppc-interrupt-gserver#s",
                           gservers_prop, sizeof(gservers_prop))));

        if (env->mmu_model & POWERPC_MMU_1TSEG) {
            _FDT((fdt_property(fdt, "ibm,processor-segment-sizes",
                               segs, sizeof(segs))));
        }

        /* Advertise VMX/VSX (vector extensions) if available
         *   0 / no property == no vector extensions
         *   1               == VMX / Altivec available
         *   2               == VSX available */
        if (env->insns_flags & PPC_ALTIVEC) {
            uint32_t vmx = (env->insns_flags2 & PPC2_VSX) ? 2 : 1;

            _FDT((fdt_property_cell(fdt, "ibm,vmx", vmx)));
        }

        /* Advertise DFP (Decimal Floating Point) if available
         *   0 / no property == no DFP
         *   1               == DFP available */
        if (env->insns_flags2 & PPC2_DFP) {
            _FDT((fdt_property_cell(fdt, "ibm,dfp", 1)));
        }

        _FDT((fdt_end_node(fdt)));
    }

    /* The copy stored in spapr->cpu_model above is independent of this one */
    g_free(modelname);

    _FDT((fdt_end_node(fdt)));

    /* RTAS */
    _FDT((fdt_begin_node(fdt, "rtas")));

    _FDT((fdt_property(fdt, "ibm,hypertas-functions", hypertas_prop,
                       sizeof(hypertas_prop))));

    _FDT((fdt_property(fdt, "ibm,associativity-reference-points",
        refpoints, sizeof(refpoints))));

    _FDT((fdt_end_node(fdt)));

    /* interrupt controller */
    _FDT((fdt_begin_node(fdt, "interrupt-controller")));

    _FDT((fdt_property_string(fdt, "device_type",
                              "PowerPC-External-Interrupt-Presentation")));
    _FDT((fdt_property_string(fdt, "compatible", "IBM,ppc-xicp")));
    _FDT((fdt_property(fdt, "interrupt-controller", NULL, 0)));
    _FDT((fdt_property(fdt, "ibm,interrupt-server-ranges",
                       interrupt_server_ranges_prop,
                       sizeof(interrupt_server_ranges_prop))));
    _FDT((fdt_property_cell(fdt, "#interrupt-cells", 2)));
    _FDT((fdt_property_cell(fdt, "linux,phandle", PHANDLE_XICP)));
    _FDT((fdt_property_cell(fdt, "phandle", PHANDLE_XICP)));

    _FDT((fdt_end_node(fdt)));

    /* vdevice */
    _FDT((fdt_begin_node(fdt, "vdevice")));

    _FDT((fdt_property_string(fdt, "device_type", "vdevice")));
    _FDT((fdt_property_string(fdt, "compatible", "IBM,vdevice")));
    _FDT((fdt_property_cell(fdt, "#address-cells", 0x1)));
    _FDT((fdt_property_cell(fdt, "#size-cells", 0x0)));
    _FDT((fdt_property_cell(fdt, "#interrupt-cells", 0x2)));
    _FDT((fdt_property(fdt, "interrupt-controller", NULL, 0)));

    _FDT((fdt_end_node(fdt)));

    _FDT((fdt_end_node(fdt))); /* close root node */
    _FDT((fdt_finish(fdt)));

    return fdt;
}
                    416: 
                    417: static void spapr_finalize_fdt(sPAPREnvironment *spapr,
                    418:                                target_phys_addr_t fdt_addr,
                    419:                                target_phys_addr_t rtas_addr,
                    420:                                target_phys_addr_t rtas_size)
                    421: {
                    422:     int ret;
                    423:     void *fdt;
1.1.1.2   root      424:     sPAPRPHBState *phb;
1.1       root      425: 
1.1.1.2   root      426:     fdt = g_malloc(FDT_MAX_SIZE);
1.1       root      427: 
                    428:     /* open out the base tree into a temp buffer for the final tweaks */
                    429:     _FDT((fdt_open_into(spapr->fdt_skel, fdt, FDT_MAX_SIZE)));
                    430: 
                    431:     ret = spapr_populate_vdevice(spapr->vio_bus, fdt);
                    432:     if (ret < 0) {
                    433:         fprintf(stderr, "couldn't setup vio devices in fdt\n");
                    434:         exit(1);
                    435:     }
                    436: 
1.1.1.2   root      437:     QLIST_FOREACH(phb, &spapr->phbs, list) {
                    438:         ret = spapr_populate_pci_devices(phb, PHANDLE_XICP, fdt);
                    439:     }
                    440: 
                    441:     if (ret < 0) {
                    442:         fprintf(stderr, "couldn't setup PCI devices in fdt\n");
                    443:         exit(1);
                    444:     }
                    445: 
1.1       root      446:     /* RTAS */
                    447:     ret = spapr_rtas_device_tree_setup(fdt, rtas_addr, rtas_size);
                    448:     if (ret < 0) {
                    449:         fprintf(stderr, "Couldn't set up RTAS device tree properties\n");
                    450:     }
                    451: 
1.1.1.3 ! root      452:     /* Advertise NUMA via ibm,associativity */
        !           453:     if (nb_numa_nodes > 1) {
        !           454:         ret = spapr_set_associativity(fdt, spapr);
        !           455:         if (ret < 0) {
        !           456:             fprintf(stderr, "Couldn't set up NUMA device tree properties\n");
        !           457:         }
        !           458:     }
        !           459: 
1.1.1.2   root      460:     spapr_populate_chosen_stdout(fdt, spapr->vio_bus);
                    461: 
1.1       root      462:     _FDT((fdt_pack(fdt)));
                    463: 
1.1.1.3 ! root      464:     if (fdt_totalsize(fdt) > FDT_MAX_SIZE) {
        !           465:         hw_error("FDT too big ! 0x%x bytes (max is 0x%x)\n",
        !           466:                  fdt_totalsize(fdt), FDT_MAX_SIZE);
        !           467:         exit(1);
        !           468:     }
        !           469: 
1.1       root      470:     cpu_physical_memory_write(fdt_addr, fdt, fdt_totalsize(fdt));
                    471: 
1.1.1.2   root      472:     g_free(fdt);
1.1       root      473: }
                    474: 
                    475: static uint64_t translate_kernel_address(void *opaque, uint64_t addr)
                    476: {
                    477:     return (addr & 0x0fffffff) + KERNEL_LOAD_ADDR;
                    478: }
                    479: 
1.1.1.3 ! root      480: static void emulate_spapr_hypercall(CPUPPCState *env)
1.1       root      481: {
                    482:     env->gpr[3] = spapr_hypercall(env, env->gpr[3], &env->gpr[4]);
                    483: }
                    484: 
                    485: static void spapr_reset(void *opaque)
                    486: {
                    487:     sPAPREnvironment *spapr = (sPAPREnvironment *)opaque;
                    488: 
                    489:     fprintf(stderr, "sPAPR reset\n");
                    490: 
                    491:     /* flush out the hash table */
                    492:     memset(spapr->htab, 0, spapr->htab_size);
                    493: 
                    494:     /* Load the fdt */
                    495:     spapr_finalize_fdt(spapr, spapr->fdt_addr, spapr->rtas_addr,
                    496:                        spapr->rtas_size);
                    497: 
                    498:     /* Set up the entry state */
                    499:     first_cpu->gpr[3] = spapr->fdt_addr;
                    500:     first_cpu->gpr[5] = 0;
                    501:     first_cpu->halted = 0;
                    502:     first_cpu->nip = spapr->entry_point;
                    503: 
                    504: }
                    505: 
1.1.1.3 ! root      506: static void spapr_cpu_reset(void *opaque)
        !           507: {
        !           508:     CPUPPCState *env = opaque;
        !           509: 
        !           510:     cpu_state_reset(env);
        !           511: }
        !           512: 
1.1       root      513: /* pSeries LPAR / sPAPR hardware init */
                    514: static void ppc_spapr_init(ram_addr_t ram_size,
                    515:                            const char *boot_device,
                    516:                            const char *kernel_filename,
                    517:                            const char *kernel_cmdline,
                    518:                            const char *initrd_filename,
                    519:                            const char *cpu_model)
                    520: {
1.1.1.3 ! root      521:     CPUPPCState *env;
1.1       root      522:     int i;
1.1.1.2   root      523:     MemoryRegion *sysmem = get_system_memory();
                    524:     MemoryRegion *ram = g_new(MemoryRegion, 1);
                    525:     target_phys_addr_t rma_alloc_size, rma_size;
1.1.1.3 ! root      526:     uint32_t initrd_base = 0;
        !           527:     long kernel_size = 0, initrd_size = 0;
        !           528:     long load_limit, rtas_limit, fw_size;
1.1       root      529:     long pteg_shift = 17;
                    530:     char *filename;
                    531: 
1.1.1.2   root      532:     spapr = g_malloc0(sizeof(*spapr));
                    533:     QLIST_INIT(&spapr->phbs);
                    534: 
1.1       root      535:     cpu_ppc_hypercall = emulate_spapr_hypercall;
                    536: 
1.1.1.2   root      537:     /* Allocate RMA if necessary */
                    538:     rma_alloc_size = kvmppc_alloc_rma("ppc_spapr.rma", sysmem);
                    539: 
                    540:     if (rma_alloc_size == -1) {
                    541:         hw_error("qemu: Unable to create RMA\n");
                    542:         exit(1);
                    543:     }
                    544:     if (rma_alloc_size && (rma_alloc_size < ram_size)) {
                    545:         rma_size = rma_alloc_size;
                    546:     } else {
                    547:         rma_size = ram_size;
                    548:     }
                    549: 
1.1.1.3 ! root      550:     /* We place the device tree and RTAS just below either the top of the RMA,
1.1.1.2   root      551:      * or just below 2GB, whichever is lowere, so that it can be
                    552:      * processed with 32-bit real mode code if necessary */
1.1.1.3 ! root      553:     rtas_limit = MIN(rma_size, 0x80000000);
        !           554:     spapr->rtas_addr = rtas_limit - RTAS_MAX_SIZE;
        !           555:     spapr->fdt_addr = spapr->rtas_addr - FDT_MAX_SIZE;
        !           556:     load_limit = spapr->fdt_addr - FW_OVERHEAD;
1.1       root      557: 
                    558:     /* init CPUs */
                    559:     if (cpu_model == NULL) {
1.1.1.2   root      560:         cpu_model = kvm_enabled() ? "host" : "POWER7";
1.1       root      561:     }
                    562:     for (i = 0; i < smp_cpus; i++) {
                    563:         env = cpu_init(cpu_model);
                    564: 
                    565:         if (!env) {
                    566:             fprintf(stderr, "Unable to find PowerPC CPU definition\n");
                    567:             exit(1);
                    568:         }
                    569:         /* Set time-base frequency to 512 MHz */
                    570:         cpu_ppc_tb_init(env, TIMEBASE_FREQ);
1.1.1.3 ! root      571:         qemu_register_reset(spapr_cpu_reset, env);
1.1       root      572: 
                    573:         env->hreset_vector = 0x60;
                    574:         env->hreset_excp_prefix = 0;
                    575:         env->gpr[3] = env->cpu_index;
                    576:     }
                    577: 
                    578:     /* allocate RAM */
1.1.1.2   root      579:     spapr->ram_limit = ram_size;
                    580:     if (spapr->ram_limit > rma_alloc_size) {
                    581:         ram_addr_t nonrma_base = rma_alloc_size;
                    582:         ram_addr_t nonrma_size = spapr->ram_limit - rma_alloc_size;
                    583: 
1.1.1.3 ! root      584:         memory_region_init_ram(ram, "ppc_spapr.ram", nonrma_size);
        !           585:         vmstate_register_ram_global(ram);
1.1.1.2   root      586:         memory_region_add_subregion(sysmem, nonrma_base, ram);
                    587:     }
1.1       root      588: 
                    589:     /* allocate hash page table.  For now we always make this 16mb,
                    590:      * later we should probably make it scale to the size of guest
                    591:      * RAM */
                    592:     spapr->htab_size = 1ULL << (pteg_shift + 7);
1.1.1.2   root      593:     spapr->htab = qemu_memalign(spapr->htab_size, spapr->htab_size);
1.1       root      594: 
                    595:     for (env = first_cpu; env != NULL; env = env->next_cpu) {
                    596:         env->external_htab = spapr->htab;
                    597:         env->htab_base = -1;
                    598:         env->htab_mask = spapr->htab_size - 1;
1.1.1.2   root      599: 
                    600:         /* Tell KVM that we're in PAPR mode */
                    601:         env->spr[SPR_SDR1] = (unsigned long)spapr->htab |
                    602:                              ((pteg_shift + 7) - 18);
                    603:         env->spr[SPR_HIOR] = 0;
                    604: 
                    605:         if (kvm_enabled()) {
                    606:             kvmppc_set_papr(env);
                    607:         }
1.1       root      608:     }
                    609: 
                    610:     filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, "spapr-rtas.bin");
                    611:     spapr->rtas_size = load_image_targphys(filename, spapr->rtas_addr,
1.1.1.3 ! root      612:                                            rtas_limit - spapr->rtas_addr);
1.1       root      613:     if (spapr->rtas_size < 0) {
                    614:         hw_error("qemu: could not load LPAR rtas '%s'\n", filename);
                    615:         exit(1);
                    616:     }
1.1.1.3 ! root      617:     if (spapr->rtas_size > RTAS_MAX_SIZE) {
        !           618:         hw_error("RTAS too big ! 0x%lx bytes (max is 0x%x)\n",
        !           619:                  spapr->rtas_size, RTAS_MAX_SIZE);
        !           620:         exit(1);
        !           621:     }
1.1.1.2   root      622:     g_free(filename);
1.1       root      623: 
1.1.1.3 ! root      624: 
1.1       root      625:     /* Set up Interrupt Controller */
                    626:     spapr->icp = xics_system_init(XICS_IRQS);
1.1.1.2   root      627:     spapr->next_irq = 16;
1.1       root      628: 
                    629:     /* Set up VIO bus */
                    630:     spapr->vio_bus = spapr_vio_bus_init();
                    631: 
1.1.1.2   root      632:     for (i = 0; i < MAX_SERIAL_PORTS; i++) {
1.1       root      633:         if (serial_hds[i]) {
1.1.1.3 ! root      634:             spapr_vty_create(spapr->vio_bus, serial_hds[i]);
1.1       root      635:         }
                    636:     }
                    637: 
1.1.1.2   root      638:     /* Set up PCI */
                    639:     spapr_create_phb(spapr, "pci", SPAPR_PCI_BUID,
                    640:                      SPAPR_PCI_MEM_WIN_ADDR,
                    641:                      SPAPR_PCI_MEM_WIN_SIZE,
                    642:                      SPAPR_PCI_IO_WIN_ADDR);
                    643: 
                    644:     for (i = 0; i < nb_nics; i++) {
1.1       root      645:         NICInfo *nd = &nd_table[i];
                    646: 
                    647:         if (!nd->model) {
1.1.1.2   root      648:             nd->model = g_strdup("ibmveth");
1.1       root      649:         }
                    650: 
                    651:         if (strcmp(nd->model, "ibmveth") == 0) {
1.1.1.3 ! root      652:             spapr_vlan_create(spapr->vio_bus, nd);
1.1       root      653:         } else {
1.1.1.2   root      654:             pci_nic_init_nofail(&nd_table[i], nd->model, NULL);
1.1       root      655:         }
                    656:     }
                    657: 
                    658:     for (i = 0; i <= drive_get_max_bus(IF_SCSI); i++) {
1.1.1.3 ! root      659:         spapr_vscsi_create(spapr->vio_bus);
        !           660:     }
        !           661: 
        !           662:     if (rma_size < (MIN_RMA_SLOF << 20)) {
        !           663:         fprintf(stderr, "qemu: pSeries SLOF firmware requires >= "
        !           664:                 "%ldM guest RMA (Real Mode Area memory)\n", MIN_RMA_SLOF);
        !           665:         exit(1);
1.1       root      666:     }
                    667: 
1.1.1.3 ! root      668:     fprintf(stderr, "sPAPR memory map:\n");
        !           669:     fprintf(stderr, "RTAS                 : 0x%08lx..%08lx\n",
        !           670:             (unsigned long)spapr->rtas_addr,
        !           671:             (unsigned long)(spapr->rtas_addr + spapr->rtas_size - 1));
        !           672:     fprintf(stderr, "FDT                  : 0x%08lx..%08lx\n",
        !           673:             (unsigned long)spapr->fdt_addr,
        !           674:             (unsigned long)(spapr->fdt_addr + FDT_MAX_SIZE - 1));
        !           675: 
1.1       root      676:     if (kernel_filename) {
                    677:         uint64_t lowaddr = 0;
                    678: 
                    679:         kernel_size = load_elf(kernel_filename, translate_kernel_address, NULL,
                    680:                                NULL, &lowaddr, NULL, 1, ELF_MACHINE, 0);
                    681:         if (kernel_size < 0) {
                    682:             kernel_size = load_image_targphys(kernel_filename,
                    683:                                               KERNEL_LOAD_ADDR,
1.1.1.3 ! root      684:                                               load_limit - KERNEL_LOAD_ADDR);
1.1       root      685:         }
                    686:         if (kernel_size < 0) {
                    687:             fprintf(stderr, "qemu: could not load kernel '%s'\n",
                    688:                     kernel_filename);
                    689:             exit(1);
                    690:         }
1.1.1.3 ! root      691:         fprintf(stderr, "Kernel               : 0x%08x..%08lx\n",
        !           692:                 KERNEL_LOAD_ADDR, KERNEL_LOAD_ADDR + kernel_size - 1);
1.1       root      693: 
                    694:         /* load initrd */
                    695:         if (initrd_filename) {
1.1.1.3 ! root      696:             /* Try to locate the initrd in the gap between the kernel
        !           697:              * and the firmware. Add a bit of space just in case
        !           698:              */
        !           699:             initrd_base = (KERNEL_LOAD_ADDR + kernel_size + 0x1ffff) & ~0xffff;
1.1       root      700:             initrd_size = load_image_targphys(initrd_filename, initrd_base,
1.1.1.3 ! root      701:                                               load_limit - initrd_base);
1.1       root      702:             if (initrd_size < 0) {
                    703:                 fprintf(stderr, "qemu: could not load initial ram disk '%s'\n",
                    704:                         initrd_filename);
                    705:                 exit(1);
                    706:             }
1.1.1.3 ! root      707:             fprintf(stderr, "Ramdisk              : 0x%08lx..%08lx\n",
        !           708:                     (long)initrd_base, (long)(initrd_base + initrd_size - 1));
1.1       root      709:         } else {
                    710:             initrd_base = 0;
                    711:             initrd_size = 0;
                    712:         }
1.1.1.3 ! root      713:     }
1.1       root      714: 
1.1.1.3 ! root      715:     filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, FW_FILE_NAME);
        !           716:     fw_size = load_image_targphys(filename, 0, FW_MAX_SIZE);
        !           717:     if (fw_size < 0) {
        !           718:         hw_error("qemu: could not load LPAR rtas '%s'\n", filename);
        !           719:         exit(1);
        !           720:     }
        !           721:     g_free(filename);
        !           722:     fprintf(stderr, "Firmware load        : 0x%08x..%08lx\n",
        !           723:             0, fw_size);
        !           724:     fprintf(stderr, "Firmware runtime     : 0x%08lx..%08lx\n",
        !           725:             load_limit, (unsigned long)spapr->fdt_addr);
        !           726: 
        !           727:     spapr->entry_point = 0x100;
        !           728: 
        !           729:     /* SLOF will startup the secondary CPUs using RTAS */
        !           730:     for (env = first_cpu; env != NULL; env = env->next_cpu) {
        !           731:         env->halted = 1;
1.1       root      732:     }
                    733: 
                    734:     /* Prepare the device tree */
1.1.1.2   root      735:     spapr->fdt_skel = spapr_create_fdt_skel(cpu_model, rma_size,
1.1       root      736:                                             initrd_base, initrd_size,
1.1.1.3 ! root      737:                                             kernel_size,
1.1       root      738:                                             boot_device, kernel_cmdline,
                    739:                                             pteg_shift + 7);
                    740:     assert(spapr->fdt_skel != NULL);
                    741: 
                    742:     qemu_register_reset(spapr_reset, spapr);
                    743: }
                    744: 
                    745: static QEMUMachine spapr_machine = {
                    746:     .name = "pseries",
                    747:     .desc = "pSeries Logical Partition (PAPR compliant)",
                    748:     .init = ppc_spapr_init,
                    749:     .max_cpus = MAX_CPUS,
                    750:     .no_parallel = 1,
                    751:     .use_scsi = 1,
                    752: };
                    753: 
                    754: static void spapr_machine_init(void)
                    755: {
                    756:     qemu_register_machine(&spapr_machine);
                    757: }
                    758: 
                    759: machine_init(spapr_machine_init);

unix.superglobalmegacorp.com