1 /*
2 * PPC64 (SPAPR) platform support
3 *
4 * Copyright 2011 Matt Evans <matt@ozlabs.org>, IBM Corporation.
5 *
6 * Portions of FDT setup borrowed from QEMU, copyright 2010 David Gibson, IBM
7 * Corporation.
8 *
9 * This program is free software; you can redistribute it and/or modify it
10 * under the terms of the GNU General Public License version 2 as published
11 * by the Free Software Foundation.
12 */
13
14 #include "kvm/fdt.h"
15 #include "kvm/kvm.h"
16 #include "kvm/util.h"
17 #include "cpu_info.h"
18
19 #include "spapr.h"
20 #include "spapr_hvcons.h"
21 #include "spapr_pci.h"
22
23 #include <linux/kvm.h>
24
25 #include <sys/types.h>
26 #include <sys/ioctl.h>
27 #include <sys/mman.h>
28 #include <stdbool.h>
29 #include <stdlib.h>
30 #include <string.h>
31 #include <unistd.h>
32 #include <stdio.h>
33 #include <fcntl.h>
34 #include <asm/unistd.h>
35 #include <errno.h>
36
37 #include <linux/byteorder.h>
38
/*
 * log2 of the guest hashed page table (HPT) size in bytes.  The HPT is
 * allocated as (1 << HPT_ORDER) bytes, i.e. 16MB, and the same order is
 * advertised to the guest through the "ibm,pft-size" CPU property.
 */
#define HPT_ORDER 24

/* Path substituted when the configured hugetlbfs path is the word "default". */
#define HUGETLBFS_PATH "/var/lib/hugetlbfs/global/pagesize-16MB/"

/*
 * Private copy of the kernel command line.  It is filled in when the kernel
 * image is loaded and later emitted as the /chosen "bootargs" FDT property.
 */
static char kern_cmdline[2048];

/*
 * KVM capabilities listed for this architecture; the table ends with an
 * all-zero entry.  NOTE(review): presumably consumed by generic code that
 * refuses to start when one of them is missing -- confirm against the caller.
 */
struct kvm_ext kvm_req_ext[] = {
	{ DEFINE_KVM_EXT(KVM_CAP_PPC_UNSET_IRQ) },
	{ DEFINE_KVM_EXT(KVM_CAP_PPC_IRQ_LEVEL) },
	{ 0, 0 }
};
50
kvm__arch_default_ram_address(void)51 u64 kvm__arch_default_ram_address(void)
52 {
53 return 0;
54 }
55
/*
 * Architecture hook for validating the VM configuration.
 *
 * This implementation performs no checks and never touches @kvm.
 */
void kvm__arch_validate_cfg(struct kvm *kvm)
{
	(void)kvm;	/* intentionally unused */
}
59
mfpvr(void)60 static uint32_t mfpvr(void)
61 {
62 uint32_t r;
63 asm volatile ("mfpvr %0" : "=r"(r));
64 return r;
65 }
66
/*
 * Tell the caller whether the host CPU can run virtual machines.
 *
 * No probing is done here: the answer is unconditionally true.
 */
bool kvm__arch_cpu_supports_vm(void)
{
	const bool supported = true;

	return supported;
}
71
kvm__init_ram(struct kvm * kvm)72 void kvm__init_ram(struct kvm *kvm)
73 {
74 u64 phys_start, phys_size;
75 void *host_mem;
76
77 phys_start = 0;
78 phys_size = kvm->ram_size;
79 host_mem = kvm->ram_start;
80
81 /*
82 * We put MMIO at PPC_MMIO_START, high up. Make sure that this doesn't
83 * crash into the end of RAM -- on PPC64 at least, this is so high
84 * (63TB!) that this is unlikely.
85 */
86 if (phys_size >= PPC_MMIO_START)
87 die("Too much memory (%lld, what a nice problem): "
88 "overlaps MMIO!\n",
89 phys_size);
90
91 kvm__register_ram(kvm, phys_start, phys_size, host_mem);
92 }
93
/*
 * Architecture hook for adjusting the kernel command line.
 *
 * Nothing platform-specific is needed here, so @cmdline is left untouched
 * and @video is ignored.
 */
void kvm__arch_set_cmdline(char *cmdline, bool video)
{
	(void)cmdline;	/* intentionally unused */
	(void)video;	/* intentionally unused */
}
98
99 /* Architecture-specific KVM init */
kvm__arch_init(struct kvm * kvm)100 void kvm__arch_init(struct kvm *kvm)
101 {
102 const char *hugetlbfs_path = kvm->cfg.hugetlbfs_path;
103 int cap_ppc_rma;
104 unsigned long hpt;
105
106 kvm->ram_size = kvm->cfg.ram_size;
107
108 /* Map "default" hugetblfs path to the standard 16M mount point */
109 if (hugetlbfs_path && !strcmp(hugetlbfs_path, "default"))
110 hugetlbfs_path = HUGETLBFS_PATH;
111
112 kvm->ram_start = mmap_anon_or_hugetlbfs(kvm, hugetlbfs_path, kvm->ram_size);
113
114 if (kvm->ram_start == MAP_FAILED)
115 die("Couldn't map %lld bytes for RAM (%d)\n",
116 kvm->ram_size, errno);
117
118 /* FDT goes at top of memory, RTAS just below */
119 kvm->arch.fdt_gra = kvm->ram_size - FDT_MAX_SIZE;
120 /* FIXME: Not all PPC systems have RTAS */
121 kvm->arch.rtas_gra = kvm->arch.fdt_gra - RTAS_MAX_SIZE;
122 madvise(kvm->ram_start, kvm->ram_size, MADV_MERGEABLE);
123
124 /* FIXME: SPAPR-PR specific; allocate a guest HPT. */
125 if (posix_memalign((void **)&hpt, (1<<HPT_ORDER), (1<<HPT_ORDER)))
126 die("Can't allocate %d bytes for HPT\n", (1<<HPT_ORDER));
127
128 kvm->arch.sdr1 = ((hpt + 0x3ffffULL) & ~0x3ffffULL) | (HPT_ORDER-18);
129
130 kvm->arch.pvr = mfpvr();
131
132 /* FIXME: This is book3s-specific */
133 cap_ppc_rma = ioctl(kvm->sys_fd, KVM_CHECK_EXTENSION, KVM_CAP_PPC_RMA);
134 if (cap_ppc_rma == 2)
135 die("Need contiguous RMA allocation on this hardware, "
136 "which is not yet supported.");
137
138 /* Do these before FDT setup, IRQ setup, etc. */
139 /* FIXME: SPAPR-specific */
140 hypercall_init();
141 register_core_rtas();
142 /* Now that hypercalls are initialised, register a couple for the console: */
143 spapr_hvcons_init();
144 spapr_create_phb(kvm, "pci", SPAPR_PCI_BUID,
145 SPAPR_PCI_MEM_WIN_ADDR,
146 SPAPR_PCI_MEM_WIN_SIZE,
147 SPAPR_PCI_IO_WIN_ADDR,
148 SPAPR_PCI_IO_WIN_SIZE);
149 }
150
kvm__arch_delete_ram(struct kvm * kvm)151 void kvm__arch_delete_ram(struct kvm *kvm)
152 {
153 munmap(kvm->ram_start, kvm->ram_size);
154 }
155
/*
 * Pulse interrupt line @irq: drive it to level 1 and then straight back to
 * level 0 through kvm__irq_line().
 */
void kvm__irq_trigger(struct kvm *kvm, int irq)
{
	int level;

	/* Counts 1, 0: raise the line first, then lower it. */
	for (level = 1; level >= 0; level--)
		kvm__irq_line(kvm, irq, level);
}
161
/*
 * Architecture hook for servicing terminal input: delegates to the SPAPR
 * hypervisor-console poll routine.
 */
void kvm__arch_read_term(struct kvm *kvm)
{
	/* FIXME: Should register callbacks to platform-specific polls */
	spapr_hvcons_poll(kvm);
}
167
kvm__arch_load_kernel_image(struct kvm * kvm,int fd_kernel,int fd_initrd,const char * kernel_cmdline)168 bool kvm__arch_load_kernel_image(struct kvm *kvm, int fd_kernel, int fd_initrd,
169 const char *kernel_cmdline)
170 {
171 void *p;
172 void *k_start;
173 ssize_t filesize;
174
175 p = k_start = guest_flat_to_host(kvm, KERNEL_LOAD_ADDR);
176
177 filesize = read_file(fd_kernel, p, INITRD_LOAD_ADDR - KERNEL_LOAD_ADDR);
178 if (filesize < 0) {
179 if (errno == ENOMEM)
180 die("Kernel overlaps initrd!");
181
182 die_perror("kernel read");
183 }
184 pr_info("Loaded kernel to 0x%x (%ld bytes)", KERNEL_LOAD_ADDR,
185 filesize);
186 if (fd_initrd != -1) {
187 if (p-k_start > INITRD_LOAD_ADDR)
188 die("Kernel overlaps initrd!");
189
190 /* Round up kernel size to 8byte alignment, and load initrd right after. */
191 p = guest_flat_to_host(kvm, INITRD_LOAD_ADDR);
192
193 filesize = read_file(fd_initrd, p,
194 (kvm->ram_start + kvm->ram_size) - p);
195 if (filesize < 0) {
196 if (errno == ENOMEM)
197 die("initrd too big to contain in guest RAM.\n");
198 die_perror("initrd read");
199 }
200
201 pr_info("Loaded initrd to 0x%x (%ld bytes)",
202 INITRD_LOAD_ADDR, filesize);
203 kvm->arch.initrd_gra = INITRD_LOAD_ADDR;
204 kvm->arch.initrd_size = filesize;
205 } else {
206 kvm->arch.initrd_size = 0;
207 }
208 strncpy(kern_cmdline, kernel_cmdline, 2048);
209 kern_cmdline[2047] = '\0';
210
211 return true;
212 }
213
/* A raw device-tree property payload built at run time. */
struct fdt_prop {
	void *value;	/* malloc()ed payload, or NULL when there is nothing to emit */
	int size;	/* payload length in bytes */
};
218
generate_segment_page_sizes(struct kvm_ppc_smmu_info * info,struct fdt_prop * prop)219 static void generate_segment_page_sizes(struct kvm_ppc_smmu_info *info, struct fdt_prop *prop)
220 {
221 struct kvm_ppc_one_seg_page_size *sps;
222 int i, j, size;
223 u32 *p;
224
225 for (size = 0, i = 0; i < KVM_PPC_PAGE_SIZES_MAX_SZ; i++) {
226 sps = &info->sps[i];
227
228 if (sps->page_shift == 0)
229 break;
230
231 /* page shift, slb enc & count */
232 size += 3;
233
234 for (j = 0; j < KVM_PPC_PAGE_SIZES_MAX_SZ; j++) {
235 if (info->sps[i].enc[j].page_shift == 0)
236 break;
237
238 /* page shift & pte enc */
239 size += 2;
240 }
241 }
242
243 if (!size) {
244 prop->value = NULL;
245 prop->size = 0;
246 return;
247 }
248
249 /* Convert size to bytes */
250 prop->size = size * sizeof(u32);
251
252 prop->value = malloc(prop->size);
253 if (!prop->value)
254 die_perror("malloc failed");
255
256 p = (u32 *)prop->value;
257 for (i = 0; i < KVM_PPC_PAGE_SIZES_MAX_SZ; i++) {
258 sps = &info->sps[i];
259
260 if (sps->page_shift == 0)
261 break;
262
263 *p++ = cpu_to_be32(sps->page_shift);
264 *p++ = cpu_to_be32(sps->slb_enc);
265
266 for (j = 0; j < KVM_PPC_PAGE_SIZES_MAX_SZ; j++)
267 if (!info->sps[i].enc[j].page_shift)
268 break;
269
270 *p++ = cpu_to_be32(j); /* count of enc */
271
272 for (j = 0; j < KVM_PPC_PAGE_SIZES_MAX_SZ; j++) {
273 if (!info->sps[i].enc[j].page_shift)
274 break;
275
276 *p++ = cpu_to_be32(info->sps[i].enc[j].page_shift);
277 *p++ = cpu_to_be32(info->sps[i].enc[j].pte_enc);
278 }
279 }
280 }
281
282 #define SMT_THREADS 4
283
284 /*
285 * Set up the FDT for the kernel: This function is currently fairly SPAPR-heavy,
286 * and whilst most PPC targets will require CPU/memory nodes, others like RTAS
287 * should eventually be added separately.
288 */
setup_fdt(struct kvm * kvm)289 static int setup_fdt(struct kvm *kvm)
290 {
291 uint64_t mem_reg_property[] = { 0, cpu_to_be64(kvm->ram_size) };
292 int smp_cpus = kvm->nrcpus;
293 uint32_t int_server_ranges_prop[] = {0, cpu_to_be32(smp_cpus)};
294 char hypertas_prop_kvm[] = "hcall-pft\0hcall-term\0"
295 "hcall-dabr\0hcall-interrupt\0hcall-tce\0hcall-vio\0"
296 "hcall-splpar\0hcall-bulk\0hcall-set-mode";
297 int i, j;
298 char cpu_name[30];
299 u8 staging_fdt[FDT_MAX_SIZE];
300 struct cpu_info *cpu_info = find_cpu_info(kvm);
301 struct fdt_prop segment_page_sizes;
302 u32 segment_sizes_1T[] = {cpu_to_be32(0x1c), cpu_to_be32(0x28), 0xffffffff, 0xffffffff};
303
304 /* Generate an appropriate DT at kvm->arch.fdt_gra */
305 void *fdt_dest = guest_flat_to_host(kvm, kvm->arch.fdt_gra);
306 void *fdt = staging_fdt;
307
308 _FDT(fdt_create(fdt, FDT_MAX_SIZE));
309 _FDT(fdt_finish_reservemap(fdt));
310
311 _FDT(fdt_begin_node(fdt, ""));
312
313 _FDT(fdt_property_string(fdt, "device_type", "chrp"));
314 _FDT(fdt_property_string(fdt, "model", "IBM pSeries (kvmtool)"));
315 _FDT(fdt_property_cell(fdt, "#address-cells", 0x2));
316 _FDT(fdt_property_cell(fdt, "#size-cells", 0x2));
317
318 /* RTAS */
319 _FDT(fdt_begin_node(fdt, "rtas"));
320 /* This is what the kernel uses to switch 'We're an LPAR'! */
321 _FDT(fdt_property(fdt, "ibm,hypertas-functions", hypertas_prop_kvm,
322 sizeof(hypertas_prop_kvm)));
323 _FDT(fdt_property_cell(fdt, "linux,rtas-base", kvm->arch.rtas_gra));
324 _FDT(fdt_property_cell(fdt, "linux,rtas-entry", kvm->arch.rtas_gra));
325 _FDT(fdt_property_cell(fdt, "rtas-size", kvm->arch.rtas_size));
326 /* Now add properties for all RTAS tokens: */
327 if (spapr_rtas_fdt_setup(kvm, fdt))
328 die("Couldn't create RTAS FDT properties\n");
329
330 _FDT(fdt_end_node(fdt));
331
332 /* /chosen */
333 _FDT(fdt_begin_node(fdt, "chosen"));
334 /* cmdline */
335 _FDT(fdt_property_string(fdt, "bootargs", kern_cmdline));
336 /* Initrd */
337 if (kvm->arch.initrd_size != 0) {
338 uint32_t ird_st_prop = cpu_to_be32(kvm->arch.initrd_gra);
339 uint32_t ird_end_prop = cpu_to_be32(kvm->arch.initrd_gra +
340 kvm->arch.initrd_size);
341 _FDT(fdt_property(fdt, "linux,initrd-start",
342 &ird_st_prop, sizeof(ird_st_prop)));
343 _FDT(fdt_property(fdt, "linux,initrd-end",
344 &ird_end_prop, sizeof(ird_end_prop)));
345 }
346
347 /*
348 * stdout-path: This is assuming we're using the HV console. Also, the
349 * address is hardwired until we do a VIO bus.
350 */
351 _FDT(fdt_property_string(fdt, "linux,stdout-path",
352 "/vdevice/vty@30000000"));
353 _FDT(fdt_end_node(fdt));
354
355 /*
356 * Memory: We don't alloc. a separate RMA yet. If we ever need to
357 * (CAP_PPC_RMA == 2) then have one memory node for 0->RMAsize, and
358 * another RMAsize->endOfMem.
359 */
360 _FDT(fdt_begin_node(fdt, "memory@0"));
361 _FDT(fdt_property_string(fdt, "device_type", "memory"));
362 _FDT(fdt_property(fdt, "reg", mem_reg_property,
363 sizeof(mem_reg_property)));
364 _FDT(fdt_end_node(fdt));
365
366 generate_segment_page_sizes(&cpu_info->mmu_info, &segment_page_sizes);
367
368 /* CPUs */
369 _FDT(fdt_begin_node(fdt, "cpus"));
370 _FDT(fdt_property_cell(fdt, "#address-cells", 0x1));
371 _FDT(fdt_property_cell(fdt, "#size-cells", 0x0));
372
373 for (i = 0; i < smp_cpus; i += SMT_THREADS) {
374 int32_t pft_size_prop[] = { 0, cpu_to_be32(HPT_ORDER) };
375 uint32_t servers_prop[SMT_THREADS];
376 uint32_t gservers_prop[SMT_THREADS * 2];
377 int threads = (smp_cpus - i) >= SMT_THREADS ? SMT_THREADS :
378 smp_cpus - i;
379
380 sprintf(cpu_name, "PowerPC,%s@%d", cpu_info->name, i);
381 _FDT(fdt_begin_node(fdt, cpu_name));
382 sprintf(cpu_name, "PowerPC,%s", cpu_info->name);
383 _FDT(fdt_property_string(fdt, "name", cpu_name));
384 _FDT(fdt_property_string(fdt, "device_type", "cpu"));
385
386 _FDT(fdt_property_cell(fdt, "reg", i));
387 _FDT(fdt_property_cell(fdt, "cpu-version", kvm->arch.pvr));
388
389 _FDT(fdt_property_cell(fdt, "dcache-block-size", cpu_info->d_bsize));
390 _FDT(fdt_property_cell(fdt, "icache-block-size", cpu_info->i_bsize));
391
392 if (cpu_info->tb_freq)
393 _FDT(fdt_property_cell(fdt, "timebase-frequency", cpu_info->tb_freq));
394
395 /* Lies, but safeish lies! */
396 _FDT(fdt_property_cell(fdt, "clock-frequency", 0xddbab200));
397
398 if (cpu_info->mmu_info.slb_size)
399 _FDT(fdt_property_cell(fdt, "ibm,slb-size", cpu_info->mmu_info.slb_size));
400
401 /*
402 * HPT size is hardwired; KVM currently fixes it at 16MB but the
403 * moment that changes we'll need to read it out of the kernel.
404 */
405 _FDT(fdt_property(fdt, "ibm,pft-size", pft_size_prop,
406 sizeof(pft_size_prop)));
407
408 _FDT(fdt_property_string(fdt, "status", "okay"));
409 _FDT(fdt_property(fdt, "64-bit", NULL, 0));
410 /* A server for each thread in this core */
411 for (j = 0; j < SMT_THREADS; j++) {
412 servers_prop[j] = cpu_to_be32(i+j);
413 /*
414 * Hack borrowed from QEMU, direct the group queues back
415 * to cpu 0:
416 */
417 gservers_prop[j*2] = cpu_to_be32(i+j);
418 gservers_prop[j*2 + 1] = 0;
419 }
420 _FDT(fdt_property(fdt, "ibm,ppc-interrupt-server#s",
421 servers_prop, threads * sizeof(uint32_t)));
422 _FDT(fdt_property(fdt, "ibm,ppc-interrupt-gserver#s",
423 gservers_prop,
424 threads * 2 * sizeof(uint32_t)));
425
426 if (segment_page_sizes.value)
427 _FDT(fdt_property(fdt, "ibm,segment-page-sizes",
428 segment_page_sizes.value,
429 segment_page_sizes.size));
430
431 if (cpu_info->mmu_info.flags & KVM_PPC_1T_SEGMENTS)
432 _FDT(fdt_property(fdt, "ibm,processor-segment-sizes",
433 segment_sizes_1T, sizeof(segment_sizes_1T)));
434
435 /* VSX / DFP options: */
436 if (cpu_info->flags & CPUINFO_FLAG_VMX)
437 _FDT(fdt_property_cell(fdt, "ibm,vmx",
438 (cpu_info->flags &
439 CPUINFO_FLAG_VSX) ? 2 : 1));
440 if (cpu_info->flags & CPUINFO_FLAG_DFP)
441 _FDT(fdt_property_cell(fdt, "ibm,dfp", 0x1));
442 _FDT(fdt_end_node(fdt));
443 }
444 _FDT(fdt_end_node(fdt));
445
446 /* IRQ controller */
447 _FDT(fdt_begin_node(fdt, "interrupt-controller@0"));
448
449 _FDT(fdt_property_string(fdt, "device_type",
450 "PowerPC-External-Interrupt-Presentation"));
451 _FDT(fdt_property_string(fdt, "compatible", "IBM,ppc-xicp"));
452 _FDT(fdt_property_cell(fdt, "reg", 0));
453 _FDT(fdt_property(fdt, "interrupt-controller", NULL, 0));
454 _FDT(fdt_property(fdt, "ibm,interrupt-server-ranges",
455 int_server_ranges_prop,
456 sizeof(int_server_ranges_prop)));
457 _FDT(fdt_property_cell(fdt, "#interrupt-cells", 2));
458 _FDT(fdt_property_cell(fdt, "linux,phandle", PHANDLE_XICP));
459 _FDT(fdt_property_cell(fdt, "phandle", PHANDLE_XICP));
460 _FDT(fdt_end_node(fdt));
461
462 /*
463 * VIO: See comment in linux,stdout-path; we don't yet represent a VIO
464 * bus/address allocation so addresses are hardwired here.
465 */
466 _FDT(fdt_begin_node(fdt, "vdevice"));
467 _FDT(fdt_property_cell(fdt, "#address-cells", 0x1));
468 _FDT(fdt_property_cell(fdt, "#size-cells", 0x0));
469 _FDT(fdt_property_string(fdt, "device_type", "vdevice"));
470 _FDT(fdt_property_string(fdt, "compatible", "IBM,vdevice"));
471 _FDT(fdt_begin_node(fdt, "vty@30000000"));
472 _FDT(fdt_property_string(fdt, "name", "vty"));
473 _FDT(fdt_property_string(fdt, "device_type", "serial"));
474 _FDT(fdt_property_string(fdt, "compatible", "hvterm1"));
475 _FDT(fdt_property_cell(fdt, "reg", 0x30000000));
476 _FDT(fdt_end_node(fdt));
477 _FDT(fdt_end_node(fdt));
478
479 /* Finalise: */
480 _FDT(fdt_end_node(fdt)); /* Root node */
481 _FDT(fdt_finish(fdt));
482
483 _FDT(fdt_open_into(fdt, fdt_dest, FDT_MAX_SIZE));
484
485 /* PCI */
486 if (spapr_populate_pci_devices(kvm, PHANDLE_XICP, fdt_dest))
487 die("Fail populating PCI device nodes");
488
489 _FDT(fdt_add_mem_rsv(fdt_dest, kvm->arch.rtas_gra, kvm->arch.rtas_size));
490 _FDT(fdt_pack(fdt_dest));
491
492 free(segment_page_sizes.value);
493
494 return 0;
495 }
496 firmware_init(setup_fdt);
497
498 /**
499 * kvm__arch_setup_firmware
500 */
kvm__arch_setup_firmware(struct kvm * kvm)501 int kvm__arch_setup_firmware(struct kvm *kvm)
502 {
503 /*
504 * Set up RTAS stub. All it is is a single hypercall:
505 * 0: 7c 64 1b 78 mr r4,r3
506 * 4: 3c 60 00 00 lis r3,0
507 * 8: 60 63 f0 00 ori r3,r3,61440
508 * c: 44 00 00 22 sc 1
509 * 10: 4e 80 00 20 blr
510 */
511 uint32_t *rtas = guest_flat_to_host(kvm, kvm->arch.rtas_gra);
512
513 rtas[0] = cpu_to_be32(0x7c641b78);
514 rtas[1] = cpu_to_be32(0x3c600000);
515 rtas[2] = cpu_to_be32(0x6063f000);
516 rtas[3] = cpu_to_be32(0x44000022);
517 rtas[4] = cpu_to_be32(0x4e800020);
518 kvm->arch.rtas_size = 20;
519
520 pr_info("Set up %ld bytes of RTAS at 0x%lx\n",
521 kvm->arch.rtas_size, kvm->arch.rtas_gra);
522
523 /* Load SLOF */
524
525 return 0;
526 }
527
/*
 * Architecture hook for releasing firmware resources.
 *
 * Nothing is released here; @kvm is not touched and the result is always 0.
 */
int kvm__arch_free_firmware(struct kvm *kvm)
{
	(void)kvm;	/* intentionally unused */

	return 0;
}
532