xref: /kvmtool/x86/mptable.c (revision ab9e4f1c87d8f7d26d4b47cdd51037391786e491)
10c7c14a7SCyrill Gorcunov #include "kvm/kvm.h"
20c7c14a7SCyrill Gorcunov #include "kvm/bios.h"
30c7c14a7SCyrill Gorcunov #include "kvm/apic.h"
40c7c14a7SCyrill Gorcunov #include "kvm/mptable.h"
50c7c14a7SCyrill Gorcunov #include "kvm/util.h"
60c7c14a7SCyrill Gorcunov 
7*ab9e4f1cSSasha Levin #include <linux/kernel.h>
80c7c14a7SCyrill Gorcunov #include <string.h>
90c7c14a7SCyrill Gorcunov 
/*
 * If the kernel is not configured yet, this macro
 * might not be defined; provide our own definition.
 */
14ca99b3d3SCyrill Gorcunov #ifndef NR_CPUS
15ca99b3d3SCyrill Gorcunov #define NR_CPUS KVM_NR_CPUS
16ca99b3d3SCyrill Gorcunov #endif
17ca99b3d3SCyrill Gorcunov 
180c7c14a7SCyrill Gorcunov #include <asm/mpspec_def.h>
190c7c14a7SCyrill Gorcunov 
/*
 * FIXME: please make sure the addresses borrowed
 * for apic/ioapic never overlap! We need a global
 * tracker of system resources (including io, mmio,
 * and friends).
 */
260c7c14a7SCyrill Gorcunov 
/*
 * mpf_checksum - sum the bytes of an MP structure
 * @mp:  first byte of the structure
 * @len: number of bytes to sum; zero or a negative value yields 0
 *
 * Returns the low 8 bits of the byte sum. Callers in this file store
 * the negated result in the structure's checksum field.
 */
static unsigned int mpf_checksum(const unsigned char *mp, int len)
{
	unsigned int sum = 0;

	/* "> 0" keeps a negative length from walking off the buffer. */
	while (len-- > 0)
		sum += *mp++;

	return sum & 0xFF;
}
360c7c14a7SCyrill Gorcunov 
370c7c14a7SCyrill Gorcunov static unsigned int gen_cpu_flag(unsigned int cpu, unsigned int ncpu)
380c7c14a7SCyrill Gorcunov {
390c7c14a7SCyrill Gorcunov 	/* sets enabled/disabled | BSP/AP processor */
400c7c14a7SCyrill Gorcunov 	return ( (cpu < ncpu) ? CPU_ENABLED       : 0) |
410c7c14a7SCyrill Gorcunov 		((cpu == 0)   ? CPU_BOOTPROCESSOR : 0x00);
420c7c14a7SCyrill Gorcunov }
430c7c14a7SCyrill Gorcunov 
/* Identification strings copied verbatim into fixed-width table fields. */
#define MPTABLE_SIG_FLOATING	"_MP_"
#define MPTABLE_OEM		"KVMCPU00"
#define MPTABLE_PRODUCTID	"0.1         "
#define MPTABLE_PCIBUSTYPE	"PCI   "
#define MPTABLE_ISABUSTYPE	"ISA   "

/*
 * Copy exactly sizeof(d) bytes from @s into the fixed-size array @d.
 * Despite the name this is a memcpy: no NUL terminator is written and
 * @s must supply at least sizeof(d) bytes. @d must be a real array
 * (not a pointer) so that sizeof yields the field width.
 */
#define MPTABLE_STRNCPY(d, s)	memcpy((d), (s), sizeof(d))

/* Size in bytes of the scratch buffer; it should be more than enough */
#define MPTABLE_MAX_SIZE	(32 << 20)

/*
 * Too many cpus will require x2apic mode
 * and rather ACPI support so we limit it
 * here for a while.
 */
#define MPTABLE_MAX_CPUS	255
610c7c14a7SCyrill Gorcunov 
620c7c14a7SCyrill Gorcunov /**
630c7c14a7SCyrill Gorcunov  * mptable_setup - create mptable and fill guest memory with it
640c7c14a7SCyrill Gorcunov  */
650c7c14a7SCyrill Gorcunov void mptable_setup(struct kvm *kvm, unsigned int ncpus)
660c7c14a7SCyrill Gorcunov {
670c7c14a7SCyrill Gorcunov 	unsigned long real_mpc_table, size;
680c7c14a7SCyrill Gorcunov 	struct mpf_intel *mpf_intel;
690c7c14a7SCyrill Gorcunov 	struct mpc_table *mpc_table;
700c7c14a7SCyrill Gorcunov 	struct mpc_cpu *mpc_cpu;
710c7c14a7SCyrill Gorcunov 	struct mpc_bus *mpc_bus;
720c7c14a7SCyrill Gorcunov 	struct mpc_ioapic *mpc_ioapic;
730c7c14a7SCyrill Gorcunov 	struct mpc_intsrc *mpc_intsrc;
740c7c14a7SCyrill Gorcunov 
750c7c14a7SCyrill Gorcunov 	const int pcibusid = 0;
760c7c14a7SCyrill Gorcunov 	const int isabusid = 1;
770c7c14a7SCyrill Gorcunov 
780c7c14a7SCyrill Gorcunov 	unsigned int i, nentries = 0;
790c7c14a7SCyrill Gorcunov 	unsigned int ioapicid;
800c7c14a7SCyrill Gorcunov 	void *last_addr;
810c7c14a7SCyrill Gorcunov 
820c7c14a7SCyrill Gorcunov 	/* That is where MP table will be in guest memory */
830c7c14a7SCyrill Gorcunov 	real_mpc_table = ALIGN(MB_BIOS_BEGIN + bios_rom_size, 16);
840c7c14a7SCyrill Gorcunov 
850c7c14a7SCyrill Gorcunov 	if (ncpus > MPTABLE_MAX_CPUS) {
860c7c14a7SCyrill Gorcunov 		warning("Too many cpus: %d limited to %d",
870c7c14a7SCyrill Gorcunov 			ncpus, MPTABLE_MAX_CPUS);
880c7c14a7SCyrill Gorcunov 		ncpus = MPTABLE_MAX_CPUS;
890c7c14a7SCyrill Gorcunov 	}
900c7c14a7SCyrill Gorcunov 
910c7c14a7SCyrill Gorcunov 	mpc_table = calloc(1, MPTABLE_MAX_SIZE);
920c7c14a7SCyrill Gorcunov 	if (!mpc_table)
930c7c14a7SCyrill Gorcunov 		die("Out of memory");
940c7c14a7SCyrill Gorcunov 
950c7c14a7SCyrill Gorcunov 	MPTABLE_STRNCPY(mpc_table->signature,	MPC_SIGNATURE);
960c7c14a7SCyrill Gorcunov 	MPTABLE_STRNCPY(mpc_table->oem,		MPTABLE_OEM);
970c7c14a7SCyrill Gorcunov 	MPTABLE_STRNCPY(mpc_table->productid,	MPTABLE_PRODUCTID);
980c7c14a7SCyrill Gorcunov 
990c7c14a7SCyrill Gorcunov 	mpc_table->spec		= 4;
1000c7c14a7SCyrill Gorcunov 	mpc_table->lapic	= APIC_ADDR(0);
1010c7c14a7SCyrill Gorcunov 	mpc_table->oemcount	= ncpus; /* will be updated again at end */
1020c7c14a7SCyrill Gorcunov 
1030c7c14a7SCyrill Gorcunov 	/*
1040c7c14a7SCyrill Gorcunov 	 * CPUs enumeration. Technically speaking we should
1050c7c14a7SCyrill Gorcunov 	 * ask either host or HV for apic version supported
1060c7c14a7SCyrill Gorcunov 	 * but for a while we simply put some random value
1070c7c14a7SCyrill Gorcunov 	 * here.
1080c7c14a7SCyrill Gorcunov 	 */
1090c7c14a7SCyrill Gorcunov 	mpc_cpu = (void *)&mpc_table[1];
1100c7c14a7SCyrill Gorcunov 	for (i = 0; i < ncpus; i++) {
1110c7c14a7SCyrill Gorcunov 		mpc_cpu->type		= MP_PROCESSOR;
1120c7c14a7SCyrill Gorcunov 		mpc_cpu->apicid		= i;
1130c7c14a7SCyrill Gorcunov 		mpc_cpu->apicver	= KVM_APIC_VERSION;
1140c7c14a7SCyrill Gorcunov 		mpc_cpu->cpuflag	= gen_cpu_flag(i, ncpus);
1150c7c14a7SCyrill Gorcunov 		mpc_cpu->cpufeature	= 0x600; /* some default value */
1160c7c14a7SCyrill Gorcunov 		mpc_cpu->featureflag	= 0x201; /* some default value */
1170c7c14a7SCyrill Gorcunov 		mpc_cpu++;
1180c7c14a7SCyrill Gorcunov 	}
1190c7c14a7SCyrill Gorcunov 
1200c7c14a7SCyrill Gorcunov 	last_addr = (void *)mpc_cpu;
1210c7c14a7SCyrill Gorcunov 	nentries += ncpus;
1220c7c14a7SCyrill Gorcunov 
1230c7c14a7SCyrill Gorcunov 	/*
1240c7c14a7SCyrill Gorcunov 	 * PCI buses.
1250c7c14a7SCyrill Gorcunov 	 * FIXME: Some callback here to obtain real number
1260c7c14a7SCyrill Gorcunov 	 * of PCI buses present in system.
1270c7c14a7SCyrill Gorcunov 	 */
1280c7c14a7SCyrill Gorcunov 	mpc_bus		= last_addr;
1290c7c14a7SCyrill Gorcunov 	mpc_bus->type	= MP_BUS;
1300c7c14a7SCyrill Gorcunov 	mpc_bus->busid	= pcibusid;
1310c7c14a7SCyrill Gorcunov 	MPTABLE_STRNCPY(mpc_bus->bustype, MPTABLE_PCIBUSTYPE);
1320c7c14a7SCyrill Gorcunov 
1330c7c14a7SCyrill Gorcunov 	last_addr = (void *)&mpc_bus[1];
1340c7c14a7SCyrill Gorcunov 	nentries++;
1350c7c14a7SCyrill Gorcunov 
1360c7c14a7SCyrill Gorcunov 	/*
1370c7c14a7SCyrill Gorcunov 	 * ISA bus.
1380c7c14a7SCyrill Gorcunov 	 * FIXME: Same issue as for PCI bus.
1390c7c14a7SCyrill Gorcunov 	 */
1400c7c14a7SCyrill Gorcunov 	mpc_bus		= last_addr;
1410c7c14a7SCyrill Gorcunov 	mpc_bus->type	= MP_BUS;
1420c7c14a7SCyrill Gorcunov 	mpc_bus->busid	= isabusid;
1430c7c14a7SCyrill Gorcunov 	MPTABLE_STRNCPY(mpc_bus->bustype, MPTABLE_ISABUSTYPE);
1440c7c14a7SCyrill Gorcunov 
1450c7c14a7SCyrill Gorcunov 	last_addr = (void *)&mpc_bus[1];
1460c7c14a7SCyrill Gorcunov 	nentries++;
1470c7c14a7SCyrill Gorcunov 
1480c7c14a7SCyrill Gorcunov 	/*
1490c7c14a7SCyrill Gorcunov 	 * IO-APIC chip.
1500c7c14a7SCyrill Gorcunov 	 */
1510c7c14a7SCyrill Gorcunov 	ioapicid		= ncpus + 1;
1520c7c14a7SCyrill Gorcunov 	mpc_ioapic		= last_addr;
1530c7c14a7SCyrill Gorcunov 	mpc_ioapic->type	= MP_IOAPIC;
1540c7c14a7SCyrill Gorcunov 	mpc_ioapic->apicid	= ioapicid;
1550c7c14a7SCyrill Gorcunov 	mpc_ioapic->apicver	= KVM_APIC_VERSION;
1560c7c14a7SCyrill Gorcunov 	mpc_ioapic->flags	= MPC_APIC_USABLE;
1570c7c14a7SCyrill Gorcunov 	mpc_ioapic->apicaddr	= IOAPIC_ADDR(0);
1580c7c14a7SCyrill Gorcunov 
1590c7c14a7SCyrill Gorcunov 	last_addr = (void *)&mpc_ioapic[1];
1600c7c14a7SCyrill Gorcunov 	nentries++;
1610c7c14a7SCyrill Gorcunov 
1620c7c14a7SCyrill Gorcunov 	/*
1630c7c14a7SCyrill Gorcunov 	 * IRQ sources.
1640c7c14a7SCyrill Gorcunov 	 *
1650c7c14a7SCyrill Gorcunov 	 * FIXME: Same issue as with buses. We definitely
1660c7c14a7SCyrill Gorcunov 	 * need kind of collector routine which enumerate
1670c7c14a7SCyrill Gorcunov 	 * resources used first and pass them here.
1680c7c14a7SCyrill Gorcunov 	 * At moment we know we have only virtio block device
1690c7c14a7SCyrill Gorcunov 	 * and virtio console but this is g00berfish.
1700c7c14a7SCyrill Gorcunov 	 *
1710c7c14a7SCyrill Gorcunov 	 * Also note we use PCI irqs here, no for ISA bus yet.
1720c7c14a7SCyrill Gorcunov 	 */
1730c7c14a7SCyrill Gorcunov 	mpc_intsrc		= last_addr;
1740c7c14a7SCyrill Gorcunov 	mpc_intsrc->type	= MP_INTSRC;
1750c7c14a7SCyrill Gorcunov 	mpc_intsrc->irqtype	= mp_INT;
1760c7c14a7SCyrill Gorcunov 	mpc_intsrc->irqflag	= MP_IRQDIR_DEFAULT;
1770c7c14a7SCyrill Gorcunov 	mpc_intsrc->srcbus	= pcibusid;
1780c7c14a7SCyrill Gorcunov 	mpc_intsrc->srcbusirq	= 2; /* virtio console irq pin */
1790c7c14a7SCyrill Gorcunov 	mpc_intsrc->dstapic	= ioapicid;
1800c7c14a7SCyrill Gorcunov 	mpc_intsrc->dstirq	= 13; /* VIRTIO_CONSOLE_IRQ */
1810c7c14a7SCyrill Gorcunov 
1820c7c14a7SCyrill Gorcunov 	last_addr = (void *)&mpc_intsrc[1];
1830c7c14a7SCyrill Gorcunov 	nentries++;
1840c7c14a7SCyrill Gorcunov 
1850c7c14a7SCyrill Gorcunov 	mpc_intsrc		= last_addr;
1860c7c14a7SCyrill Gorcunov 	mpc_intsrc->type	= MP_INTSRC;
1870c7c14a7SCyrill Gorcunov 	mpc_intsrc->irqtype	= mp_INT;
1880c7c14a7SCyrill Gorcunov 	mpc_intsrc->irqflag	= MP_IRQDIR_DEFAULT;
1890c7c14a7SCyrill Gorcunov 	mpc_intsrc->srcbus	= pcibusid;
1900c7c14a7SCyrill Gorcunov 	mpc_intsrc->srcbusirq	= 1; /* virtio block irq pin */
1910c7c14a7SCyrill Gorcunov 	mpc_intsrc->dstapic	= ioapicid;
1920c7c14a7SCyrill Gorcunov 	mpc_intsrc->dstirq	= 15; /* VIRTIO_BLK_IRQ */
1930c7c14a7SCyrill Gorcunov 
1940c7c14a7SCyrill Gorcunov 	last_addr = (void *)&mpc_intsrc[1];
1950c7c14a7SCyrill Gorcunov 	nentries++;
1960c7c14a7SCyrill Gorcunov 
1970c7c14a7SCyrill Gorcunov 	mpc_intsrc		= last_addr;
1980c7c14a7SCyrill Gorcunov 	mpc_intsrc->type	= MP_INTSRC;
1990c7c14a7SCyrill Gorcunov 	mpc_intsrc->irqtype	= mp_INT;
2000c7c14a7SCyrill Gorcunov 	mpc_intsrc->irqflag	= MP_IRQDIR_DEFAULT;
2010c7c14a7SCyrill Gorcunov 	mpc_intsrc->srcbus	= pcibusid;
2020c7c14a7SCyrill Gorcunov 	mpc_intsrc->srcbusirq	= 3; /* virtio net irq pin */
2030c7c14a7SCyrill Gorcunov 	mpc_intsrc->dstapic	= ioapicid;
2040c7c14a7SCyrill Gorcunov 	mpc_intsrc->dstirq	= 14; /* VIRTIO_NET_IRQ */
2050c7c14a7SCyrill Gorcunov 
2060c7c14a7SCyrill Gorcunov 	last_addr = (void *)&mpc_intsrc[1];
2070c7c14a7SCyrill Gorcunov 	nentries++;
2080c7c14a7SCyrill Gorcunov 
2090c7c14a7SCyrill Gorcunov 	/*
2100c7c14a7SCyrill Gorcunov 	 * Local IRQs assignment (LINT0, LINT1)
2110c7c14a7SCyrill Gorcunov 	 */
2120c7c14a7SCyrill Gorcunov 	mpc_intsrc		= last_addr;
2130c7c14a7SCyrill Gorcunov 	mpc_intsrc->type	= MP_LINTSRC;
2140c7c14a7SCyrill Gorcunov 	mpc_intsrc->irqtype	= mp_ExtINT;
2150c7c14a7SCyrill Gorcunov 	mpc_intsrc->irqtype	= mp_INT;
2160c7c14a7SCyrill Gorcunov 	mpc_intsrc->irqflag	= MP_IRQDIR_DEFAULT;
2170c7c14a7SCyrill Gorcunov 	mpc_intsrc->srcbus	= isabusid;
2180c7c14a7SCyrill Gorcunov 	mpc_intsrc->srcbusirq	= 0;
2190c7c14a7SCyrill Gorcunov 	mpc_intsrc->dstapic	= 0; /* FIXME: BSP apic */
2200c7c14a7SCyrill Gorcunov 	mpc_intsrc->dstirq	= 0; /* LINT0 */
2210c7c14a7SCyrill Gorcunov 
2220c7c14a7SCyrill Gorcunov 	last_addr = (void *)&mpc_intsrc[1];
2230c7c14a7SCyrill Gorcunov 	nentries++;
2240c7c14a7SCyrill Gorcunov 
2250c7c14a7SCyrill Gorcunov 	mpc_intsrc		= last_addr;
2260c7c14a7SCyrill Gorcunov 	mpc_intsrc->type	= MP_LINTSRC;
2270c7c14a7SCyrill Gorcunov 	mpc_intsrc->irqtype	= mp_NMI;
2280c7c14a7SCyrill Gorcunov 	mpc_intsrc->irqflag	= MP_IRQDIR_DEFAULT;
2290c7c14a7SCyrill Gorcunov 	mpc_intsrc->srcbus	= isabusid;
2300c7c14a7SCyrill Gorcunov 	mpc_intsrc->srcbusirq	= 0;
2310c7c14a7SCyrill Gorcunov 	mpc_intsrc->dstapic	= 0; /* FIXME: BSP apic */
2320c7c14a7SCyrill Gorcunov 	mpc_intsrc->dstirq	= 1; /* LINT1 */
2330c7c14a7SCyrill Gorcunov 
2340c7c14a7SCyrill Gorcunov 	last_addr = (void *)&mpc_intsrc[1];
2350c7c14a7SCyrill Gorcunov 	nentries++;
2360c7c14a7SCyrill Gorcunov 
2370c7c14a7SCyrill Gorcunov 	/*
2380c7c14a7SCyrill Gorcunov 	 * Floating MP table finally.
2390c7c14a7SCyrill Gorcunov 	 */
2400c7c14a7SCyrill Gorcunov 	mpf_intel = (void *)ALIGN((unsigned long)last_addr, 16);
2410c7c14a7SCyrill Gorcunov 
2420c7c14a7SCyrill Gorcunov 	MPTABLE_STRNCPY(mpf_intel->signature, MPTABLE_SIG_FLOATING);
2430c7c14a7SCyrill Gorcunov 	mpf_intel->length	= 1;
2440c7c14a7SCyrill Gorcunov 	mpf_intel->specification= 4;
2450c7c14a7SCyrill Gorcunov 	mpf_intel->physptr	= (unsigned int)real_mpc_table;
2460c7c14a7SCyrill Gorcunov 	mpf_intel->checksum	= -mpf_checksum((unsigned char *)mpf_intel, sizeof(*mpf_intel));
2470c7c14a7SCyrill Gorcunov 
2480c7c14a7SCyrill Gorcunov 	/*
2490c7c14a7SCyrill Gorcunov 	 * No last_addr inclrement here please, we need last
2500c7c14a7SCyrill Gorcunov 	 * active position here to compute table size.
2510c7c14a7SCyrill Gorcunov 	 */
2520c7c14a7SCyrill Gorcunov 
2530c7c14a7SCyrill Gorcunov 	/*
2540c7c14a7SCyrill Gorcunov 	 * Don't forget to update header in fixed table.
2550c7c14a7SCyrill Gorcunov 	*/
2560c7c14a7SCyrill Gorcunov 	mpc_table->oemcount	= nentries;
2570c7c14a7SCyrill Gorcunov 	mpc_table->length	= last_addr - (void *)mpc_table;
2580c7c14a7SCyrill Gorcunov 	mpc_table->checksum	= -mpf_checksum((unsigned char *)mpc_table, mpc_table->length);
2590c7c14a7SCyrill Gorcunov 
2600c7c14a7SCyrill Gorcunov 
2610c7c14a7SCyrill Gorcunov 	/*
2620c7c14a7SCyrill Gorcunov 	 * We will copy the whole table, no need to separate
2630c7c14a7SCyrill Gorcunov 	 * floating structure and table itself.
2640c7c14a7SCyrill Gorcunov 	 */
2650c7c14a7SCyrill Gorcunov 	size = (unsigned long)mpf_intel + sizeof(*mpf_intel) - (unsigned long)mpc_table;
2660c7c14a7SCyrill Gorcunov 
2670c7c14a7SCyrill Gorcunov 	/*
2680c7c14a7SCyrill Gorcunov 	 * The finial check -- never get out of system bios
2690c7c14a7SCyrill Gorcunov 	 * area. Lets also check for allocated memory overrun,
2700c7c14a7SCyrill Gorcunov 	 * in real it's late but still usefull.
2710c7c14a7SCyrill Gorcunov 	 */
2720c7c14a7SCyrill Gorcunov 
2730c7c14a7SCyrill Gorcunov 	if (size > (unsigned long)(MB_BIOS_END - bios_rom_size) ||
2740c7c14a7SCyrill Gorcunov 	    size > MPTABLE_MAX_SIZE)
2750c7c14a7SCyrill Gorcunov 		die("MP table is too big");
2760c7c14a7SCyrill Gorcunov 
2770c7c14a7SCyrill Gorcunov 	/*
2780c7c14a7SCyrill Gorcunov 	 * OK, it is time to move it to guest memory.
2790c7c14a7SCyrill Gorcunov 	 */
2800c7c14a7SCyrill Gorcunov 	memcpy(guest_flat_to_host(kvm, real_mpc_table), mpc_table, size);
2810c7c14a7SCyrill Gorcunov 
2820c7c14a7SCyrill Gorcunov 	free(mpc_table);
2830c7c14a7SCyrill Gorcunov }
284