xref: /kvmtool/x86/mptable.c (revision ca99b3d34022301a54d55d8ca4d00d5721832431)
10c7c14a7SCyrill Gorcunov #include "kvm/kvm.h"
20c7c14a7SCyrill Gorcunov #include "kvm/bios.h"
30c7c14a7SCyrill Gorcunov #include "kvm/apic.h"
40c7c14a7SCyrill Gorcunov #include "kvm/mptable.h"
50c7c14a7SCyrill Gorcunov #include "kvm/util.h"
60c7c14a7SCyrill Gorcunov 
70c7c14a7SCyrill Gorcunov #include <string.h>
80c7c14a7SCyrill Gorcunov 
9*ca99b3d3SCyrill Gorcunov /*
10*ca99b3d3SCyrill Gorcunov  * If kernel is not configured yet this macro
11*ca99b3d3SCyrill Gorcunov  * might not be defined, fix it by own definition
12*ca99b3d3SCyrill Gorcunov  */
13*ca99b3d3SCyrill Gorcunov #ifndef NR_CPUS
14*ca99b3d3SCyrill Gorcunov #define NR_CPUS KVM_NR_CPUS
15*ca99b3d3SCyrill Gorcunov #endif
16*ca99b3d3SCyrill Gorcunov 
170c7c14a7SCyrill Gorcunov #include <asm/mpspec_def.h>
180c7c14a7SCyrill Gorcunov 
/*
 * FIXME: please make sure the addresses borrowed
 * for apic/ioapic never overlap! We need a global
 * tracker of system resources (including io, mmio,
 * and friends).
 */
250c7c14a7SCyrill Gorcunov 
/*
 * mpf_checksum - sum the bytes of a buffer, modulo 256
 * @mp:  first byte to sum (not modified)
 * @len: number of bytes to sum; zero or negative yields 0
 *
 * Returns the low 8 bits of the byte sum. Callers negate the result to
 * obtain the value that makes the whole structure sum to zero.
 */
static unsigned int mpf_checksum(const unsigned char *mp, int len)
{
	unsigned int sum = 0;
	int i;

	/* Bounded index loop: a negative length must not run off the buffer */
	for (i = 0; i < len; i++)
		sum += mp[i];

	return sum & 0xFF;
}
350c7c14a7SCyrill Gorcunov 
360c7c14a7SCyrill Gorcunov static unsigned int gen_cpu_flag(unsigned int cpu, unsigned int ncpu)
370c7c14a7SCyrill Gorcunov {
380c7c14a7SCyrill Gorcunov 	/* sets enabled/disabled | BSP/AP processor */
390c7c14a7SCyrill Gorcunov 	return ( (cpu < ncpu) ? CPU_ENABLED       : 0) |
400c7c14a7SCyrill Gorcunov 		((cpu == 0)   ? CPU_BOOTPROCESSOR : 0x00);
410c7c14a7SCyrill Gorcunov }
420c7c14a7SCyrill Gorcunov 
/*
 * Fixed-width identification strings copied into the MP structures by
 * mptable_setup(). They are copied with MPTABLE_STRNCPY(), i.e. exactly
 * sizeof(destination field) bytes.
 */
#define MPTABLE_SIG_FLOATING	"_MP_"
#define MPTABLE_OEM		"KVMCPU00"
#define MPTABLE_PRODUCTID	"0.1         "
#define MPTABLE_PCIBUSTYPE	"PCI   "
#define MPTABLE_ISABUSTYPE	"ISA   "

/*
 * Despite the name this is a plain memcpy() of sizeof(d) bytes: @d must be
 * an array (not a pointer) and @s must provide at least that many bytes.
 */
#define MPTABLE_STRNCPY(d, s)	memcpy(d, s, sizeof(d))

/* Size in bytes (32 MiB) of the scratch buffer; it should be more than enough */
#define MPTABLE_MAX_SIZE	(32 << 20)

/*
 * Too many cpus will require x2apic mode
 * and rather ACPI support so we limit it
 * here for a while.
 */
#define MPTABLE_MAX_CPUS	255
600c7c14a7SCyrill Gorcunov 
610c7c14a7SCyrill Gorcunov /**
620c7c14a7SCyrill Gorcunov  * mptable_setup - create mptable and fill guest memory with it
630c7c14a7SCyrill Gorcunov  */
640c7c14a7SCyrill Gorcunov void mptable_setup(struct kvm *kvm, unsigned int ncpus)
650c7c14a7SCyrill Gorcunov {
660c7c14a7SCyrill Gorcunov 	unsigned long real_mpc_table, size;
670c7c14a7SCyrill Gorcunov 	struct mpf_intel *mpf_intel;
680c7c14a7SCyrill Gorcunov 	struct mpc_table *mpc_table;
690c7c14a7SCyrill Gorcunov 	struct mpc_cpu *mpc_cpu;
700c7c14a7SCyrill Gorcunov 	struct mpc_bus *mpc_bus;
710c7c14a7SCyrill Gorcunov 	struct mpc_ioapic *mpc_ioapic;
720c7c14a7SCyrill Gorcunov 	struct mpc_intsrc *mpc_intsrc;
730c7c14a7SCyrill Gorcunov 
740c7c14a7SCyrill Gorcunov 	const int pcibusid = 0;
750c7c14a7SCyrill Gorcunov 	const int isabusid = 1;
760c7c14a7SCyrill Gorcunov 
770c7c14a7SCyrill Gorcunov 	unsigned int i, nentries = 0;
780c7c14a7SCyrill Gorcunov 	unsigned int ioapicid;
790c7c14a7SCyrill Gorcunov 	void *last_addr;
800c7c14a7SCyrill Gorcunov 
810c7c14a7SCyrill Gorcunov 	/* That is where MP table will be in guest memory */
820c7c14a7SCyrill Gorcunov 	real_mpc_table = ALIGN(MB_BIOS_BEGIN + bios_rom_size, 16);
830c7c14a7SCyrill Gorcunov 
840c7c14a7SCyrill Gorcunov 	if (ncpus > MPTABLE_MAX_CPUS) {
850c7c14a7SCyrill Gorcunov 		warning("Too many cpus: %d limited to %d",
860c7c14a7SCyrill Gorcunov 			ncpus, MPTABLE_MAX_CPUS);
870c7c14a7SCyrill Gorcunov 		ncpus = MPTABLE_MAX_CPUS;
880c7c14a7SCyrill Gorcunov 	}
890c7c14a7SCyrill Gorcunov 
900c7c14a7SCyrill Gorcunov 	mpc_table = calloc(1, MPTABLE_MAX_SIZE);
910c7c14a7SCyrill Gorcunov 	if (!mpc_table)
920c7c14a7SCyrill Gorcunov 		die("Out of memory");
930c7c14a7SCyrill Gorcunov 
940c7c14a7SCyrill Gorcunov 	MPTABLE_STRNCPY(mpc_table->signature,	MPC_SIGNATURE);
950c7c14a7SCyrill Gorcunov 	MPTABLE_STRNCPY(mpc_table->oem,		MPTABLE_OEM);
960c7c14a7SCyrill Gorcunov 	MPTABLE_STRNCPY(mpc_table->productid,	MPTABLE_PRODUCTID);
970c7c14a7SCyrill Gorcunov 
980c7c14a7SCyrill Gorcunov 	mpc_table->spec		= 4;
990c7c14a7SCyrill Gorcunov 	mpc_table->lapic	= APIC_ADDR(0);
1000c7c14a7SCyrill Gorcunov 	mpc_table->oemcount	= ncpus; /* will be updated again at end */
1010c7c14a7SCyrill Gorcunov 
1020c7c14a7SCyrill Gorcunov 	/*
1030c7c14a7SCyrill Gorcunov 	 * CPUs enumeration. Technically speaking we should
1040c7c14a7SCyrill Gorcunov 	 * ask either host or HV for apic version supported
1050c7c14a7SCyrill Gorcunov 	 * but for a while we simply put some random value
1060c7c14a7SCyrill Gorcunov 	 * here.
1070c7c14a7SCyrill Gorcunov 	 */
1080c7c14a7SCyrill Gorcunov 	mpc_cpu = (void *)&mpc_table[1];
1090c7c14a7SCyrill Gorcunov 	for (i = 0; i < ncpus; i++) {
1100c7c14a7SCyrill Gorcunov 		mpc_cpu->type		= MP_PROCESSOR;
1110c7c14a7SCyrill Gorcunov 		mpc_cpu->apicid		= i;
1120c7c14a7SCyrill Gorcunov 		mpc_cpu->apicver	= KVM_APIC_VERSION;
1130c7c14a7SCyrill Gorcunov 		mpc_cpu->cpuflag	= gen_cpu_flag(i, ncpus);
1140c7c14a7SCyrill Gorcunov 		mpc_cpu->cpufeature	= 0x600; /* some default value */
1150c7c14a7SCyrill Gorcunov 		mpc_cpu->featureflag	= 0x201; /* some default value */
1160c7c14a7SCyrill Gorcunov 		mpc_cpu++;
1170c7c14a7SCyrill Gorcunov 	}
1180c7c14a7SCyrill Gorcunov 
1190c7c14a7SCyrill Gorcunov 	last_addr = (void *)mpc_cpu;
1200c7c14a7SCyrill Gorcunov 	nentries += ncpus;
1210c7c14a7SCyrill Gorcunov 
1220c7c14a7SCyrill Gorcunov 	/*
1230c7c14a7SCyrill Gorcunov 	 * PCI buses.
1240c7c14a7SCyrill Gorcunov 	 * FIXME: Some callback here to obtain real number
1250c7c14a7SCyrill Gorcunov 	 * of PCI buses present in system.
1260c7c14a7SCyrill Gorcunov 	 */
1270c7c14a7SCyrill Gorcunov 	mpc_bus		= last_addr;
1280c7c14a7SCyrill Gorcunov 	mpc_bus->type	= MP_BUS;
1290c7c14a7SCyrill Gorcunov 	mpc_bus->busid	= pcibusid;
1300c7c14a7SCyrill Gorcunov 	MPTABLE_STRNCPY(mpc_bus->bustype, MPTABLE_PCIBUSTYPE);
1310c7c14a7SCyrill Gorcunov 
1320c7c14a7SCyrill Gorcunov 	last_addr = (void *)&mpc_bus[1];
1330c7c14a7SCyrill Gorcunov 	nentries++;
1340c7c14a7SCyrill Gorcunov 
1350c7c14a7SCyrill Gorcunov 	/*
1360c7c14a7SCyrill Gorcunov 	 * ISA bus.
1370c7c14a7SCyrill Gorcunov 	 * FIXME: Same issue as for PCI bus.
1380c7c14a7SCyrill Gorcunov 	 */
1390c7c14a7SCyrill Gorcunov 	mpc_bus		= last_addr;
1400c7c14a7SCyrill Gorcunov 	mpc_bus->type	= MP_BUS;
1410c7c14a7SCyrill Gorcunov 	mpc_bus->busid	= isabusid;
1420c7c14a7SCyrill Gorcunov 	MPTABLE_STRNCPY(mpc_bus->bustype, MPTABLE_ISABUSTYPE);
1430c7c14a7SCyrill Gorcunov 
1440c7c14a7SCyrill Gorcunov 	last_addr = (void *)&mpc_bus[1];
1450c7c14a7SCyrill Gorcunov 	nentries++;
1460c7c14a7SCyrill Gorcunov 
1470c7c14a7SCyrill Gorcunov 	/*
1480c7c14a7SCyrill Gorcunov 	 * IO-APIC chip.
1490c7c14a7SCyrill Gorcunov 	 */
1500c7c14a7SCyrill Gorcunov 	ioapicid		= ncpus + 1;
1510c7c14a7SCyrill Gorcunov 	mpc_ioapic		= last_addr;
1520c7c14a7SCyrill Gorcunov 	mpc_ioapic->type	= MP_IOAPIC;
1530c7c14a7SCyrill Gorcunov 	mpc_ioapic->apicid	= ioapicid;
1540c7c14a7SCyrill Gorcunov 	mpc_ioapic->apicver	= KVM_APIC_VERSION;
1550c7c14a7SCyrill Gorcunov 	mpc_ioapic->flags	= MPC_APIC_USABLE;
1560c7c14a7SCyrill Gorcunov 	mpc_ioapic->apicaddr	= IOAPIC_ADDR(0);
1570c7c14a7SCyrill Gorcunov 
1580c7c14a7SCyrill Gorcunov 	last_addr = (void *)&mpc_ioapic[1];
1590c7c14a7SCyrill Gorcunov 	nentries++;
1600c7c14a7SCyrill Gorcunov 
1610c7c14a7SCyrill Gorcunov 	/*
1620c7c14a7SCyrill Gorcunov 	 * IRQ sources.
1630c7c14a7SCyrill Gorcunov 	 *
1640c7c14a7SCyrill Gorcunov 	 * FIXME: Same issue as with buses. We definitely
1650c7c14a7SCyrill Gorcunov 	 * need kind of collector routine which enumerate
1660c7c14a7SCyrill Gorcunov 	 * resources used first and pass them here.
1670c7c14a7SCyrill Gorcunov 	 * At moment we know we have only virtio block device
1680c7c14a7SCyrill Gorcunov 	 * and virtio console but this is g00berfish.
1690c7c14a7SCyrill Gorcunov 	 *
1700c7c14a7SCyrill Gorcunov 	 * Also note we use PCI irqs here, no for ISA bus yet.
1710c7c14a7SCyrill Gorcunov 	 */
1720c7c14a7SCyrill Gorcunov 	mpc_intsrc		= last_addr;
1730c7c14a7SCyrill Gorcunov 	mpc_intsrc->type	= MP_INTSRC;
1740c7c14a7SCyrill Gorcunov 	mpc_intsrc->irqtype	= mp_INT;
1750c7c14a7SCyrill Gorcunov 	mpc_intsrc->irqflag	= MP_IRQDIR_DEFAULT;
1760c7c14a7SCyrill Gorcunov 	mpc_intsrc->srcbus	= pcibusid;
1770c7c14a7SCyrill Gorcunov 	mpc_intsrc->srcbusirq	= 2; /* virtio console irq pin */
1780c7c14a7SCyrill Gorcunov 	mpc_intsrc->dstapic	= ioapicid;
1790c7c14a7SCyrill Gorcunov 	mpc_intsrc->dstirq	= 13; /* VIRTIO_CONSOLE_IRQ */
1800c7c14a7SCyrill Gorcunov 
1810c7c14a7SCyrill Gorcunov 	last_addr = (void *)&mpc_intsrc[1];
1820c7c14a7SCyrill Gorcunov 	nentries++;
1830c7c14a7SCyrill Gorcunov 
1840c7c14a7SCyrill Gorcunov 	mpc_intsrc		= last_addr;
1850c7c14a7SCyrill Gorcunov 	mpc_intsrc->type	= MP_INTSRC;
1860c7c14a7SCyrill Gorcunov 	mpc_intsrc->irqtype	= mp_INT;
1870c7c14a7SCyrill Gorcunov 	mpc_intsrc->irqflag	= MP_IRQDIR_DEFAULT;
1880c7c14a7SCyrill Gorcunov 	mpc_intsrc->srcbus	= pcibusid;
1890c7c14a7SCyrill Gorcunov 	mpc_intsrc->srcbusirq	= 1; /* virtio block irq pin */
1900c7c14a7SCyrill Gorcunov 	mpc_intsrc->dstapic	= ioapicid;
1910c7c14a7SCyrill Gorcunov 	mpc_intsrc->dstirq	= 15; /* VIRTIO_BLK_IRQ */
1920c7c14a7SCyrill Gorcunov 
1930c7c14a7SCyrill Gorcunov 	last_addr = (void *)&mpc_intsrc[1];
1940c7c14a7SCyrill Gorcunov 	nentries++;
1950c7c14a7SCyrill Gorcunov 
1960c7c14a7SCyrill Gorcunov 	mpc_intsrc		= last_addr;
1970c7c14a7SCyrill Gorcunov 	mpc_intsrc->type	= MP_INTSRC;
1980c7c14a7SCyrill Gorcunov 	mpc_intsrc->irqtype	= mp_INT;
1990c7c14a7SCyrill Gorcunov 	mpc_intsrc->irqflag	= MP_IRQDIR_DEFAULT;
2000c7c14a7SCyrill Gorcunov 	mpc_intsrc->srcbus	= pcibusid;
2010c7c14a7SCyrill Gorcunov 	mpc_intsrc->srcbusirq	= 3; /* virtio net irq pin */
2020c7c14a7SCyrill Gorcunov 	mpc_intsrc->dstapic	= ioapicid;
2030c7c14a7SCyrill Gorcunov 	mpc_intsrc->dstirq	= 14; /* VIRTIO_NET_IRQ */
2040c7c14a7SCyrill Gorcunov 
2050c7c14a7SCyrill Gorcunov 	last_addr = (void *)&mpc_intsrc[1];
2060c7c14a7SCyrill Gorcunov 	nentries++;
2070c7c14a7SCyrill Gorcunov 
2080c7c14a7SCyrill Gorcunov 	/*
2090c7c14a7SCyrill Gorcunov 	 * Local IRQs assignment (LINT0, LINT1)
2100c7c14a7SCyrill Gorcunov 	 */
2110c7c14a7SCyrill Gorcunov 	mpc_intsrc		= last_addr;
2120c7c14a7SCyrill Gorcunov 	mpc_intsrc->type	= MP_LINTSRC;
2130c7c14a7SCyrill Gorcunov 	mpc_intsrc->irqtype	= mp_ExtINT;
2140c7c14a7SCyrill Gorcunov 	mpc_intsrc->irqtype	= mp_INT;
2150c7c14a7SCyrill Gorcunov 	mpc_intsrc->irqflag	= MP_IRQDIR_DEFAULT;
2160c7c14a7SCyrill Gorcunov 	mpc_intsrc->srcbus	= isabusid;
2170c7c14a7SCyrill Gorcunov 	mpc_intsrc->srcbusirq	= 0;
2180c7c14a7SCyrill Gorcunov 	mpc_intsrc->dstapic	= 0; /* FIXME: BSP apic */
2190c7c14a7SCyrill Gorcunov 	mpc_intsrc->dstirq	= 0; /* LINT0 */
2200c7c14a7SCyrill Gorcunov 
2210c7c14a7SCyrill Gorcunov 	last_addr = (void *)&mpc_intsrc[1];
2220c7c14a7SCyrill Gorcunov 	nentries++;
2230c7c14a7SCyrill Gorcunov 
2240c7c14a7SCyrill Gorcunov 	mpc_intsrc		= last_addr;
2250c7c14a7SCyrill Gorcunov 	mpc_intsrc->type	= MP_LINTSRC;
2260c7c14a7SCyrill Gorcunov 	mpc_intsrc->irqtype	= mp_NMI;
2270c7c14a7SCyrill Gorcunov 	mpc_intsrc->irqflag	= MP_IRQDIR_DEFAULT;
2280c7c14a7SCyrill Gorcunov 	mpc_intsrc->srcbus	= isabusid;
2290c7c14a7SCyrill Gorcunov 	mpc_intsrc->srcbusirq	= 0;
2300c7c14a7SCyrill Gorcunov 	mpc_intsrc->dstapic	= 0; /* FIXME: BSP apic */
2310c7c14a7SCyrill Gorcunov 	mpc_intsrc->dstirq	= 1; /* LINT1 */
2320c7c14a7SCyrill Gorcunov 
2330c7c14a7SCyrill Gorcunov 	last_addr = (void *)&mpc_intsrc[1];
2340c7c14a7SCyrill Gorcunov 	nentries++;
2350c7c14a7SCyrill Gorcunov 
2360c7c14a7SCyrill Gorcunov 	/*
2370c7c14a7SCyrill Gorcunov 	 * Floating MP table finally.
2380c7c14a7SCyrill Gorcunov 	 */
2390c7c14a7SCyrill Gorcunov 	mpf_intel = (void *)ALIGN((unsigned long)last_addr, 16);
2400c7c14a7SCyrill Gorcunov 
2410c7c14a7SCyrill Gorcunov 	MPTABLE_STRNCPY(mpf_intel->signature, MPTABLE_SIG_FLOATING);
2420c7c14a7SCyrill Gorcunov 	mpf_intel->length	= 1;
2430c7c14a7SCyrill Gorcunov 	mpf_intel->specification= 4;
2440c7c14a7SCyrill Gorcunov 	mpf_intel->physptr	= (unsigned int)real_mpc_table;
2450c7c14a7SCyrill Gorcunov 	mpf_intel->checksum	= -mpf_checksum((unsigned char *)mpf_intel, sizeof(*mpf_intel));
2460c7c14a7SCyrill Gorcunov 
2470c7c14a7SCyrill Gorcunov 	/*
2480c7c14a7SCyrill Gorcunov 	 * No last_addr inclrement here please, we need last
2490c7c14a7SCyrill Gorcunov 	 * active position here to compute table size.
2500c7c14a7SCyrill Gorcunov 	 */
2510c7c14a7SCyrill Gorcunov 
2520c7c14a7SCyrill Gorcunov 	/*
2530c7c14a7SCyrill Gorcunov 	 * Don't forget to update header in fixed table.
2540c7c14a7SCyrill Gorcunov 	*/
2550c7c14a7SCyrill Gorcunov 	mpc_table->oemcount	= nentries;
2560c7c14a7SCyrill Gorcunov 	mpc_table->length	= last_addr - (void *)mpc_table;
2570c7c14a7SCyrill Gorcunov 	mpc_table->checksum	= -mpf_checksum((unsigned char *)mpc_table, mpc_table->length);
2580c7c14a7SCyrill Gorcunov 
2590c7c14a7SCyrill Gorcunov 
2600c7c14a7SCyrill Gorcunov 	/*
2610c7c14a7SCyrill Gorcunov 	 * We will copy the whole table, no need to separate
2620c7c14a7SCyrill Gorcunov 	 * floating structure and table itself.
2630c7c14a7SCyrill Gorcunov 	 */
2640c7c14a7SCyrill Gorcunov 	size = (unsigned long)mpf_intel + sizeof(*mpf_intel) - (unsigned long)mpc_table;
2650c7c14a7SCyrill Gorcunov 
2660c7c14a7SCyrill Gorcunov 	/*
2670c7c14a7SCyrill Gorcunov 	 * The finial check -- never get out of system bios
2680c7c14a7SCyrill Gorcunov 	 * area. Lets also check for allocated memory overrun,
2690c7c14a7SCyrill Gorcunov 	 * in real it's late but still usefull.
2700c7c14a7SCyrill Gorcunov 	 */
2710c7c14a7SCyrill Gorcunov 
2720c7c14a7SCyrill Gorcunov 	if (size > (unsigned long)(MB_BIOS_END - bios_rom_size) ||
2730c7c14a7SCyrill Gorcunov 	    size > MPTABLE_MAX_SIZE)
2740c7c14a7SCyrill Gorcunov 		die("MP table is too big");
2750c7c14a7SCyrill Gorcunov 
2760c7c14a7SCyrill Gorcunov 	/*
2770c7c14a7SCyrill Gorcunov 	 * OK, it is time to move it to guest memory.
2780c7c14a7SCyrill Gorcunov 	 */
2790c7c14a7SCyrill Gorcunov 	memcpy(guest_flat_to_host(kvm, real_mpc_table), mpc_table, size);
2800c7c14a7SCyrill Gorcunov 
2810c7c14a7SCyrill Gorcunov 	free(mpc_table);
2820c7c14a7SCyrill Gorcunov }
283