xref: /qemu/include/standard-headers/linux/vmclock-abi.h (revision 0e3327b690b76b7c3966b028110ee053cc16a385)
/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-2-Clause) */

/*
 * This structure provides a vDSO-style clock to VM guests, exposing the
 * relationship (or lack thereof) between the CPU clock (TSC, timebase, arch
 * counter, etc.) and real time. It is designed to address the problem of
 * live migration, which other clock enlightenments do not.
 *
 * When a guest is live migrated, this affects the clock in two ways.
 *
 * First, even between identical hosts the actual frequency of the underlying
 * counter will change within the tolerances of its specification (typically
 * ±50PPM, or 4 seconds a day). This frequency also varies over time on the
 * same host, but can be tracked by NTP as it generally varies slowly. With
 * live migration there is a step change in the frequency, with no warning.
 *
 * Second, there may be a step change in the value of the counter itself, as
 * its accuracy is limited by the precision of the NTP synchronization on the
 * source and destination hosts.
 *
 * So any calibration (NTP, PTP, etc.) which the guest has done on the source
 * host before migration is invalid, and needs to be redone on the new host.
 *
 * In its most basic mode, this structure provides only an indication to the
 * guest that live migration has occurred. This allows the guest to know that
 * its clock is invalid and take remedial action. For applications that need
 * reliable accurate timestamps (e.g. distributed databases), the structure
 * can be mapped all the way to userspace. This allows the application to see
 * directly for itself that the clock is disrupted and take appropriate
 * action, even when using a vDSO-style method to get the time instead of a
 * system call.
 *
 * In its more advanced mode, this structure can also be used to expose the
 * precise relationship of the CPU counter to real time, as calibrated by the
 * host. This means that userspace applications can have accurate time
 * immediately after live migration, rather than having to pause operations
 * and wait for NTP to recover. This mode does, of course, rely on the
 * counter being reliable and consistent across CPUs.
 *
 * Note that this must be true UTC, never with smeared leap seconds. If a
 * guest wishes to construct a smeared clock, it can do so. Presenting a
 * smeared clock through this interface would be problematic because it
 * actually messes with the apparent counter *period*. A linear smearing
 * of 1 ms per second would effectively tweak the counter period by 1000PPM
 * at the start/end of the smearing period, while a sinusoidal smear would
 * basically be impossible to represent.
 *
 * This structure is offered with the intent that it be adopted into the
 * nascent virtio-rtc standard, as a virtio-rtc that does not address the live
 * migration problem seems a little less than fit for purpose. For that
 * reason, certain fields use precisely the same numeric definitions as in
 * the virtio-rtc proposal. The structure can also be exposed through an ACPI
 * device with the CID "VMCLOCK", modelled on the "VMGENID" device except for
 * the fact that it uses a real _CRS to convey the address of the structure
 * (which should be a full page, to allow for mapping directly to userspace).
 */
57*3634039bSDavid Woodhouse 
58*3634039bSDavid Woodhouse #ifndef __VMCLOCK_ABI_H__
59*3634039bSDavid Woodhouse #define __VMCLOCK_ABI_H__
60*3634039bSDavid Woodhouse 
61*3634039bSDavid Woodhouse #include "standard-headers/linux/types.h"
62*3634039bSDavid Woodhouse 
/*
 * struct vmclock_abi - shared-memory clock description read by the guest.
 *
 * The structure starts with fields that never change, followed by fields
 * that may be rewritten at any time and are therefore guarded by seq_count.
 *
 * The members are ordered so that each one sits at a naturally aligned
 * offset; no implicit padding is inserted and the structure occupies
 * 104 bytes. This is an ABI: do not reorder, resize or insert members.
 *
 * NOTE(review): multi-byte fields are presumably little-endian in the
 * shared page, as is usual for headers imported this way - confirm against
 * the producer before accessing them on a big-endian host.
 */
struct vmclock_abi {
	/* CONSTANT FIELDS */
	uint32_t magic;		/* Expected to hold VMCLOCK_MAGIC */
#define VMCLOCK_MAGIC	0x4b4c4356 /* "VCLK" */
	uint32_t size;		/* Size of region containing this structure */
	uint16_t version;	/* 1 */
	uint8_t counter_id; /* Matches VIRTIO_RTC_COUNTER_xxx except INVALID */
#define VMCLOCK_COUNTER_ARM_VCNT	0
#define VMCLOCK_COUNTER_X86_TSC		1
#define VMCLOCK_COUNTER_INVALID		0xff
	uint8_t time_type; /* Matches VIRTIO_RTC_TYPE_xxx */
#define VMCLOCK_TIME_UTC			0	/* Since 1970-01-01 00:00:00z */
#define VMCLOCK_TIME_TAI			1	/* Since 1970-01-01 00:00:00z */
#define VMCLOCK_TIME_MONOTONIC			2	/* Since undefined epoch */
#define VMCLOCK_TIME_INVALID_SMEARED		3	/* Not supported */
#define VMCLOCK_TIME_INVALID_MAYBE_SMEARED	4	/* Not supported */

	/* NON-CONSTANT FIELDS PROTECTED BY SEQCOUNT LOCK */
	uint32_t seq_count;	/* Low bit means an update is in progress */
	/*
	 * This field changes to another non-repeating value when the CPU
	 * counter is disrupted, for example on live migration. This lets
	 * the guest know that it should discard any calibration it has
	 * performed of the counter against external sources (NTP/PTP/etc.).
	 */
	uint64_t disruption_marker;
	uint64_t flags;		/* Bitwise OR of the VMCLOCK_FLAG_xxx bits below */
	/* Indicates that the tai_offset_sec field is valid */
#define VMCLOCK_FLAG_TAI_OFFSET_VALID		(1 << 0)
	/*
	 * Optionally used to notify guests of pending maintenance events.
	 * A guest which provides latency-sensitive services may wish to
	 * remove itself from service if an event is coming up. Two flags
	 * indicate the approximate imminence of the event.
	 */
#define VMCLOCK_FLAG_DISRUPTION_SOON		(1 << 1) /* About a day */
#define VMCLOCK_FLAG_DISRUPTION_IMMINENT	(1 << 2) /* About an hour */
	/*
	 * NOTE(review): by analogy with TAI_OFFSET_VALID, these four bits
	 * presumably mark the counter_period_{est,max}error_rate_frac_sec
	 * and time_{est,max}error_nanosec fields as valid - confirm.
	 */
#define VMCLOCK_FLAG_PERIOD_ESTERROR_VALID	(1 << 3)
#define VMCLOCK_FLAG_PERIOD_MAXERROR_VALID	(1 << 4)
#define VMCLOCK_FLAG_TIME_ESTERROR_VALID	(1 << 5)
#define VMCLOCK_FLAG_TIME_MAXERROR_VALID	(1 << 6)
	/*
	 * If the MONOTONIC flag is set then (other than leap seconds) it is
	 * guaranteed that the time calculated according to this structure at
	 * any given moment shall never appear to be later than the time
	 * calculated via the structure at any *later* moment.
	 *
	 * In particular, a timestamp based on a counter reading taken
	 * immediately after setting the low bit of seq_count (and the
	 * associated memory barrier), using the previously-valid time and
	 * period fields, shall never be later than a timestamp based on
	 * a counter reading taken immediately before *clearing* the low
	 * bit again after the update, using the about-to-be-valid fields.
	 */
#define VMCLOCK_FLAG_TIME_MONOTONIC		(1 << 7)

	/* Explicit padding bytes; no meaning is assigned to them here */
	uint8_t pad[2];
	uint8_t clock_status;	/* One of the VMCLOCK_STATUS_xxx values */
#define VMCLOCK_STATUS_UNKNOWN		0
#define VMCLOCK_STATUS_INITIALIZING	1
#define VMCLOCK_STATUS_SYNCHRONIZED	2
#define VMCLOCK_STATUS_FREERUNNING	3
#define VMCLOCK_STATUS_UNRELIABLE	4

	/*
	 * The time exposed through this device is never smeared. This field
	 * corresponds to the 'subtype' field in virtio-rtc, which indicates
	 * the smearing method. However in this case it provides a *hint* to
	 * the guest operating system, such that *if* the guest OS wants to
	 * provide its users with an alternative clock which does not follow
	 * UTC, it may do so in a fashion consistent with the other systems
	 * in the nearby environment.
	 */
	uint8_t leap_second_smearing_hint; /* Matches VIRTIO_RTC_SUBTYPE_xxx */
#define VMCLOCK_SMEARING_STRICT		0
#define VMCLOCK_SMEARING_NOON_LINEAR	1
#define VMCLOCK_SMEARING_UTC_SLS	2
	/* Only meaningful when VMCLOCK_FLAG_TAI_OFFSET_VALID is set in flags */
	uint16_t tai_offset_sec; /* Actually two's complement signed */
	uint8_t leap_indicator;	/* One of the VMCLOCK_LEAP_xxx values */
	/*
	 * This field is based on the VIRTIO_RTC_LEAP_xxx values as defined
	 * in the current draft of virtio-rtc, but since smearing cannot be
	 * used with the shared memory device, some values are not used.
	 *
	 * The _POST_POS and _POST_NEG values allow the guest to perform
	 * its own smearing during the day or so after a leap second when
	 * such smearing may need to continue being applied for a leap
	 * second which is now theoretically "historical".
	 */
#define VMCLOCK_LEAP_NONE	0x00	/* No known nearby leap second */
#define VMCLOCK_LEAP_PRE_POS	0x01	/* Positive leap second at EOM */
#define VMCLOCK_LEAP_PRE_NEG	0x02	/* Negative leap second at EOM */
#define VMCLOCK_LEAP_POS	0x03	/* Set during 23:59:60 second */
#define VMCLOCK_LEAP_POST_POS	0x04
#define VMCLOCK_LEAP_POST_NEG	0x05

	/* Bit shift for counter_period_frac_sec and its error rate */
	uint8_t counter_period_shift;
	/*
	 * Paired values of counter and UTC at a given point in time.
	 * counter_value is the counter half of the pair; the matching time
	 * is carried in time_sec/time_frac_sec further down.
	 */
	uint64_t counter_value;
	/*
	 * Counter period, and error margin of same. The unit of these
	 * fields is 1/2^(64 + counter_period_shift) of a second.
	 */
	uint64_t counter_period_frac_sec;
	uint64_t counter_period_esterror_rate_frac_sec;
	uint64_t counter_period_maxerror_rate_frac_sec;

	/*
	 * Time according to time_type field above.
	 */
	uint64_t time_sec;		/* Seconds since time_type epoch */
	uint64_t time_frac_sec;		/* Units of 1/2^64 of a second */
	uint64_t time_esterror_nanosec;	/* Estimated error, in nanoseconds */
	uint64_t time_maxerror_nanosec;	/* Maximum error, in nanoseconds */
};
181*3634039bSDavid Woodhouse 
182*3634039bSDavid Woodhouse #endif /*  __VMCLOCK_ABI_H__ */
183