1*3634039bSDavid Woodhouse /* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-2-Clause) */ 2*3634039bSDavid Woodhouse 3*3634039bSDavid Woodhouse /* 4*3634039bSDavid Woodhouse * This structure provides a vDSO-style clock to VM guests, exposing the 5*3634039bSDavid Woodhouse * relationship (or lack thereof) between the CPU clock (TSC, timebase, arch 6*3634039bSDavid Woodhouse * counter, etc.) and real time. It is designed to address the problem of 7*3634039bSDavid Woodhouse * live migration, which other clock enlightenments do not. 8*3634039bSDavid Woodhouse * 9*3634039bSDavid Woodhouse * When a guest is live migrated, this affects the clock in two ways. 10*3634039bSDavid Woodhouse * 11*3634039bSDavid Woodhouse * First, even between identical hosts the actual frequency of the underlying 12*3634039bSDavid Woodhouse * counter will change within the tolerances of its specification (typically 13*3634039bSDavid Woodhouse * ±50PPM, or 4 seconds a day). This frequency also varies over time on the 14*3634039bSDavid Woodhouse * same host, but can be tracked by NTP as it generally varies slowly. With 15*3634039bSDavid Woodhouse * live migration there is a step change in the frequency, with no warning. 16*3634039bSDavid Woodhouse * 17*3634039bSDavid Woodhouse * Second, there may be a step change in the value of the counter itself, as 18*3634039bSDavid Woodhouse * its accuracy is limited by the precision of the NTP synchronization on the 19*3634039bSDavid Woodhouse * source and destination hosts. 20*3634039bSDavid Woodhouse * 21*3634039bSDavid Woodhouse * So any calibration (NTP, PTP, etc.) which the guest has done on the source 22*3634039bSDavid Woodhouse * host before migration is invalid, and needs to be redone on the new host. 23*3634039bSDavid Woodhouse * 24*3634039bSDavid Woodhouse * In its most basic mode, this structure provides only an indication to the 25*3634039bSDavid Woodhouse * guest that live migration has occurred. This allows the guest to know that 26*3634039bSDavid Woodhouse * its clock is invalid and take remedial action. For applications that need 27*3634039bSDavid Woodhouse * reliable accurate timestamps (e.g. distributed databases), the structure 28*3634039bSDavid Woodhouse * can be mapped all the way to userspace. This allows the application to see 29*3634039bSDavid Woodhouse * directly for itself that the clock is disrupted and take appropriate 30*3634039bSDavid Woodhouse * action, even when using a vDSO-style method to get the time instead of a 31*3634039bSDavid Woodhouse * system call. 32*3634039bSDavid Woodhouse * 33*3634039bSDavid Woodhouse * In its more advanced mode. this structure can also be used to expose the 34*3634039bSDavid Woodhouse * precise relationship of the CPU counter to real time, as calibrated by the 35*3634039bSDavid Woodhouse * host. This means that userspace applications can have accurate time 36*3634039bSDavid Woodhouse * immediately after live migration, rather than having to pause operations 37*3634039bSDavid Woodhouse * and wait for NTP to recover. This mode does, of course, rely on the 38*3634039bSDavid Woodhouse * counter being reliable and consistent across CPUs. 39*3634039bSDavid Woodhouse * 40*3634039bSDavid Woodhouse * Note that this must be true UTC, never with smeared leap seconds. If a 41*3634039bSDavid Woodhouse * guest wishes to construct a smeared clock, it can do so. Presenting a 42*3634039bSDavid Woodhouse * smeared clock through this interface would be problematic because it 43*3634039bSDavid Woodhouse * actually messes with the apparent counter *period*. A linear smearing 44*3634039bSDavid Woodhouse * of 1 ms per second would effectively tweak the counter period by 1000PPM 45*3634039bSDavid Woodhouse * at the start/end of the smearing period, while a sinusoidal smear would 46*3634039bSDavid Woodhouse * basically be impossible to represent. 47*3634039bSDavid Woodhouse * 48*3634039bSDavid Woodhouse * This structure is offered with the intent that it be adopted into the 49*3634039bSDavid Woodhouse * nascent virtio-rtc standard, as a virtio-rtc that does not address the live 50*3634039bSDavid Woodhouse * migration problem seems a little less than fit for purpose. For that 51*3634039bSDavid Woodhouse * reason, certain fields use precisely the same numeric definitions as in 52*3634039bSDavid Woodhouse * the virtio-rtc proposal. The structure can also be exposed through an ACPI 53*3634039bSDavid Woodhouse * device with the CID "VMCLOCK", modelled on the "VMGENID" device except for 54*3634039bSDavid Woodhouse * the fact that it uses a real _CRS to convey the address of the structure 55*3634039bSDavid Woodhouse * (which should be a full page, to allow for mapping directly to userspace). 56*3634039bSDavid Woodhouse */ 57*3634039bSDavid Woodhouse 58*3634039bSDavid Woodhouse #ifndef __VMCLOCK_ABI_H__ 59*3634039bSDavid Woodhouse #define __VMCLOCK_ABI_H__ 60*3634039bSDavid Woodhouse 61*3634039bSDavid Woodhouse #include "standard-headers/linux/types.h" 62*3634039bSDavid Woodhouse 63*3634039bSDavid Woodhouse struct vmclock_abi { 64*3634039bSDavid Woodhouse /* CONSTANT FIELDS */ 65*3634039bSDavid Woodhouse uint32_t magic; 66*3634039bSDavid Woodhouse #define VMCLOCK_MAGIC 0x4b4c4356 /* "VCLK" */ 67*3634039bSDavid Woodhouse uint32_t size; /* Size of region containing this structure */ 68*3634039bSDavid Woodhouse uint16_t version; /* 1 */ 69*3634039bSDavid Woodhouse uint8_t counter_id; /* Matches VIRTIO_RTC_COUNTER_xxx except INVALID */ 70*3634039bSDavid Woodhouse #define VMCLOCK_COUNTER_ARM_VCNT 0 71*3634039bSDavid Woodhouse #define VMCLOCK_COUNTER_X86_TSC 1 72*3634039bSDavid Woodhouse #define VMCLOCK_COUNTER_INVALID 0xff 73*3634039bSDavid Woodhouse uint8_t time_type; /* Matches VIRTIO_RTC_TYPE_xxx */ 74*3634039bSDavid Woodhouse #define VMCLOCK_TIME_UTC 0 /* Since 1970-01-01 00:00:00z */ 75*3634039bSDavid Woodhouse #define VMCLOCK_TIME_TAI 1 /* Since 1970-01-01 00:00:00z */ 76*3634039bSDavid Woodhouse #define VMCLOCK_TIME_MONOTONIC 2 /* Since undefined epoch */ 77*3634039bSDavid Woodhouse #define VMCLOCK_TIME_INVALID_SMEARED 3 /* Not supported */ 78*3634039bSDavid Woodhouse #define VMCLOCK_TIME_INVALID_MAYBE_SMEARED 4 /* Not supported */ 79*3634039bSDavid Woodhouse 80*3634039bSDavid Woodhouse /* NON-CONSTANT FIELDS PROTECTED BY SEQCOUNT LOCK */ 81*3634039bSDavid Woodhouse uint32_t seq_count; /* Low bit means an update is in progress */ 82*3634039bSDavid Woodhouse /* 83*3634039bSDavid Woodhouse * This field changes to another non-repeating value when the CPU 84*3634039bSDavid Woodhouse * counter is disrupted, for example on live migration. This lets 85*3634039bSDavid Woodhouse * the guest know that it should discard any calibration it has 86*3634039bSDavid Woodhouse * performed of the counter against external sources (NTP/PTP/etc.). 87*3634039bSDavid Woodhouse */ 88*3634039bSDavid Woodhouse uint64_t disruption_marker; 89*3634039bSDavid Woodhouse uint64_t flags; 90*3634039bSDavid Woodhouse /* Indicates that the tai_offset_sec field is valid */ 91*3634039bSDavid Woodhouse #define VMCLOCK_FLAG_TAI_OFFSET_VALID (1 << 0) 92*3634039bSDavid Woodhouse /* 93*3634039bSDavid Woodhouse * Optionally used to notify guests of pending maintenance events. 94*3634039bSDavid Woodhouse * A guest which provides latency-sensitive services may wish to 95*3634039bSDavid Woodhouse * remove itself from service if an event is coming up. Two flags 96*3634039bSDavid Woodhouse * indicate the approximate imminence of the event. 97*3634039bSDavid Woodhouse */ 98*3634039bSDavid Woodhouse #define VMCLOCK_FLAG_DISRUPTION_SOON (1 << 1) /* About a day */ 99*3634039bSDavid Woodhouse #define VMCLOCK_FLAG_DISRUPTION_IMMINENT (1 << 2) /* About an hour */ 100*3634039bSDavid Woodhouse #define VMCLOCK_FLAG_PERIOD_ESTERROR_VALID (1 << 3) 101*3634039bSDavid Woodhouse #define VMCLOCK_FLAG_PERIOD_MAXERROR_VALID (1 << 4) 102*3634039bSDavid Woodhouse #define VMCLOCK_FLAG_TIME_ESTERROR_VALID (1 << 5) 103*3634039bSDavid Woodhouse #define VMCLOCK_FLAG_TIME_MAXERROR_VALID (1 << 6) 104*3634039bSDavid Woodhouse /* 105*3634039bSDavid Woodhouse * If the MONOTONIC flag is set then (other than leap seconds) it is 106*3634039bSDavid Woodhouse * guaranteed that the time calculated according this structure at 107*3634039bSDavid Woodhouse * any given moment shall never appear to be later than the time 108*3634039bSDavid Woodhouse * calculated via the structure at any *later* moment. 109*3634039bSDavid Woodhouse * 110*3634039bSDavid Woodhouse * In particular, a timestamp based on a counter reading taken 111*3634039bSDavid Woodhouse * immediately after setting the low bit of seq_count (and the 112*3634039bSDavid Woodhouse * associated memory barrier), using the previously-valid time and 113*3634039bSDavid Woodhouse * period fields, shall never be later than a timestamp based on 114*3634039bSDavid Woodhouse * a counter reading taken immediately before *clearing* the low 115*3634039bSDavid Woodhouse * bit again after the update, using the about-to-be-valid fields. 116*3634039bSDavid Woodhouse */ 117*3634039bSDavid Woodhouse #define VMCLOCK_FLAG_TIME_MONOTONIC (1 << 7) 118*3634039bSDavid Woodhouse 119*3634039bSDavid Woodhouse uint8_t pad[2]; 120*3634039bSDavid Woodhouse uint8_t clock_status; 121*3634039bSDavid Woodhouse #define VMCLOCK_STATUS_UNKNOWN 0 122*3634039bSDavid Woodhouse #define VMCLOCK_STATUS_INITIALIZING 1 123*3634039bSDavid Woodhouse #define VMCLOCK_STATUS_SYNCHRONIZED 2 124*3634039bSDavid Woodhouse #define VMCLOCK_STATUS_FREERUNNING 3 125*3634039bSDavid Woodhouse #define VMCLOCK_STATUS_UNRELIABLE 4 126*3634039bSDavid Woodhouse 127*3634039bSDavid Woodhouse /* 128*3634039bSDavid Woodhouse * The time exposed through this device is never smeared. This field 129*3634039bSDavid Woodhouse * corresponds to the 'subtype' field in virtio-rtc, which indicates 130*3634039bSDavid Woodhouse * the smearing method. However in this case it provides a *hint* to 131*3634039bSDavid Woodhouse * the guest operating system, such that *if* the guest OS wants to 132*3634039bSDavid Woodhouse * provide its users with an alternative clock which does not follow 133*3634039bSDavid Woodhouse * UTC, it may do so in a fashion consistent with the other systems 134*3634039bSDavid Woodhouse * in the nearby environment. 135*3634039bSDavid Woodhouse */ 136*3634039bSDavid Woodhouse uint8_t leap_second_smearing_hint; /* Matches VIRTIO_RTC_SUBTYPE_xxx */ 137*3634039bSDavid Woodhouse #define VMCLOCK_SMEARING_STRICT 0 138*3634039bSDavid Woodhouse #define VMCLOCK_SMEARING_NOON_LINEAR 1 139*3634039bSDavid Woodhouse #define VMCLOCK_SMEARING_UTC_SLS 2 140*3634039bSDavid Woodhouse uint16_t tai_offset_sec; /* Actually two's complement signed */ 141*3634039bSDavid Woodhouse uint8_t leap_indicator; 142*3634039bSDavid Woodhouse /* 143*3634039bSDavid Woodhouse * This field is based on the VIRTIO_RTC_LEAP_xxx values as defined 144*3634039bSDavid Woodhouse * in the current draft of virtio-rtc, but since smearing cannot be 145*3634039bSDavid Woodhouse * used with the shared memory device, some values are not used. 146*3634039bSDavid Woodhouse * 147*3634039bSDavid Woodhouse * The _POST_POS and _POST_NEG values allow the guest to perform 148*3634039bSDavid Woodhouse * its own smearing during the day or so after a leap second when 149*3634039bSDavid Woodhouse * such smearing may need to continue being applied for a leap 150*3634039bSDavid Woodhouse * second which is now theoretically "historical". 151*3634039bSDavid Woodhouse */ 152*3634039bSDavid Woodhouse #define VMCLOCK_LEAP_NONE 0x00 /* No known nearby leap second */ 153*3634039bSDavid Woodhouse #define VMCLOCK_LEAP_PRE_POS 0x01 /* Positive leap second at EOM */ 154*3634039bSDavid Woodhouse #define VMCLOCK_LEAP_PRE_NEG 0x02 /* Negative leap second at EOM */ 155*3634039bSDavid Woodhouse #define VMCLOCK_LEAP_POS 0x03 /* Set during 23:59:60 second */ 156*3634039bSDavid Woodhouse #define VMCLOCK_LEAP_POST_POS 0x04 157*3634039bSDavid Woodhouse #define VMCLOCK_LEAP_POST_NEG 0x05 158*3634039bSDavid Woodhouse 159*3634039bSDavid Woodhouse /* Bit shift for counter_period_frac_sec and its error rate */ 160*3634039bSDavid Woodhouse uint8_t counter_period_shift; 161*3634039bSDavid Woodhouse /* 162*3634039bSDavid Woodhouse * Paired values of counter and UTC at a given point in time. 163*3634039bSDavid Woodhouse */ 164*3634039bSDavid Woodhouse uint64_t counter_value; 165*3634039bSDavid Woodhouse /* 166*3634039bSDavid Woodhouse * Counter period, and error margin of same. The unit of these 167*3634039bSDavid Woodhouse * fields is 1/2^(64 + counter_period_shift) of a second. 168*3634039bSDavid Woodhouse */ 169*3634039bSDavid Woodhouse uint64_t counter_period_frac_sec; 170*3634039bSDavid Woodhouse uint64_t counter_period_esterror_rate_frac_sec; 171*3634039bSDavid Woodhouse uint64_t counter_period_maxerror_rate_frac_sec; 172*3634039bSDavid Woodhouse 173*3634039bSDavid Woodhouse /* 174*3634039bSDavid Woodhouse * Time according to time_type field above. 175*3634039bSDavid Woodhouse */ 176*3634039bSDavid Woodhouse uint64_t time_sec; /* Seconds since time_type epoch */ 177*3634039bSDavid Woodhouse uint64_t time_frac_sec; /* Units of 1/2^64 of a second */ 178*3634039bSDavid Woodhouse uint64_t time_esterror_nanosec; 179*3634039bSDavid Woodhouse uint64_t time_maxerror_nanosec; 180*3634039bSDavid Woodhouse }; 181*3634039bSDavid Woodhouse 182*3634039bSDavid Woodhouse #endif /* __VMCLOCK_ABI_H__ */ 183