1 #include "libcflat.h" 2 #include "smp.h" 3 #include "atomic.h" 4 #include "processor.h" 5 #include "kvmclock.h" 6 7 #define unlikely(x) __builtin_expect(!!(x), 0) 8 #define likely(x) __builtin_expect(!!(x), 1) 9 10 11 struct pvclock_vcpu_time_info __attribute__((aligned(4))) hv_clock[MAX_CPU]; 12 struct pvclock_wall_clock wall_clock; 13 static unsigned char valid_flags = 0; 14 static atomic64_t last_value = ATOMIC64_INIT(0); 15 16 /* 17 * Scale a 64-bit delta by scaling and multiplying by a 32-bit fraction, 18 * yielding a 64-bit result. 19 */ 20 static inline u64 scale_delta(u64 delta, u32 mul_frac, int shift) 21 { 22 u64 product; 23 #ifdef __i386__ 24 u32 tmp1, tmp2; 25 #endif 26 27 if (shift < 0) 28 delta >>= -shift; 29 else 30 delta <<= shift; 31 32 #ifdef __i386__ 33 __asm__ ( 34 "mul %5 ; " 35 "mov %4,%%eax ; " 36 "mov %%edx,%4 ; " 37 "mul %5 ; " 38 "xor %5,%5 ; " 39 "add %4,%%eax ; " 40 "adc %5,%%edx ; " 41 : "=A" (product), "=r" (tmp1), "=r" (tmp2) 42 : "a" ((u32)delta), "1" ((u32)(delta >> 32)), "2" (mul_frac) ); 43 #elif defined(__x86_64__) 44 __asm__ ( 45 "mul %%rdx ; shrd $32,%%rdx,%%rax" 46 : "=a" (product) : "0" (delta), "d" ((u64)mul_frac) ); 47 #else 48 #error implement me! 49 #endif 50 51 return product; 52 } 53 54 #ifdef __i386__ 55 # define do_div(n,base) ({ \ 56 u32 __base = (base); \ 57 u32 __rem; \ 58 __rem = ((u64)(n)) % __base; \ 59 (n) = ((u64)(n)) / __base; \ 60 __rem; \ 61 }) 62 #else 63 u32 __attribute__((weak)) __div64_32(u64 *n, u32 base) 64 { 65 u64 rem = *n; 66 u64 b = base; 67 u64 res, d = 1; 68 u32 high = rem >> 32; 69 70 /* Reduce the thing a bit first */ 71 res = 0; 72 if (high >= base) { 73 high /= base; 74 res = (u64) high << 32; 75 rem -= (u64) (high*base) << 32; 76 } 77 78 while ((s64)b > 0 && b < rem) { 79 b = b+b; 80 d = d+d; 81 } 82 83 do { 84 if (rem >= b) { 85 rem -= b; 86 res += d; 87 } 88 b >>= 1; 89 d >>= 1; 90 } while (d); 91 92 *n = res; 93 return rem; 94 } 95 96 # define do_div(n,base) ({ \ 97 u32 __base = (base); \ 98 u32 __rem; \ 99 (void)(((typeof((n)) *)0) == ((u64 *)0)); \ 100 if (likely(((n) >> 32) == 0)) { \ 101 __rem = (u32)(n) % __base; \ 102 (n) = (u32)(n) / __base; \ 103 } else \ 104 __rem = __div64_32(&(n), __base); \ 105 __rem; \ 106 }) 107 #endif 108 109 /** 110 * set_normalized_timespec - set timespec sec and nsec parts and normalize 111 * 112 * @ts: pointer to timespec variable to be set 113 * @sec: seconds to set 114 * @nsec: nanoseconds to set 115 * 116 * Set seconds and nanoseconds field of a timespec variable and 117 * normalize to the timespec storage format 118 * 119 * Note: The tv_nsec part is always in the range of 120 * 0 <= tv_nsec < NSEC_PER_SEC 121 * For negative values only the tv_sec field is negative ! 122 */ 123 void set_normalized_timespec(struct timespec *ts, long sec, s64 nsec) 124 { 125 while (nsec >= NSEC_PER_SEC) { 126 /* 127 * The following asm() prevents the compiler from 128 * optimising this loop into a modulo operation. 
/**
 * set_normalized_timespec - set timespec sec and nsec parts and normalize
 *
 * @ts:		pointer to timespec variable to be set
 * @sec:	seconds to set
 * @nsec:	nanoseconds to set
 *
 * Set the seconds and nanoseconds fields of a timespec variable and
 * normalize to the timespec storage format.
 *
 * Note: The tv_nsec part is always in the range
 *	0 <= tv_nsec < NSEC_PER_SEC
 * For negative values only the tv_sec field is negative!
 */
void set_normalized_timespec(struct timespec *ts, long sec, s64 nsec)
{
	while (nsec >= NSEC_PER_SEC) {
		/*
		 * The following asm() prevents the compiler from
		 * optimising this loop into a modulo operation.  See
		 * also __iter_div_u64_rem() in include/linux/time.h
		 */
		asm("" : "+rm"(nsec));
		nsec -= NSEC_PER_SEC;
		++sec;
	}
	while (nsec < 0) {
		asm("" : "+rm"(nsec));
		nsec += NSEC_PER_SEC;
		--sec;
	}
	ts->tv_sec = sec;
	ts->tv_nsec = nsec;
}

static u64 pvclock_get_nsec_offset(struct pvclock_shadow_time *shadow)
{
	u64 delta = rdtsc() - shadow->tsc_timestamp;
	return scale_delta(delta, shadow->tsc_to_nsec_mul, shadow->tsc_shift);
}

/*
 * Reads a consistent set of time-base values from the hypervisor
 * into a shadow data area.
 */
static unsigned pvclock_get_time_values(struct pvclock_shadow_time *dst,
					struct pvclock_vcpu_time_info *src)
{
	do {
		dst->version = src->version;
		rmb();		/* fetch version before data */
		dst->tsc_timestamp    = src->tsc_timestamp;
		dst->system_timestamp = src->system_time;
		dst->tsc_to_nsec_mul  = src->tsc_to_system_mul;
		dst->tsc_shift        = src->tsc_shift;
		dst->flags            = src->flags;
		rmb();		/* test version after fetching data */
	} while ((src->version & 1) || (dst->version != src->version));

	return dst->version;
}

cycle_t pvclock_clocksource_read(struct pvclock_vcpu_time_info *src)
{
	struct pvclock_shadow_time shadow;
	unsigned version;
	cycle_t ret, offset;
	u64 last;

	do {
		version = pvclock_get_time_values(&shadow, src);
		mb();
		offset = pvclock_get_nsec_offset(&shadow);
		ret = shadow.system_timestamp + offset;
		mb();
	} while (version != src->version);

	if ((valid_flags & PVCLOCK_RAW_CYCLE_BIT) ||
	    ((valid_flags & PVCLOCK_TSC_STABLE_BIT) &&
	     (shadow.flags & PVCLOCK_TSC_STABLE_BIT)))
		return ret;

	/*
	 * The assumption here is that last_value, a global accumulator, always
	 * goes forward.  If we are less than that, we should not be much
	 * smaller.  We assume the difference stays within an acceptable error
	 * margin, so the correction does not sacrifice accuracy.
	 *
	 * For reads: the global value may have changed between test and return,
	 * but that means someone else poked the clock at a later time.
	 * We just need to make sure we are not seeing a backwards event.
	 *
	 * For updates: last_value = ret is not enough, since two vcpus could be
	 * updating at the same time, and one of them could be slightly behind,
	 * making the assumption that last_value always goes forward fail to hold.
	 */
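	/*
	 * Mechanically, the loop below is the usual lock-free pattern for
	 * advancing a monotonic maximum: if our reading is behind last_value,
	 * return last_value instead; otherwise try to publish our reading.
	 * atomic64_cmpxchg() returns the value that was in memory, so the
	 * loop exits once last_value is observed to already hold ret.
	 */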
	last = atomic64_read(&last_value);
	do {
		if (ret < last)
			return last;
		last = atomic64_cmpxchg(&last_value, last, ret);
	} while (unlikely(last != ret));

	return ret;
}

cycle_t kvm_clock_read(void)
{
	struct pvclock_vcpu_time_info *src;
	cycle_t ret;
	int index = smp_id();

	src = &hv_clock[index];
	ret = pvclock_clocksource_read(src);
	return ret;
}

void kvm_clock_init(void *data)
{
	int index = smp_id();
	struct pvclock_vcpu_time_info *hvc = &hv_clock[index];

	printf("kvm-clock: cpu %d, msr 0x%lx\n", index, (unsigned long)hvc);
	wrmsr(MSR_KVM_SYSTEM_TIME, (unsigned long)hvc | 1);
}

void kvm_clock_clear(void *data)
{
	wrmsr(MSR_KVM_SYSTEM_TIME, 0LL);
}

void pvclock_read_wallclock(struct pvclock_wall_clock *wall_clock,
			    struct pvclock_vcpu_time_info *vcpu_time,
			    struct timespec *ts)
{
	u32 version;
	u64 delta;
	struct timespec now;

	/* get wallclock at system boot */
	do {
		version = wall_clock->version;
		rmb();		/* fetch version before time */
		now.tv_sec  = wall_clock->sec;
		now.tv_nsec = wall_clock->nsec;
		rmb();		/* fetch time before checking version */
	} while ((wall_clock->version & 1) || (version != wall_clock->version));

	delta = pvclock_clocksource_read(vcpu_time);	/* time since system boot */
	delta += now.tv_sec * (u64)NSEC_PER_SEC + now.tv_nsec;

	now.tv_nsec = do_div(delta, NSEC_PER_SEC);
	now.tv_sec = delta;

	set_normalized_timespec(ts, now.tv_sec, now.tv_nsec);
}

void kvm_get_wallclock(struct timespec *ts)
{
	struct pvclock_vcpu_time_info *vcpu_time;
	int index = smp_id();

	wrmsr(MSR_KVM_WALL_CLOCK, (unsigned long)&wall_clock);
	vcpu_time = &hv_clock[index];
	pvclock_read_wallclock(&wall_clock, vcpu_time, ts);
}

void pvclock_set_flags(unsigned char flags)
{
	valid_flags = flags;
}
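/*
 * Usage sketch (illustrative only, assuming the usual kvm-unit-tests smp
 * helpers such as on_cpus()): a test would typically enable the clock on
 * every cpu, read it, and tear it down again, roughly:
 *
 *	on_cpus(kvm_clock_init, NULL);
 *	pvclock_set_flags(PVCLOCK_TSC_STABLE_BIT);
 *	t1 = kvm_clock_read();
 *	...
 *	t2 = kvm_clock_read();
 *	on_cpus(kvm_clock_clear, NULL);
 */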