1 #include "libcflat.h" 2 #include "smp.h" 3 #include "atomic.h" 4 #include "processor.h" 5 #include "kvmclock.h" 6 #include "asm/barrier.h" 7 8 #define unlikely(x) __builtin_expect(!!(x), 0) 9 #define likely(x) __builtin_expect(!!(x), 1) 10 11 12 struct pvclock_vcpu_time_info __attribute__((aligned(4))) hv_clock[MAX_CPU]; 13 struct pvclock_wall_clock wall_clock; 14 static unsigned char valid_flags = 0; 15 static atomic64_t last_value = ATOMIC64_INIT(0); 16 17 /* 18 * Scale a 64-bit delta by scaling and multiplying by a 32-bit fraction, 19 * yielding a 64-bit result. 20 */ 21 static inline u64 scale_delta(u64 delta, u32 mul_frac, int shift) 22 { 23 u64 product; 24 #ifdef __i386__ 25 u32 tmp1, tmp2; 26 #endif 27 28 if (shift < 0) 29 delta >>= -shift; 30 else 31 delta <<= shift; 32 33 #ifdef __i386__ 34 __asm__ ( 35 "mul %5 ; " 36 "mov %4,%%eax ; " 37 "mov %%edx,%4 ; " 38 "mul %5 ; " 39 "xor %5,%5 ; " 40 "add %4,%%eax ; " 41 "adc %5,%%edx ; " 42 : "=A" (product), "=r" (tmp1), "=r" (tmp2) 43 : "a" ((u32)delta), "1" ((u32)(delta >> 32)), "2" (mul_frac) ); 44 #elif defined(__x86_64__) 45 __asm__ ( 46 "mul %%rdx ; shrd $32,%%rdx,%%rax" 47 : "=a" (product) : "0" (delta), "d" ((u64)mul_frac) ); 48 #else 49 #error implement me! 50 #endif 51 52 return product; 53 } 54 55 #ifdef __i386__ 56 # define do_div(n,base) ({ \ 57 u32 __base = (base); \ 58 u32 __rem; \ 59 __rem = ((u64)(n)) % __base; \ 60 (n) = ((u64)(n)) / __base; \ 61 __rem; \ 62 }) 63 #else 64 u32 __attribute__((weak)) __div64_32(u64 *n, u32 base) 65 { 66 u64 rem = *n; 67 u64 b = base; 68 u64 res, d = 1; 69 u32 high = rem >> 32; 70 71 /* Reduce the thing a bit first */ 72 res = 0; 73 if (high >= base) { 74 high /= base; 75 res = (u64) high << 32; 76 rem -= (u64) (high*base) << 32; 77 } 78 79 while ((s64)b > 0 && b < rem) { 80 b = b+b; 81 d = d+d; 82 } 83 84 do { 85 if (rem >= b) { 86 rem -= b; 87 res += d; 88 } 89 b >>= 1; 90 d >>= 1; 91 } while (d); 92 93 *n = res; 94 return rem; 95 } 96 97 # define do_div(n,base) ({ \ 98 u32 __base = (base); \ 99 u32 __rem; \ 100 (void)(((typeof((n)) *)0) == ((u64 *)0)); \ 101 if (likely(((n) >> 32) == 0)) { \ 102 __rem = (u32)(n) % __base; \ 103 (n) = (u32)(n) / __base; \ 104 } else \ 105 __rem = __div64_32(&(n), __base); \ 106 __rem; \ 107 }) 108 #endif 109 110 /** 111 * set_normalized_timespec - set timespec sec and nsec parts and normalize 112 * 113 * @ts: pointer to timespec variable to be set 114 * @sec: seconds to set 115 * @nsec: nanoseconds to set 116 * 117 * Set seconds and nanoseconds field of a timespec variable and 118 * normalize to the timespec storage format 119 * 120 * Note: The tv_nsec part is always in the range of 121 * 0 <= tv_nsec < NSEC_PER_SEC 122 * For negative values only the tv_sec field is negative ! 123 */ 124 void set_normalized_timespec(struct timespec *ts, long sec, s64 nsec) 125 { 126 while (nsec >= NSEC_PER_SEC) { 127 /* 128 * The following asm() prevents the compiler from 129 * optimising this loop into a modulo operation. 
See 130 * also __iter_div_u64_rem() in include/linux/time.h 131 */ 132 asm("" : "+rm"(nsec)); 133 nsec -= NSEC_PER_SEC; 134 ++sec; 135 } 136 while (nsec < 0) { 137 asm("" : "+rm"(nsec)); 138 nsec += NSEC_PER_SEC; 139 --sec; 140 } 141 ts->tv_sec = sec; 142 ts->tv_nsec = nsec; 143 } 144 145 static u64 pvclock_get_nsec_offset(struct pvclock_shadow_time *shadow) 146 { 147 u64 delta = rdtsc() - shadow->tsc_timestamp; 148 return scale_delta(delta, shadow->tsc_to_nsec_mul, shadow->tsc_shift); 149 } 150 151 /* 152 * Reads a consistent set of time-base values from hypervisor, 153 * into a shadow data area. 154 */ 155 static unsigned pvclock_get_time_values(struct pvclock_shadow_time *dst, 156 struct pvclock_vcpu_time_info *src) 157 { 158 do { 159 dst->version = src->version; 160 rmb(); /* fetch version before data */ 161 dst->tsc_timestamp = src->tsc_timestamp; 162 dst->system_timestamp = src->system_time; 163 dst->tsc_to_nsec_mul = src->tsc_to_system_mul; 164 dst->tsc_shift = src->tsc_shift; 165 dst->flags = src->flags; 166 rmb(); /* test version after fetching data */ 167 } while ((src->version & 1) || (dst->version != src->version)); 168 169 return dst->version; 170 } 171 172 cycle_t pvclock_clocksource_read(struct pvclock_vcpu_time_info *src) 173 { 174 struct pvclock_shadow_time shadow; 175 unsigned version; 176 cycle_t ret, offset; 177 u64 last; 178 179 do { 180 version = pvclock_get_time_values(&shadow, src); 181 mb(); 182 offset = pvclock_get_nsec_offset(&shadow); 183 ret = shadow.system_timestamp + offset; 184 mb(); 185 } while (version != src->version); 186 187 if ((valid_flags & PVCLOCK_RAW_CYCLE_BIT) || 188 ((valid_flags & PVCLOCK_TSC_STABLE_BIT) && 189 (shadow.flags & PVCLOCK_TSC_STABLE_BIT))) 190 return ret; 191 192 /* 193 * Assumption here is that last_value, a global accumulator, always goes 194 * forward. If we are less than that, we should not be much smaller. 195 * We assume there is an error marging we're inside, and then the correction 196 * does not sacrifice accuracy. 197 * 198 * For reads: global may have changed between test and return, 199 * but this means someone else updated poked the clock at a later time. 200 * We just need to make sure we are not seeing a backwards event. 201 * 202 * For updates: last_value = ret is not enough, since two vcpus could be 203 * updating at the same time, and one of them could be slightly behind, 204 * making the assumption that last_value always go forward fail to hold. 
cycle_t pvclock_clocksource_read(struct pvclock_vcpu_time_info *src)
{
	struct pvclock_shadow_time shadow;
	unsigned version;
	cycle_t ret, offset;
	u64 last;

	do {
		version = pvclock_get_time_values(&shadow, src);
		mb();
		offset = pvclock_get_nsec_offset(&shadow);
		ret = shadow.system_timestamp + offset;
		mb();
	} while (version != src->version);

	if ((valid_flags & PVCLOCK_RAW_CYCLE_BIT) ||
	    ((valid_flags & PVCLOCK_TSC_STABLE_BIT) &&
	     (shadow.flags & PVCLOCK_TSC_STABLE_BIT)))
		return ret;

	/*
	 * The assumption here is that last_value, a global accumulator,
	 * always goes forward.  If we are less than that, we should not be
	 * much smaller.  We assume there is an error margin we're inside,
	 * and the correction does not sacrifice accuracy.
	 *
	 * For reads: the global may have changed between the test and the
	 * return, but that only means someone else updated the clock at a
	 * later time.  We just need to make sure we are not seeing a
	 * backwards event.
	 *
	 * For updates: last_value = ret is not enough, since two vcpus could
	 * be updating at the same time, and one of them could be slightly
	 * behind, making the assumption that last_value always goes forward
	 * fail to hold.
	 */
	last = atomic64_read(&last_value);
	do {
		if (ret < last)
			return last;
		last = atomic64_cmpxchg(&last_value, last, ret);
	} while (unlikely(last != ret));

	return ret;
}

cycle_t kvm_clock_read(void)
{
	struct pvclock_vcpu_time_info *src;
	cycle_t ret;
	int index = smp_id();

	src = &hv_clock[index];
	ret = pvclock_clocksource_read(src);
	return ret;
}

void kvm_clock_init(void *data)
{
	int index = smp_id();
	struct pvclock_vcpu_time_info *hvc = &hv_clock[index];

	printf("kvm-clock: cpu %d, msr %p\n", index, hvc);
	/* the low bit of the MSR value enables the per-cpu clock area */
	wrmsr(MSR_KVM_SYSTEM_TIME_NEW, (unsigned long)hvc | 1);
}

void kvm_clock_clear(void *data)
{
	wrmsr(MSR_KVM_SYSTEM_TIME_NEW, 0LL);
}

void pvclock_read_wallclock(struct pvclock_wall_clock *wall_clock,
			    struct pvclock_vcpu_time_info *vcpu_time,
			    struct timespec *ts)
{
	u32 version;
	u64 delta;
	struct timespec now;

	/* get wallclock at system boot */
	do {
		version = wall_clock->version;
		rmb();		/* fetch version before time */
		now.tv_sec = wall_clock->sec;
		now.tv_nsec = wall_clock->nsec;
		rmb();		/* fetch time before checking version */
	} while ((wall_clock->version & 1) || (version != wall_clock->version));

	delta = pvclock_clocksource_read(vcpu_time);	/* time since system boot */
	delta += now.tv_sec * (u64)NSEC_PER_SEC + now.tv_nsec;

	now.tv_nsec = do_div(delta, NSEC_PER_SEC);
	now.tv_sec = delta;

	set_normalized_timespec(ts, now.tv_sec, now.tv_nsec);
}

void kvm_get_wallclock(struct timespec *ts)
{
	struct pvclock_vcpu_time_info *vcpu_time;
	int index = smp_id();

	wrmsr(MSR_KVM_WALL_CLOCK_NEW, (unsigned long)&wall_clock);
	vcpu_time = &hv_clock[index];
	pvclock_read_wallclock(&wall_clock, vcpu_time, ts);
}

void pvclock_set_flags(unsigned char flags)
{
	valid_flags = flags;
}
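
/*
 * Usage sketch (illustrative; the real caller is the kvmclock test
 * harness and may differ).  Each CPU must register its own hv_clock
 * slot before kvm_clock_read() is meaningful on that CPU.  Assuming the
 * on_cpu() and cpu_count() helpers from smp.h:
 *
 *	int i, ncpus = cpu_count();
 *
 *	for (i = 0; i < ncpus; ++i)
 *		on_cpu(i, kvm_clock_init, NULL);
 *
 *	pvclock_set_flags(PVCLOCK_TSC_STABLE_BIT);
 *	printf("kvmclock: %lld ns\n", (long long)kvm_clock_read());
 *
 *	for (i = 0; i < ncpus; ++i)
 *		on_cpu(i, kvm_clock_clear, NULL);
 */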