1 #include "libcflat.h" 2 #include "smp.h" 3 #include "atomic.h" 4 #include "processor.h" 5 #include "kvmclock.h" 6 #include "asm/barrier.h" 7 8 #define unlikely(x) __builtin_expect(!!(x), 0) 9 #define likely(x) __builtin_expect(!!(x), 1) 10 11 12 struct pvclock_vcpu_time_info __attribute__((aligned(4))) hv_clock[MAX_CPU]; 13 struct pvclock_wall_clock wall_clock; 14 static unsigned char valid_flags = 0; 15 static atomic64_t last_value = ATOMIC64_INIT(0); 16 17 /* 18 * Scale a 64-bit delta by scaling and multiplying by a 32-bit fraction, 19 * yielding a 64-bit result. 20 */ 21 static inline u64 scale_delta(u64 delta, u32 mul_frac, int shift) 22 { 23 u64 product; 24 #ifdef __i386__ 25 u32 tmp1, tmp2; 26 #endif 27 28 if (shift < 0) 29 delta >>= -shift; 30 else 31 delta <<= shift; 32 33 #ifdef __i386__ 34 __asm__ ( 35 "mul %5 ; " 36 "mov %4,%%eax ; " 37 "mov %%edx,%4 ; " 38 "mul %5 ; " 39 "xor %5,%5 ; " 40 "add %4,%%eax ; " 41 "adc %5,%%edx ; " 42 : "=A" (product), "=r" (tmp1), "=r" (tmp2) 43 : "a" ((u32)delta), "1" ((u32)(delta >> 32)), "2" (mul_frac) ); 44 #elif defined(__x86_64__) 45 __asm__ ( 46 "mul %%rdx ; shrd $32,%%rdx,%%rax" 47 : "=a" (product) : "0" (delta), "d" ((u64)mul_frac) ); 48 #else 49 #error implement me! 50 #endif 51 52 return product; 53 } 54 55 #ifdef __i386__ 56 # define do_div(n,base) ({ \ 57 u32 __base = (base); \ 58 u32 __rem; \ 59 __rem = ((u64)(n)) % __base; \ 60 (n) = ((u64)(n)) / __base; \ 61 __rem; \ 62 }) 63 #else 64 u32 __attribute__((weak)) __div64_32(u64 *n, u32 base) 65 { 66 u64 rem = *n; 67 u64 b = base; 68 u64 res, d = 1; 69 u32 high = rem >> 32; 70 71 /* Reduce the thing a bit first */ 72 res = 0; 73 if (high >= base) { 74 high /= base; 75 res = (u64) high << 32; 76 rem -= (u64) (high*base) << 32; 77 } 78 79 while ((s64)b > 0 && b < rem) { 80 b = b+b; 81 d = d+d; 82 } 83 84 do { 85 if (rem >= b) { 86 rem -= b; 87 res += d; 88 } 89 b >>= 1; 90 d >>= 1; 91 } while (d); 92 93 *n = res; 94 return rem; 95 } 96 97 # define do_div(n,base) ({ \ 98 u32 __base = (base); \ 99 u32 __rem; \ 100 (void)(((typeof((n)) *)0) == ((u64 *)0)); \ 101 if (likely(((n) >> 32) == 0)) { \ 102 __rem = (u32)(n) % __base; \ 103 (n) = (u32)(n) / __base; \ 104 } else \ 105 __rem = __div64_32(&(n), __base); \ 106 __rem; \ 107 }) 108 #endif 109 110 /** 111 * set_normalized_timespec - set timespec sec and nsec parts and normalize 112 * 113 * @ts: pointer to timespec variable to be set 114 * @sec: seconds to set 115 * @nsec: nanoseconds to set 116 * 117 * Set seconds and nanoseconds field of a timespec variable and 118 * normalize to the timespec storage format 119 * 120 * Note: The tv_nsec part is always in the range of 121 * 0 <= tv_nsec < NSEC_PER_SEC 122 * For negative values only the tv_sec field is negative ! 123 */ 124 void set_normalized_timespec(struct timespec *ts, long sec, s64 nsec) 125 { 126 while (nsec >= NSEC_PER_SEC) { 127 /* 128 * The following asm() prevents the compiler from 129 * optimising this loop into a modulo operation. See 130 * also __iter_div_u64_rem() in include/linux/time.h 131 */ 132 asm("" : "+rm"(nsec)); 133 nsec -= NSEC_PER_SEC; 134 ++sec; 135 } 136 while (nsec < 0) { 137 asm("" : "+rm"(nsec)); 138 nsec += NSEC_PER_SEC; 139 --sec; 140 } 141 ts->tv_sec = sec; 142 ts->tv_nsec = nsec; 143 } 144 145 static inline 146 unsigned pvclock_read_begin(const struct pvclock_vcpu_time_info *src) 147 { 148 unsigned version = src->version & ~1; 149 /* Make sure that the version is read before the data. 
	smp_rmb();
	return version;
}

static inline
bool pvclock_read_retry(const struct pvclock_vcpu_time_info *src,
			unsigned version)
{
	/* Make sure that the version is re-read after the data. */
	smp_rmb();
	return version != src->version;
}

static inline u64 rdtsc_ordered(void)
{
	/*
	 * FIXME: on Intel CPUs rmb(), i.e. lfence, is sufficient, which
	 * brings up to a 2x speedup
	 */
	mb();
	return rdtsc();
}

static inline
cycle_t __pvclock_read_cycles(const struct pvclock_vcpu_time_info *src)
{
	u64 delta = rdtsc_ordered() - src->tsc_timestamp;
	cycle_t offset = scale_delta(delta, src->tsc_to_system_mul,
				     src->tsc_shift);
	return src->system_time + offset;
}

cycle_t pvclock_clocksource_read(struct pvclock_vcpu_time_info *src)
{
	unsigned version;
	cycle_t ret;
	u64 last;
	u8 flags;

	do {
		version = pvclock_read_begin(src);
		ret = __pvclock_read_cycles(src);
		flags = src->flags;
	} while (pvclock_read_retry(src, version));

	if ((valid_flags & PVCLOCK_RAW_CYCLE_BIT) ||
	    ((valid_flags & PVCLOCK_TSC_STABLE_BIT) &&
	     (flags & PVCLOCK_TSC_STABLE_BIT)))
		return ret;

	/*
	 * Assumption here is that last_value, a global accumulator, always goes
	 * forward. If we are less than that, we should not be much smaller.
	 * We assume there is an error margin we're inside, and then the correction
	 * does not sacrifice accuracy.
	 *
	 * For reads: global may have changed between test and return,
	 * but this means someone else poked the clock at a later time.
	 * We just need to make sure we are not seeing a backwards event.
	 *
	 * For updates: last_value = ret is not enough, since two vcpus could be
	 * updating at the same time, and one of them could be slightly behind,
	 * making the assumption that last_value always goes forward fail to hold.
213 */ 214 last = atomic64_read(&last_value); 215 do { 216 if (ret < last) 217 return last; 218 last = atomic64_cmpxchg(&last_value, last, ret); 219 } while (unlikely(last != ret)); 220 221 return ret; 222 } 223 224 cycle_t kvm_clock_read() 225 { 226 struct pvclock_vcpu_time_info *src; 227 cycle_t ret; 228 int index = smp_id(); 229 230 src = &hv_clock[index]; 231 ret = pvclock_clocksource_read(src); 232 return ret; 233 } 234 235 void kvm_clock_init(void *data) 236 { 237 int index = smp_id(); 238 struct pvclock_vcpu_time_info *hvc = &hv_clock[index]; 239 240 printf("kvm-clock: cpu %d, msr %p\n", index, hvc); 241 wrmsr(MSR_KVM_SYSTEM_TIME_NEW, (unsigned long)hvc | 1); 242 } 243 244 void kvm_clock_clear(void *data) 245 { 246 wrmsr(MSR_KVM_SYSTEM_TIME_NEW, 0LL); 247 } 248 249 void pvclock_read_wallclock(struct pvclock_wall_clock *wall_clock, 250 struct pvclock_vcpu_time_info *vcpu_time, 251 struct timespec *ts) 252 { 253 u32 version; 254 u64 delta; 255 struct timespec now; 256 257 /* get wallclock at system boot */ 258 do { 259 version = wall_clock->version; 260 rmb(); /* fetch version before time */ 261 now.tv_sec = wall_clock->sec; 262 now.tv_nsec = wall_clock->nsec; 263 rmb(); /* fetch time before checking version */ 264 } while ((wall_clock->version & 1) || (version != wall_clock->version)); 265 266 delta = pvclock_clocksource_read(vcpu_time); /* time since system boot */ 267 delta += now.tv_sec * (u64)NSEC_PER_SEC + now.tv_nsec; 268 269 now.tv_nsec = do_div(delta, NSEC_PER_SEC); 270 now.tv_sec = delta; 271 272 set_normalized_timespec(ts, now.tv_sec, now.tv_nsec); 273 } 274 275 void kvm_get_wallclock(struct timespec *ts) 276 { 277 struct pvclock_vcpu_time_info *vcpu_time; 278 int index = smp_id(); 279 280 wrmsr(MSR_KVM_WALL_CLOCK_NEW, (unsigned long)&wall_clock); 281 vcpu_time = &hv_clock[index]; 282 pvclock_read_wallclock(&wall_clock, vcpu_time, ts); 283 } 284 285 void pvclock_set_flags(unsigned char flags) 286 { 287 valid_flags = flags; 288 } 289