11da177e4SLinus Torvalds /* calibrate.c: default delay calibration 21da177e4SLinus Torvalds * 31da177e4SLinus Torvalds * Excised from init/main.c 41da177e4SLinus Torvalds * Copyright (C) 1991, 1992 Linus Torvalds 51da177e4SLinus Torvalds */ 61da177e4SLinus Torvalds 7cd354f1aSTim Schmielau #include <linux/jiffies.h> 81da177e4SLinus Torvalds #include <linux/delay.h> 91da177e4SLinus Torvalds #include <linux/init.h> 10941e492bSAndrew Morton #include <linux/timex.h> 113da757daSAlok Kataria #include <linux/smp.h> 127afe1845SSameer Nanda #include <linux/percpu.h> 138a9e1b0fSVenkatesh Pallipadi 14f3f3149fSAlok Kataria unsigned long lpj_fine; 15bfe8df3dSRandy Dunlap unsigned long preset_lpj; 161da177e4SLinus Torvalds static int __init lpj_setup(char *str) 171da177e4SLinus Torvalds { 181da177e4SLinus Torvalds preset_lpj = simple_strtoul(str,NULL,0); 191da177e4SLinus Torvalds return 1; 201da177e4SLinus Torvalds } 211da177e4SLinus Torvalds 221da177e4SLinus Torvalds __setup("lpj=", lpj_setup); 231da177e4SLinus Torvalds 248a9e1b0fSVenkatesh Pallipadi #ifdef ARCH_HAS_READ_CURRENT_TIMER 258a9e1b0fSVenkatesh Pallipadi 268a9e1b0fSVenkatesh Pallipadi /* This routine uses the read_current_timer() routine and gets the 278a9e1b0fSVenkatesh Pallipadi * loops per jiffy directly, instead of guessing it using delay(). 288a9e1b0fSVenkatesh Pallipadi * Also, this code tries to handle non-maskable asynchronous events 298a9e1b0fSVenkatesh Pallipadi * (like SMIs) 308a9e1b0fSVenkatesh Pallipadi */ 318a9e1b0fSVenkatesh Pallipadi #define DELAY_CALIBRATION_TICKS ((HZ < 100) ? 1 : (HZ/100)) 328a9e1b0fSVenkatesh Pallipadi #define MAX_DIRECT_CALIBRATION_RETRIES 5 338a9e1b0fSVenkatesh Pallipadi 340db0628dSPaul Gortmaker static unsigned long calibrate_delay_direct(void) 358a9e1b0fSVenkatesh Pallipadi { 368a9e1b0fSVenkatesh Pallipadi unsigned long pre_start, start, post_start; 378a9e1b0fSVenkatesh Pallipadi unsigned long pre_end, end, post_end; 388a9e1b0fSVenkatesh Pallipadi unsigned long start_jiffies; 39f3f3149fSAlok Kataria unsigned long timer_rate_min, timer_rate_max; 40f3f3149fSAlok Kataria unsigned long good_timer_sum = 0; 41f3f3149fSAlok Kataria unsigned long good_timer_count = 0; 42d2b46313SAndrew Worsley unsigned long measured_times[MAX_DIRECT_CALIBRATION_RETRIES]; 43d2b46313SAndrew Worsley int max = -1; /* index of measured_times with max/min values or not set */ 44d2b46313SAndrew Worsley int min = -1; 458a9e1b0fSVenkatesh Pallipadi int i; 468a9e1b0fSVenkatesh Pallipadi 478a9e1b0fSVenkatesh Pallipadi if (read_current_timer(&pre_start) < 0 ) 488a9e1b0fSVenkatesh Pallipadi return 0; 498a9e1b0fSVenkatesh Pallipadi 508a9e1b0fSVenkatesh Pallipadi /* 518a9e1b0fSVenkatesh Pallipadi * A simple loop like 528a9e1b0fSVenkatesh Pallipadi * while ( jiffies < start_jiffies+1) 538a9e1b0fSVenkatesh Pallipadi * start = read_current_timer(); 548a9e1b0fSVenkatesh Pallipadi * will not do. As we don't really know whether jiffy switch 558a9e1b0fSVenkatesh Pallipadi * happened first or timer_value was read first. And some asynchronous 568a9e1b0fSVenkatesh Pallipadi * event can happen between these two events introducing errors in lpj. 578a9e1b0fSVenkatesh Pallipadi * 588a9e1b0fSVenkatesh Pallipadi * So, we do 598a9e1b0fSVenkatesh Pallipadi * 1. pre_start <- When we are sure that jiffy switch hasn't happened 608a9e1b0fSVenkatesh Pallipadi * 2. check jiffy switch 618a9e1b0fSVenkatesh Pallipadi * 3. start <- timer value before or after jiffy switch 628a9e1b0fSVenkatesh Pallipadi * 4. post_start <- When we are sure that jiffy switch has happened 638a9e1b0fSVenkatesh Pallipadi * 648a9e1b0fSVenkatesh Pallipadi * Note, we don't know anything about order of 2 and 3. 658a9e1b0fSVenkatesh Pallipadi * Now, by looking at post_start and pre_start difference, we can 668a9e1b0fSVenkatesh Pallipadi * check whether any asynchronous event happened or not 678a9e1b0fSVenkatesh Pallipadi */ 688a9e1b0fSVenkatesh Pallipadi 698a9e1b0fSVenkatesh Pallipadi for (i = 0; i < MAX_DIRECT_CALIBRATION_RETRIES; i++) { 708a9e1b0fSVenkatesh Pallipadi pre_start = 0; 718a9e1b0fSVenkatesh Pallipadi read_current_timer(&start); 728a9e1b0fSVenkatesh Pallipadi start_jiffies = jiffies; 7370a06228STim Deegan while (time_before_eq(jiffies, start_jiffies + 1)) { 748a9e1b0fSVenkatesh Pallipadi pre_start = start; 758a9e1b0fSVenkatesh Pallipadi read_current_timer(&start); 768a9e1b0fSVenkatesh Pallipadi } 778a9e1b0fSVenkatesh Pallipadi read_current_timer(&post_start); 788a9e1b0fSVenkatesh Pallipadi 798a9e1b0fSVenkatesh Pallipadi pre_end = 0; 808a9e1b0fSVenkatesh Pallipadi end = post_start; 8170a06228STim Deegan while (time_before_eq(jiffies, start_jiffies + 1 + 8270a06228STim Deegan DELAY_CALIBRATION_TICKS)) { 838a9e1b0fSVenkatesh Pallipadi pre_end = end; 848a9e1b0fSVenkatesh Pallipadi read_current_timer(&end); 858a9e1b0fSVenkatesh Pallipadi } 868a9e1b0fSVenkatesh Pallipadi read_current_timer(&post_end); 878a9e1b0fSVenkatesh Pallipadi 88f3f3149fSAlok Kataria timer_rate_max = (post_end - pre_start) / 89f3f3149fSAlok Kataria DELAY_CALIBRATION_TICKS; 90f3f3149fSAlok Kataria timer_rate_min = (pre_end - post_start) / 91f3f3149fSAlok Kataria DELAY_CALIBRATION_TICKS; 928a9e1b0fSVenkatesh Pallipadi 938a9e1b0fSVenkatesh Pallipadi /* 94f3f3149fSAlok Kataria * If the upper limit and lower limit of the timer_rate is 958a9e1b0fSVenkatesh Pallipadi * >= 12.5% apart, redo calibration. 968a9e1b0fSVenkatesh Pallipadi */ 97d2b46313SAndrew Worsley if (start >= post_end) 98d2b46313SAndrew Worsley printk(KERN_NOTICE "calibrate_delay_direct() ignoring " 99d2b46313SAndrew Worsley "timer_rate as we had a TSC wrap around" 100d2b46313SAndrew Worsley " start=%lu >=post_end=%lu\n", 101d2b46313SAndrew Worsley start, post_end); 102d2b46313SAndrew Worsley if (start < post_end && pre_start != 0 && pre_end != 0 && 103f3f3149fSAlok Kataria (timer_rate_max - timer_rate_min) < (timer_rate_max >> 3)) { 104f3f3149fSAlok Kataria good_timer_count++; 105f3f3149fSAlok Kataria good_timer_sum += timer_rate_max; 106d2b46313SAndrew Worsley measured_times[i] = timer_rate_max; 107d2b46313SAndrew Worsley if (max < 0 || timer_rate_max > measured_times[max]) 108d2b46313SAndrew Worsley max = i; 109d2b46313SAndrew Worsley if (min < 0 || timer_rate_max < measured_times[min]) 110d2b46313SAndrew Worsley min = i; 111d2b46313SAndrew Worsley } else 112d2b46313SAndrew Worsley measured_times[i] = 0; 113d2b46313SAndrew Worsley 1148a9e1b0fSVenkatesh Pallipadi } 1158a9e1b0fSVenkatesh Pallipadi 116d2b46313SAndrew Worsley /* 117d2b46313SAndrew Worsley * Find the maximum & minimum - if they differ too much throw out the 118d2b46313SAndrew Worsley * one with the largest difference from the mean and try again... 119d2b46313SAndrew Worsley */ 120d2b46313SAndrew Worsley while (good_timer_count > 1) { 121d2b46313SAndrew Worsley unsigned long estimate; 122d2b46313SAndrew Worsley unsigned long maxdiff; 1238a9e1b0fSVenkatesh Pallipadi 124d2b46313SAndrew Worsley /* compute the estimate */ 125d2b46313SAndrew Worsley estimate = (good_timer_sum/good_timer_count); 126d2b46313SAndrew Worsley maxdiff = estimate >> 3; 127d2b46313SAndrew Worsley 128d2b46313SAndrew Worsley /* if range is within 12% let's take it */ 129d2b46313SAndrew Worsley if ((measured_times[max] - measured_times[min]) < maxdiff) 130d2b46313SAndrew Worsley return estimate; 131d2b46313SAndrew Worsley 132d2b46313SAndrew Worsley /* ok - drop the worse value and try again... */ 133d2b46313SAndrew Worsley good_timer_sum = 0; 134d2b46313SAndrew Worsley good_timer_count = 0; 135d2b46313SAndrew Worsley if ((measured_times[max] - estimate) < 136d2b46313SAndrew Worsley (estimate - measured_times[min])) { 137d2b46313SAndrew Worsley printk(KERN_NOTICE "calibrate_delay_direct() dropping " 138d2b46313SAndrew Worsley "min bogoMips estimate %d = %lu\n", 139d2b46313SAndrew Worsley min, measured_times[min]); 140d2b46313SAndrew Worsley measured_times[min] = 0; 141d2b46313SAndrew Worsley min = max; 142d2b46313SAndrew Worsley } else { 143d2b46313SAndrew Worsley printk(KERN_NOTICE "calibrate_delay_direct() dropping " 144d2b46313SAndrew Worsley "max bogoMips estimate %d = %lu\n", 145d2b46313SAndrew Worsley max, measured_times[max]); 146d2b46313SAndrew Worsley measured_times[max] = 0; 147d2b46313SAndrew Worsley max = min; 148d2b46313SAndrew Worsley } 149d2b46313SAndrew Worsley 150d2b46313SAndrew Worsley for (i = 0; i < MAX_DIRECT_CALIBRATION_RETRIES; i++) { 151d2b46313SAndrew Worsley if (measured_times[i] == 0) 152d2b46313SAndrew Worsley continue; 153d2b46313SAndrew Worsley good_timer_count++; 154d2b46313SAndrew Worsley good_timer_sum += measured_times[i]; 155d2b46313SAndrew Worsley if (measured_times[i] < measured_times[min]) 156d2b46313SAndrew Worsley min = i; 157d2b46313SAndrew Worsley if (measured_times[i] > measured_times[max]) 158d2b46313SAndrew Worsley max = i; 159d2b46313SAndrew Worsley } 160d2b46313SAndrew Worsley 161d2b46313SAndrew Worsley } 162d2b46313SAndrew Worsley 163d2b46313SAndrew Worsley printk(KERN_NOTICE "calibrate_delay_direct() failed to get a good " 164d2b46313SAndrew Worsley "estimate for loops_per_jiffy.\nProbably due to long platform " 165d2b46313SAndrew Worsley "interrupts. Consider using \"lpj=\" boot option.\n"); 1668a9e1b0fSVenkatesh Pallipadi return 0; 1678a9e1b0fSVenkatesh Pallipadi } 1688a9e1b0fSVenkatesh Pallipadi #else 1690db0628dSPaul Gortmaker static unsigned long calibrate_delay_direct(void) 1700db0628dSPaul Gortmaker { 1710db0628dSPaul Gortmaker return 0; 1720db0628dSPaul Gortmaker } 1738a9e1b0fSVenkatesh Pallipadi #endif 1748a9e1b0fSVenkatesh Pallipadi 1751da177e4SLinus Torvalds /* 1761da177e4SLinus Torvalds * This is the number of bits of precision for the loops_per_jiffy. Each 177191e5688SPhil Carmody * time we refine our estimate after the first takes 1.5/HZ seconds, so try 178191e5688SPhil Carmody * to start with a good estimate. 1793da757daSAlok Kataria * For the boot cpu we can skip the delay calibration and assign it a value 180f3f3149fSAlok Kataria * calculated based on the timer frequency. 181f3f3149fSAlok Kataria * For the rest of the CPUs we cannot assume that the timer frequency is same as 1823da757daSAlok Kataria * the cpu frequency, hence do the calibration for those. 1831da177e4SLinus Torvalds */ 1841da177e4SLinus Torvalds #define LPS_PREC 8 1851da177e4SLinus Torvalds 1860db0628dSPaul Gortmaker static unsigned long calibrate_delay_converge(void) 18771c696b1SPhil Carmody { 188191e5688SPhil Carmody /* First stage - slowly accelerate to find initial bounds */ 189b1b5f65eSPhil Carmody unsigned long lpj, lpj_base, ticks, loopadd, loopadd_base, chop_limit; 190191e5688SPhil Carmody int trials = 0, band = 0, trial_in_band = 0; 19171c696b1SPhil Carmody 19271c696b1SPhil Carmody lpj = (1<<12); 193191e5688SPhil Carmody 19471c696b1SPhil Carmody /* wait for "start of" clock tick */ 19571c696b1SPhil Carmody ticks = jiffies; 19671c696b1SPhil Carmody while (ticks == jiffies) 197191e5688SPhil Carmody ; /* nothing */ 19871c696b1SPhil Carmody /* Go .. */ 19971c696b1SPhil Carmody ticks = jiffies; 200191e5688SPhil Carmody do { 201191e5688SPhil Carmody if (++trial_in_band == (1<<band)) { 202191e5688SPhil Carmody ++band; 203191e5688SPhil Carmody trial_in_band = 0; 20471c696b1SPhil Carmody } 205191e5688SPhil Carmody __delay(lpj * band); 206191e5688SPhil Carmody trials += band; 207191e5688SPhil Carmody } while (ticks == jiffies); 208191e5688SPhil Carmody /* 209191e5688SPhil Carmody * We overshot, so retreat to a clear underestimate. Then estimate 210191e5688SPhil Carmody * the largest likely undershoot. This defines our chop bounds. 211191e5688SPhil Carmody */ 212191e5688SPhil Carmody trials -= band; 213b1b5f65eSPhil Carmody loopadd_base = lpj * band; 214b1b5f65eSPhil Carmody lpj_base = lpj * trials; 215b1b5f65eSPhil Carmody 216b1b5f65eSPhil Carmody recalibrate: 217b1b5f65eSPhil Carmody lpj = lpj_base; 218b1b5f65eSPhil Carmody loopadd = loopadd_base; 21971c696b1SPhil Carmody 22071c696b1SPhil Carmody /* 22171c696b1SPhil Carmody * Do a binary approximation to get lpj set to 222191e5688SPhil Carmody * equal one clock (up to LPS_PREC bits) 22371c696b1SPhil Carmody */ 224b1b5f65eSPhil Carmody chop_limit = lpj >> LPS_PREC; 225191e5688SPhil Carmody while (loopadd > chop_limit) { 226191e5688SPhil Carmody lpj += loopadd; 22771c696b1SPhil Carmody ticks = jiffies; 22871c696b1SPhil Carmody while (ticks == jiffies) 229191e5688SPhil Carmody ; /* nothing */ 23071c696b1SPhil Carmody ticks = jiffies; 23171c696b1SPhil Carmody __delay(lpj); 23271c696b1SPhil Carmody if (jiffies != ticks) /* longer than 1 tick */ 233191e5688SPhil Carmody lpj -= loopadd; 234191e5688SPhil Carmody loopadd >>= 1; 23571c696b1SPhil Carmody } 236b1b5f65eSPhil Carmody /* 237b1b5f65eSPhil Carmody * If we incremented every single time possible, presume we've 238b1b5f65eSPhil Carmody * massively underestimated initially, and retry with a higher 239b1b5f65eSPhil Carmody * start, and larger range. (Only seen on x86_64, due to SMIs) 240b1b5f65eSPhil Carmody */ 241b1b5f65eSPhil Carmody if (lpj + loopadd * 2 == lpj_base + loopadd_base * 2) { 242b1b5f65eSPhil Carmody lpj_base = lpj; 243b1b5f65eSPhil Carmody loopadd_base <<= 2; 244b1b5f65eSPhil Carmody goto recalibrate; 245b1b5f65eSPhil Carmody } 24671c696b1SPhil Carmody 24771c696b1SPhil Carmody return lpj; 24871c696b1SPhil Carmody } 24971c696b1SPhil Carmody 2507afe1845SSameer Nanda static DEFINE_PER_CPU(unsigned long, cpu_loops_per_jiffy) = { 0 }; 2517afe1845SSameer Nanda 252b565201cSJack Steiner /* 253b565201cSJack Steiner * Check if cpu calibration delay is already known. For example, 254b565201cSJack Steiner * some processors with multi-core sockets may have all cores 255b565201cSJack Steiner * with the same calibration delay. 256b565201cSJack Steiner * 257b565201cSJack Steiner * Architectures should override this function if a faster calibration 258b565201cSJack Steiner * method is available. 259b565201cSJack Steiner */ 2600db0628dSPaul Gortmaker unsigned long __attribute__((weak)) calibrate_delay_is_known(void) 261b565201cSJack Steiner { 262b565201cSJack Steiner return 0; 263b565201cSJack Steiner } 264b565201cSJack Steiner 265*e6639117SPeter De Schrijver /* 266*e6639117SPeter De Schrijver * Indicate the cpu delay calibration is done. This can be used by 267*e6639117SPeter De Schrijver * architectures to stop accepting delay timer registrations after this point. 268*e6639117SPeter De Schrijver */ 269*e6639117SPeter De Schrijver 270*e6639117SPeter De Schrijver void __attribute__((weak)) calibration_delay_done(void) 271*e6639117SPeter De Schrijver { 272*e6639117SPeter De Schrijver } 273*e6639117SPeter De Schrijver 2740db0628dSPaul Gortmaker void calibrate_delay(void) 2751da177e4SLinus Torvalds { 2761b19ca9fSRussell King unsigned long lpj; 277feae3203SMike Travis static bool printed; 2787afe1845SSameer Nanda int this_cpu = smp_processor_id(); 2791da177e4SLinus Torvalds 2807afe1845SSameer Nanda if (per_cpu(cpu_loops_per_jiffy, this_cpu)) { 2817afe1845SSameer Nanda lpj = per_cpu(cpu_loops_per_jiffy, this_cpu); 2828595c539SDiwakar Tundlam if (!printed) 2837afe1845SSameer Nanda pr_info("Calibrating delay loop (skipped) " 2847afe1845SSameer Nanda "already calibrated this CPU"); 2857afe1845SSameer Nanda } else if (preset_lpj) { 2861b19ca9fSRussell King lpj = preset_lpj; 287feae3203SMike Travis if (!printed) 288feae3203SMike Travis pr_info("Calibrating delay loop (skipped) " 289feae3203SMike Travis "preset value.. "); 290feae3203SMike Travis } else if ((!printed) && lpj_fine) { 2911b19ca9fSRussell King lpj = lpj_fine; 292feae3203SMike Travis pr_info("Calibrating delay loop (skipped), " 293f3f3149fSAlok Kataria "value calculated using timer frequency.. "); 294b565201cSJack Steiner } else if ((lpj = calibrate_delay_is_known())) { 295b565201cSJack Steiner ; 2961b19ca9fSRussell King } else if ((lpj = calibrate_delay_direct()) != 0) { 297feae3203SMike Travis if (!printed) 298feae3203SMike Travis pr_info("Calibrating delay using timer " 299feae3203SMike Travis "specific routine.. "); 3001da177e4SLinus Torvalds } else { 301feae3203SMike Travis if (!printed) 302feae3203SMike Travis pr_info("Calibrating delay loop... "); 3031b19ca9fSRussell King lpj = calibrate_delay_converge(); 3041da177e4SLinus Torvalds } 3057afe1845SSameer Nanda per_cpu(cpu_loops_per_jiffy, this_cpu) = lpj; 306feae3203SMike Travis if (!printed) 307feae3203SMike Travis pr_cont("%lu.%02lu BogoMIPS (lpj=%lu)\n", 3081b19ca9fSRussell King lpj/(500000/HZ), 3091b19ca9fSRussell King (lpj/(5000/HZ)) % 100, lpj); 310feae3203SMike Travis 3111b19ca9fSRussell King loops_per_jiffy = lpj; 312feae3203SMike Travis printed = true; 313*e6639117SPeter De Schrijver 314*e6639117SPeter De Schrijver calibration_delay_done(); 3151da177e4SLinus Torvalds } 316