11da177e4SLinus Torvalds /* 21da177e4SLinus Torvalds * xor.c : Multiple Devices driver for Linux 31da177e4SLinus Torvalds * 41da177e4SLinus Torvalds * Copyright (C) 1996, 1997, 1998, 1999, 2000, 51da177e4SLinus Torvalds * Ingo Molnar, Matti Aarnio, Jakub Jelinek, Richard Henderson. 61da177e4SLinus Torvalds * 71da177e4SLinus Torvalds * Dispatch optimized RAID-5 checksumming functions. 81da177e4SLinus Torvalds * 91da177e4SLinus Torvalds * This program is free software; you can redistribute it and/or modify 101da177e4SLinus Torvalds * it under the terms of the GNU General Public License as published by 111da177e4SLinus Torvalds * the Free Software Foundation; either version 2, or (at your option) 121da177e4SLinus Torvalds * any later version. 131da177e4SLinus Torvalds * 141da177e4SLinus Torvalds * You should have received a copy of the GNU General Public License 151da177e4SLinus Torvalds * (for example /usr/src/linux/COPYING); if not, write to the Free 161da177e4SLinus Torvalds * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 171da177e4SLinus Torvalds */ 181da177e4SLinus Torvalds 191da177e4SLinus Torvalds #define BH_TRACE 0 201da177e4SLinus Torvalds #include <linux/module.h> 215a0e3ad6STejun Heo #include <linux/gfp.h> 221da177e4SLinus Torvalds #include <linux/raid/xor.h> 23bff61975SNeilBrown #include <linux/jiffies.h> 2456a51991SJim Kukunas #include <linux/preempt.h> 251da177e4SLinus Torvalds #include <asm/xor.h> 261da177e4SLinus Torvalds 271da177e4SLinus Torvalds /* The xor routines to use. */ 281da177e4SLinus Torvalds static struct xor_block_template *active_template; 291da177e4SLinus Torvalds 301da177e4SLinus Torvalds void 319bc89cd8SDan Williams xor_blocks(unsigned int src_count, unsigned int bytes, void *dest, void **srcs) 321da177e4SLinus Torvalds { 339bc89cd8SDan Williams unsigned long *p1, *p2, *p3, *p4; 341da177e4SLinus Torvalds 359bc89cd8SDan Williams p1 = (unsigned long *) srcs[0]; 369bc89cd8SDan Williams if (src_count == 1) { 379bc89cd8SDan Williams active_template->do_2(bytes, dest, p1); 381da177e4SLinus Torvalds return; 391da177e4SLinus Torvalds } 401da177e4SLinus Torvalds 419bc89cd8SDan Williams p2 = (unsigned long *) srcs[1]; 429bc89cd8SDan Williams if (src_count == 2) { 439bc89cd8SDan Williams active_template->do_3(bytes, dest, p1, p2); 441da177e4SLinus Torvalds return; 451da177e4SLinus Torvalds } 461da177e4SLinus Torvalds 479bc89cd8SDan Williams p3 = (unsigned long *) srcs[2]; 489bc89cd8SDan Williams if (src_count == 3) { 499bc89cd8SDan Williams active_template->do_4(bytes, dest, p1, p2, p3); 501da177e4SLinus Torvalds return; 511da177e4SLinus Torvalds } 521da177e4SLinus Torvalds 539bc89cd8SDan Williams p4 = (unsigned long *) srcs[3]; 549bc89cd8SDan Williams active_template->do_5(bytes, dest, p1, p2, p3, p4); 551da177e4SLinus Torvalds } 56685784aaSDan Williams EXPORT_SYMBOL(xor_blocks); 571da177e4SLinus Torvalds 581da177e4SLinus Torvalds /* Set of all registered templates. */ 59*af7cf25dSJan Beulich static struct xor_block_template *__initdata template_list; 601da177e4SLinus Torvalds 611da177e4SLinus Torvalds #define BENCH_SIZE (PAGE_SIZE) 621da177e4SLinus Torvalds 63*af7cf25dSJan Beulich static void __init 641da177e4SLinus Torvalds do_xor_speed(struct xor_block_template *tmpl, void *b1, void *b2) 651da177e4SLinus Torvalds { 661da177e4SLinus Torvalds int speed; 676a328475SJim Kukunas unsigned long now, j; 681da177e4SLinus Torvalds int i, count, max; 691da177e4SLinus Torvalds 701da177e4SLinus Torvalds tmpl->next = template_list; 711da177e4SLinus Torvalds template_list = tmpl; 721da177e4SLinus Torvalds 7356a51991SJim Kukunas preempt_disable(); 7456a51991SJim Kukunas 751da177e4SLinus Torvalds /* 761da177e4SLinus Torvalds * Count the number of XORs done during a whole jiffy, and use 771da177e4SLinus Torvalds * this to calculate the speed of checksumming. We use a 2-page 781da177e4SLinus Torvalds * allocation to have guaranteed color L1-cache layout. 791da177e4SLinus Torvalds */ 801da177e4SLinus Torvalds max = 0; 811da177e4SLinus Torvalds for (i = 0; i < 5; i++) { 826a328475SJim Kukunas j = jiffies; 831da177e4SLinus Torvalds count = 0; 846a328475SJim Kukunas while ((now = jiffies) == j) 856a328475SJim Kukunas cpu_relax(); 866a328475SJim Kukunas while (time_before(jiffies, now + 1)) { 87685784aaSDan Williams mb(); /* prevent loop optimzation */ 881da177e4SLinus Torvalds tmpl->do_2(BENCH_SIZE, b1, b2); 891da177e4SLinus Torvalds mb(); 901da177e4SLinus Torvalds count++; 911da177e4SLinus Torvalds mb(); 921da177e4SLinus Torvalds } 931da177e4SLinus Torvalds if (count > max) 941da177e4SLinus Torvalds max = count; 951da177e4SLinus Torvalds } 961da177e4SLinus Torvalds 9756a51991SJim Kukunas preempt_enable(); 9856a51991SJim Kukunas 991da177e4SLinus Torvalds speed = max * (HZ * BENCH_SIZE / 1024); 1001da177e4SLinus Torvalds tmpl->speed = speed; 1011da177e4SLinus Torvalds 102685784aaSDan Williams printk(KERN_INFO " %-10s: %5d.%03d MB/sec\n", tmpl->name, 1031da177e4SLinus Torvalds speed / 1000, speed % 1000); 1041da177e4SLinus Torvalds } 1051da177e4SLinus Torvalds 106685784aaSDan Williams static int __init 107685784aaSDan Williams calibrate_xor_blocks(void) 1081da177e4SLinus Torvalds { 1091da177e4SLinus Torvalds void *b1, *b2; 1101da177e4SLinus Torvalds struct xor_block_template *f, *fastest; 1111da177e4SLinus Torvalds 11233f65df7SVegard Nossum /* 11333f65df7SVegard Nossum * Note: Since the memory is not actually used for _anything_ but to 11433f65df7SVegard Nossum * test the XOR speed, we don't really want kmemcheck to warn about 11533f65df7SVegard Nossum * reading uninitialized bytes here. 11633f65df7SVegard Nossum */ 11733f65df7SVegard Nossum b1 = (void *) __get_free_pages(GFP_KERNEL | __GFP_NOTRACK, 2); 1181da177e4SLinus Torvalds if (!b1) { 119685784aaSDan Williams printk(KERN_WARNING "xor: Yikes! No memory available.\n"); 1201da177e4SLinus Torvalds return -ENOMEM; 1211da177e4SLinus Torvalds } 1221da177e4SLinus Torvalds b2 = b1 + 2*PAGE_SIZE + BENCH_SIZE; 1231da177e4SLinus Torvalds 1241da177e4SLinus Torvalds /* 125685784aaSDan Williams * If this arch/cpu has a short-circuited selection, don't loop through 126685784aaSDan Williams * all the possible functions, just test the best one 1271da177e4SLinus Torvalds */ 1281da177e4SLinus Torvalds 1291da177e4SLinus Torvalds fastest = NULL; 1301da177e4SLinus Torvalds 1311da177e4SLinus Torvalds #ifdef XOR_SELECT_TEMPLATE 1321da177e4SLinus Torvalds fastest = XOR_SELECT_TEMPLATE(fastest); 1331da177e4SLinus Torvalds #endif 1341da177e4SLinus Torvalds 1351da177e4SLinus Torvalds #define xor_speed(templ) do_xor_speed((templ), b1, b2) 1361da177e4SLinus Torvalds 1371da177e4SLinus Torvalds if (fastest) { 138685784aaSDan Williams printk(KERN_INFO "xor: automatically using best " 139d788fec8SBorislav Petkov "checksumming function:\n"); 1401da177e4SLinus Torvalds xor_speed(fastest); 141d788fec8SBorislav Petkov goto out; 1421da177e4SLinus Torvalds } else { 1439bc89cd8SDan Williams printk(KERN_INFO "xor: measuring software checksum speed\n"); 1441da177e4SLinus Torvalds XOR_TRY_TEMPLATES; 1451da177e4SLinus Torvalds fastest = template_list; 1461da177e4SLinus Torvalds for (f = fastest; f; f = f->next) 1471da177e4SLinus Torvalds if (f->speed > fastest->speed) 1481da177e4SLinus Torvalds fastest = f; 1491da177e4SLinus Torvalds } 1501da177e4SLinus Torvalds 151685784aaSDan Williams printk(KERN_INFO "xor: using function: %s (%d.%03d MB/sec)\n", 1521da177e4SLinus Torvalds fastest->name, fastest->speed / 1000, fastest->speed % 1000); 1531da177e4SLinus Torvalds 1541da177e4SLinus Torvalds #undef xor_speed 1551da177e4SLinus Torvalds 156d788fec8SBorislav Petkov out: 1571da177e4SLinus Torvalds free_pages((unsigned long)b1, 2); 1581da177e4SLinus Torvalds 1591da177e4SLinus Torvalds active_template = fastest; 1601da177e4SLinus Torvalds return 0; 1611da177e4SLinus Torvalds } 1621da177e4SLinus Torvalds 1631da177e4SLinus Torvalds static __exit void xor_exit(void) { } 1641da177e4SLinus Torvalds 1651da177e4SLinus Torvalds MODULE_LICENSE("GPL"); 1661da177e4SLinus Torvalds 167685784aaSDan Williams /* when built-in xor.o must initialize before drivers/md/md.o */ 168685784aaSDan Williams core_initcall(calibrate_xor_blocks); 1691da177e4SLinus Torvalds module_exit(xor_exit); 170