On Wed, 4 Feb 2009 21:27:46 +0000, Ralf Baechle <ralf@linux-mips.org> wrote:
> > If this makes sense, we might be able to sign up to do the work. Anyone
> > have a good, caching-aware memcpy test?
>
> Testing memcpy is an interesting little project. Correctness is one
> thing but a good implementation needs to do a few performance tradeoffs
> which are best measured with real-world, not synthetic, workloads.
For a correctness test, drivers/dma/dmatest.c might be a good template.
For a speed test, test_cipher_speed in crypto/tcrypt.c can be used as a
template. Attached is a test module I wrote based on it when I
implemented an asm version of csum_partial_copy_nocheck, etc. It will
show something like this:
# insmod /tmp/testspeed.ko mode=1
testing speed of csum_partial_copy_nocheck
test 0 (32 byte): 2051560 operations in 1 seconds (65649920 bytes)
test 1 (96 byte): 823512 operations in 1 seconds (79057152 bytes)
test 2 (256 byte): 329124 operations in 1 seconds (84255744 bytes)
test 3 (512 byte): 167739 operations in 1 seconds (85882368 bytes)
...
testing speed of gen_csum_partial_copy_nocheck
test 0 (32 byte): 1555953 operations in 1 seconds (49790496 bytes)
test 1 (96 byte): 700025 operations in 1 seconds (67202400 bytes)
test 2 (256 byte): 293716 operations in 1 seconds (75191296 bytes)
test 3 (512 byte): 151770 operations in 1 seconds (77706240 bytes)
...
insmod: error inserting '/tmp/testspeed.ko': -1 Resource temporarily unavailable
Feel free to hack it ;)
/*
* Quick & dirty speed testing module. (Based on tcrypt).
*
* This file is subject to the terms and conditions of the GNU General Public
* License. See the file "COPYING" in the main directory of this archive
* for more details.
*/
#include <linux/init.h>
#include <linux/module.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/moduleparam.h>
#include <linux/jiffies.h>
#include <net/checksum.h>
static unsigned int sec = 1;
static int mode;
/*
 * Reference (non-optimized) counterpart of csum_partial_copy_nocheck.
 *
 * Calls csum_partial() on @src with @len and @sum, copies @len bytes from
 * @src to @dst with memcpy(), and returns the value csum_partial() produced.
 */
static unsigned int gen_csum_partial_copy_nocheck(const void *src,
						  void *dst, int len, unsigned int sum)
{
	const unsigned int csum = csum_partial(src, len, sum);

	memcpy(dst, src, len);
	return csum;
}
/*
 * Reference (non-optimized) counterpart of csum_partial_copy_from_user.
 *
 * Copies @len bytes from the user pointer @src into @dst, then returns
 * csum_partial() of @dst seeded with @sum. If __copy_from_user() returns
 * non-zero, -EFAULT is stored through @err_ptr; on success *@err_ptr is
 * left untouched. The checksum is computed over @dst in either case.
 */
static unsigned int gen_csum_partial_copy_from_user(const void __user *src,
						    void *dst, int len, unsigned int sum, int *err_ptr)
{
	unsigned long uncopied;

	might_sleep();

	uncopied = __copy_from_user(dst, src, len);
	if (uncopied != 0)
		*err_ptr = -EFAULT;

	return csum_partial(dst, len, sum);
}
/*
 * Loop header that repeats the following statement until @sec seconds'
 * worth of jiffies have elapsed.
 *
 * @start: lvalue set to the value of jiffies when the loop begins
 * @end:   lvalue set to @start + @sec * HZ, the deadline in jiffies
 * @sec:   duration in seconds; may be any expression (it is parenthesized)
 * @count: lvalue reset to 0 and incremented after every iteration
 *
 * All arguments are parenthesized so that expression arguments such as
 * "a + b" are not torn apart by operator precedence.
 */
#define loop_while_sec(start, end, sec, count) \
	for ((start) = jiffies, (end) = (start) + (sec) * HZ, (count) = 0; \
	     time_before(jiffies, (end)); (count)++)
static int test_csum_partial_copy_speed(int cachemiss)
{
unsigned long start, end;
unsigned int i;
void *src, *dst;
size_t sizes[] = {
0x20, 0x60, 0x100, 0x200, 0x400,
1460, /* ETH_DATA_LEN - 20(ip header) - 20(tcp header) */
0x800, 0x1000,
};
size_t maxsize = sizes[ARRAY_SIZE(sizes) - 1];
int ofs;
int count;
int err;
int bufsize = 0x10000;
src = kmalloc(bufsize, GFP_KERNEL);
if (!src)
return -ENOMEM;
dst = kmalloc(bufsize, GFP_KERNEL);
if (!dst) {
kfree(src);
return -ENOMEM;
}
memset(src, 0xff, maxsize);
printk("\ntesting speed of csum_partial_copy_nocheck\n");
for (i = 0; i < ARRAY_SIZE(sizes); i++) {
printk("test %u (%d byte): ", i, sizes[i]);
ofs = 0;
loop_while_sec(start, end, sec, count) {
csum_partial_copy_nocheck(src + ofs, dst + ofs,
sizes[i], 0);
if (cachemiss) {
ofs += sizes[i];
if (ofs + sizes[i] > bufsize)
ofs = 0;
}
}
printk("%d operations in %d seconds (%d bytes)\n",
count, sec, count * sizes[i]);
}
printk("\ntesting speed of csum_partial_copy_from_user\n");
for (i = 0; i < ARRAY_SIZE(sizes); i++) {
printk("test %u (%d byte): ", i, sizes[i]);
ofs = 0;
loop_while_sec(start, end, sec, count) {
csum_partial_copy_from_user((const void __force __user
*)src + ofs,
dst + ofs,
sizes[i], 0, &err);
if (cachemiss) {
ofs += sizes[i];
if (ofs + sizes[i] > bufsize)
ofs = 0;
}
}
printk("%d operations in %d seconds (%d bytes)\n",
count, sec, count * sizes[i]);
}
printk("\ntesting speed of gen_csum_partial_copy_nocheck\n");
for (i = 0; i < ARRAY_SIZE(sizes); i++) {
printk("test %u (%d byte): ", i, sizes[i]);
ofs = 0;
loop_while_sec(start, end, sec, count) {
gen_csum_partial_copy_nocheck(src + ofs, dst + ofs,
sizes[i], 0);
if (cachemiss) {
ofs += sizes[i];
if (ofs + sizes[i] > bufsize)
ofs = 0;
}
}
printk("%d operations in %d seconds (%d bytes)\n",
count, sec, count * sizes[i]);
}
printk("\ntesting speed of gen_csum_partial_copy_from_user\n");
for (i = 0; i < ARRAY_SIZE(sizes); i++) {
printk("test %u (%d byte): ", i, sizes[i]);
ofs = 0;
loop_while_sec(start, end, sec, count) {
gen_csum_partial_copy_from_user((const void __force
__user *)src + ofs,
dst + ofs,
sizes[i], 0, &err);
if (cachemiss) {
ofs += sizes[i];
if (ofs + sizes[i] > bufsize)
ofs = 0;
}
}
printk("%d operations in %d seconds (%d bytes)\n",
count, sec, count * sizes[i]);
}
kfree(src);
kfree(dst);
return 0;
}
/*
 * Module entry point: run the speed test selected by the "mode" parameter
 * (0 or 1; any other value runs nothing) and then refuse to stay loaded.
 *
 * Returns the test's error code if it failed, otherwise -EAGAIN.
 */
static int __init init(void)
{
	int ret = 0;

	/* mode 0 and mode 1 map directly onto the cachemiss argument. */
	if (mode == 0 || mode == 1)
		ret = test_csum_partial_copy_speed(mode);

	/*
	 * A test failure is reported as-is; otherwise we intentionally
	 * return -EAGAIN so the module is never kept loaded.
	 */
	return ret ? ret : -EAGAIN;
}
/* Nothing to tear down: init() always returns an error, so the module never stays loaded. */
static void __exit fini(void) {}
module_init(init);
module_exit(fini);
module_param(mode, int, 0);
MODULE_PARM_DESC(mode, "Test mode: 0 = fixed buffer offset, 1 = step through the buffer to provoke cache misses (default 0)");
module_param(sec, uint, 0);
MODULE_PARM_DESC(sec, "Length in seconds of speed tests (default 1)");
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("Quick & dirty speed testing module");
|