1 |
Simon Strandman wrote: |
2 |
|
3 |
> Hi! |
4 |
> |
5 |
> Some binary distros like Mandrake and SUSE patch their glibcs with |
6 |
> x86_64 optimized strings and an x86_64 optimized libm to improve |
7 |
> performance. |
8 |
> |
9 |
> I tried extracting those patches from a Mandrake SRPM and adding them to |
10 |
> the glibc 2.3.5 ebuild. The x86_64 optimized strings patch built and |
11 |
> worked perfectly and gave a large speedup as you can see below. But I |
12 |
> couldn't get glibc to build with the libm patch because of unresolved |
13 |
> symbols (and I'm no programmer so I have no idea how to fix that). |
14 |
> |
15 |
> I found a small C program on a suse mailing-list to measure glibc |
16 |
> memory copy performance: |
17 |
> http://lists.suse.com/archive/suse-amd64/2005-Mar/0220.html |
18 |
> |
19 |
> With the glibc 2.3.5 currently in gentoo I get: |
20 |
> isidor ~ # ./memcpy 2200 1000 1048576 |
21 |
> Memory to memory copy rate = 1291.600098 MBytes / sec. Block size = |
22 |
> 1048576. |
23 |
> |
24 |
> But with glibc 2.3.5 + amd64 optimized strings I get: |
25 |
> isidor ~ # ./memcpy 2200 1000 1048576 |
26 |
> Memory to memory copy rate = 2389.321777 MBytes / sec. Block size = |
27 |
> 1048576. |
28 |
> |
29 |
> That's an improvement of over 1000 MB/s! SUSE 9.3 also gives about |
30 |
> 2300 MB/s out of the box. |
31 |
> |
32 |
> How about adding these patches to gentoo? Perhaps in glibc 2.3.5-r1 |
33 |
> before it leaves package.mask? I'll create a bugreport about it if you |
34 |
> agree! |
35 |
> |
36 |
> This .tar.bz2 contains the glibc directory from my overlay with the |
37 |
> mandrake patches included in files/mdk, but the libm patches are |
38 |
> commented out in the ebuild. |
39 |
> http://snigel.no-ip.com/~nxsty/linux/glibc.tar.bz2 |
40 |
> |
41 |
There is a bug in the original memcpy.c that will cause a segfault if |
42 |
you don't pass it any parameters. Here is a fixed version. I've left |
43 |
everything else alone (except for a spelling correction). |
44 |
|
45 |
// memcpy.c - Measure how fast we can copy memory |
46 |
|
47 |
#include <stdio.h> |
48 |
#include <stdlib.h> |
49 |
#include <time.h> |
50 |
#include <string.h> |
51 |
|
52 |
/*
 * rdtscll(val) - store the CPU time-stamp counter in the lvalue `val`.
 *
 * Executes the x86 RDTSC instruction. The "=a" and "=d" output constraints
 * pick up the two 32-bit halves of the counter, which are then combined
 * into one 64-bit value (low half from "=a", high half from "=d").
 *
 * Notes:
 *  - x86 / x86-64 with GCC-style inline assembly only.
 *  - `__asm__ __volatile__` is used instead of `asm volatile` so the macro
 *    also builds in strict ISO modes (-std=c99 / -std=c11), where plain
 *    `asm` is not a keyword.
 *  - The temporaries carry a distinctive suffix so that a caller passing a
 *    variable named `a` or `d` is not captured by the macro's own locals.
 *  - The halves are widened to unsigned long long so that the 32-bit shift
 *    is well defined even where `unsigned long` is only 32 bits wide.
 */
#define rdtscll(val) do { \
    unsigned int rdtsc_lo_, rdtsc_hi_; \
    __asm__ __volatile__("rdtsc" : "=a" (rdtsc_lo_), "=d" (rdtsc_hi_)); \
    (val) = ((unsigned long long) rdtsc_lo_) | \
            (((unsigned long long) rdtsc_hi_) << 32); \
} while (0)
58 |
|
59 |
/*
 * Parse a strictly positive base-10 integer from `text`.
 *
 * Returns 1 and stores the value in *out when the whole string is a number
 * greater than zero; returns 0 (leaving *out untouched) otherwise. Values
 * too large for a long are not rejected here: strtol() clamps them to
 * LONG_MAX.
 */
static int parse_positive(const char *text, long *out)
{
    char *end;
    long value = strtol(text, &end, 10);

    if (end == text || *end != '\0' || value <= 0) {
        return 0;
    }
    *out = value;
    return 1;
}

/*
 * Benchmark entry point: copy one heap buffer into another with memcpy()
 * a given number of times and print the resulting throughput.
 *
 * Arguments (all must be positive integers):
 *   argv[1]  CPU clock rate in MHz, used to convert ticks to seconds
 *   argv[2]  number of memcpy() iterations
 *   argv[3]  copy block size in bytes (rounded down to a whole number of
 *            unsigned long words)
 *
 * Returns 0 on success, 1 on bad arguments or allocation failure.
 */
int main(int argc, char *argv[]) {
    long cpu_rate, num_loops, requested_size, i;
    size_t block_size;
    unsigned char *send_block_p, *rcv_block_p;
    unsigned long long start_time, end_time;
    double rate;

    if (argc != 4 ||
        !parse_positive(argv[1], &cpu_rate) ||
        !parse_positive(argv[2], &num_loops) ||
        !parse_positive(argv[3], &requested_size)) {
        fprintf(stderr,
                "Usage: %s <cpu clk rate (MHz)> <num. iterations> "
                "<copy block size>\n",
                argc > 0 ? argv[0] : "memcpy");
        return 1;
    }

    /* Round the block size down to a whole number of machine words. */
    block_size = ((size_t) requested_size / sizeof(unsigned long))
                 * sizeof(unsigned long);
    if (block_size == 0) {
        fprintf(stderr, "Copy block size must be at least %zu bytes.\n",
                sizeof(unsigned long));
        return 1;
    }

    send_block_p = malloc(block_size);
    rcv_block_p = malloc(block_size);

    if ((send_block_p == NULL) || (rcv_block_p == NULL)) {
        fprintf(stderr, "Malloc failed to allocate block(s) of size %zu.\n",
                block_size);
        /* Bail out instead of handing a NULL pointer to memcpy(). */
        free(send_block_p);
        free(rcv_block_p);
        return 1;
    }

    /* The source buffer's contents are irrelevant; only copy time matters. */
    rdtscll(start_time);

    for (i = 0; i < num_loops; i++) {
        memcpy(rcv_block_p, send_block_p, block_size);
    }

    rdtscll(end_time);

    /*
     * bytes / ticks is bytes per clock cycle; multiplying by the clock rate
     * in MHz gives millions of bytes per second directly, so no further
     * scaling factor is needed.
     */
    rate = (double) block_size * (double) num_loops /
           (double) (end_time - start_time) *
           (double) cpu_rate;

    fprintf(stdout,
            "Memory to memory copy rate = %f MBytes / sec. Block size = %zu.\n",
            rate, block_size);

    free(send_block_p);
    free(rcv_block_p);
    return 0;
} /* end main() */
115 |
|
116 |
|
117 |
-- |
118 |
"Pluralitas non est ponenda sine necessitate" - W. of O. |
119 |
|
120 |
-- |
121 |
gentoo-amd64@g.o mailing list |