yrchen gave me a hint yesterday afternoon: the matrices should be transposed before doing a huge computation. In today's experiment, I wrote two versions of the code: one without any transpose loop (called m1), and one with it (called m1_t). Both were compiled with gcc using -O3 optimization. The result is:
m1_t:
real 5m31.300s
user 5m31.245s
sys 0m0.020s
m1:
real 5m30.802s
user 5m30.265s
sys 0m0.008s
It looks like -O3 already performs the transpose itself before doing the huge matrix multiplication.
#include <stdio.h>
#include <stdlib.h>
/* Number of rows and of columns of the square matrices used in this program. */
#define DIE 800
/* Fills every element of the DIE x DIE matrix `a` with a generated value. */
void gen( int a[][DIE]);
/* Matrix-multiplication routine operating on the DIE x DIE matrices `a` and `b`. */
void mul( int a[][DIE], int b[][DIE]);
/*
 * Benchmark driver: 10000 times over, re-seed the generator, fill two
 * DIE x DIE matrices with generated values, and pass them to mul().
 *
 * The command-line arguments are not used. Always returns 0.
 */
int main(int argc, char *argv[])
{
    /*
     * Static storage instead of automatic: each matrix holds DIE * DIE ints
     * (about 2.5 MB with DIE = 800 and a 4-byte int), which is large enough
     * to risk overflowing the stack when declared as ordinary locals.
     */
    static int a[DIE][DIE];
    static int b[DIE][DIE];
    int i;

    (void)argc;
    (void)argv;

    for (i = 0; i < 10000; i++) {
        /* Fixed seeds so every iteration works on the same two matrices. */
        srand(0);
        gen(a);
        srand(1);
        gen(b);
        mul(a, b);
    }
    return 0;
}
/*
 * Fill every element of the DIE x DIE matrix `a` with a pseudo-random
 * integer in the range 1..5.
 *
 * Values are drawn with rand() rather than random(): the program seeds the
 * generator with srand(), and srand() is only specified to seed rand().
 * rand() is also part of ISO C, whereas random() is a POSIX extension.
 */
void gen( int a[][DIE] )
{
    int i, j;

    for (i = 0; i < DIE; i++) {
        for (j = 0; j < DIE; j++) {
            /* rand() is non-negative, so % 5 yields 0..4; shift to 1..5. */
            a[i][j] = rand() % 5 + 1;
        }
    }
}
/*
 * Multiply the DIE x DIE matrices `a` and `b`.
 *
 * `b` is first copied into a transposed scratch matrix so that the innermost
 * loop reads both operands along a row (bt[j][k] == b[k][j]).
 *
 * The product is written to a function-local matrix and is not returned or
 * otherwise exposed to the caller. NOTE(review): because nothing reads the
 * product, an optimizing compiler may be free to drop the computation; keep
 * that in mind when timing this function.
 *
 * The scratch matrices have static storage, so the function is not
 * reentrant.
 */
void mul( int a[][DIE], int b[][DIE])
{
    /* static: keeps two DIE x DIE int matrices out of automatic storage. */
    static int bt[DIE][DIE];
    static int c[DIE][DIE];
    int i, j, k;

    /* Transpose b into bt. */
    for (i = 0; i < DIE; i++) {
        for (j = 0; j < DIE; j++) {
            bt[i][j] = b[j][i];
        }
    }

    for (i = 0; i < DIE; i++) {
        for (j = 0; j < DIE; j++) {
            int sum = 0;

            /* Row i of a times column j of b, i.e. row j of bt. */
            for (k = 0; k < DIE; k++) {
                sum += a[i][k] * bt[j][k];
            }
            c[i][j] = sum;
        }
    }
}
沒有留言:
張貼留言