Beruflich Dokumente
Kultur Dokumente
Strassen
Program dikerjakan pada laptop Toshiba Satellite L635, prosesor Intel Core i3 M370 2,4 GHz,
32-bit operation, serta menggunakan compiler Dev-C++.
Listing Program Algoritma Standar (Naive) dan Algoritma Strassen
#include <iostream>
#include <stdlib.h>
#include <stdio.h>
// Z = X * Y  (conventional triple-loop product)
//
// All three matrices are N x N and stored row-major. Each one carries its
// own pitch, i.e. the number of doubles between the starts of consecutive
// rows, so an operand may be a sub-block of a larger matrix.
//
//   N       number of rows/columns
//   Xpitch  row stride of X;  X  left operand (read only)
//   Ypitch  row stride of Y;  Y  right operand (read only)
//   Zpitch  row stride of Z;  Z  destination, every element is overwritten
void mmult(int N,
           int Xpitch, const double X[],
           int Ypitch, const double Y[],
           int Zpitch, double Z[])
{
    for (int row = 0; row < N; ++row)
    {
        const double *xrow = X + row*Xpitch;
        double *zrow = Z + row*Zpitch;

        for (int col = 0; col < N; ++col)
        {
            // Walk down column `col` of Y while walking along row `row` of X.
            const double *ycol = Y + col;
            double acc = 0.0;

            for (int k = 0; k < N; ++k)
                acc += xrow[k]*ycol[k*Ypitch];

            zrow[col] = acc;
        }
    }
}
// S = X + Y
//
// Element-wise sum of two N x N row-major matrices. Each matrix has its own
// pitch (number of doubles between the starts of consecutive rows), so any
// operand may be a sub-block of a larger matrix.
//
//   N       number of rows/columns
//   Xpitch  row stride of X;  X  first operand (read only)
//   Ypitch  row stride of Y;  Y  second operand (read only)
//   Spitch  row stride of S;  S  destination, receives X + Y
void madd(int N,
          int Xpitch, const double X[],
          int Ypitch, const double Y[],
          int Spitch, double S[])
{
    for (int i = 0; i < N; i++)
        for (int j = 0; j < N; j++)
            S[i*Spitch + j] = X[i*Xpitch + j] + Y[i*Ypitch + j];
}
// S = X - Y
//
// Element-wise difference of two N x N row-major matrices. Every matrix has
// its own pitch (doubles between the starts of consecutive rows).
//
//   N       number of rows/columns
//   Xpitch  row stride of X;  X  minuend (read only)
//   Ypitch  row stride of Y;  Y  subtrahend (read only)
//   Spitch  row stride of S;  S  destination, receives X - Y
void msub(int N,
          int Xpitch, const double X[],
          int Ypitch, const double Y[],
          int Spitch, double S[])
{
    for (int row = 0; row < N; ++row)
    {
        const double *xrow = X + row*Xpitch;
        const double *yrow = Y + row*Ypitch;
        double *srow = S + row*Spitch;

        for (int col = 0; col < N; ++col)
            srow[col] = xrow[col] - yrow[col];
    }
}
// Z = X * Y using Strassen's algorithm (7 recursive half-size products
// instead of 8).
//
// All matrices are N x N, row-major, each with its own pitch (doubles between
// the starts of consecutive rows). Z must not overlap X or Y.
//
//   N       number of rows/columns
//   Xpitch  row stride of X;  X  left operand (read only)
//   Ypitch  row stride of Y;  Y  right operand (read only)
//   Zpitch  row stride of Z;  Z  destination, every element is overwritten
//
// The conventional product mmult() is used when N is at or below the
// recursion cutoff, when N is odd (the matrix cannot be split into four
// equal quadrants), or when scratch memory cannot be allocated.
void mmult_fast(int N,
                int Xpitch, const double X[],
                int Ypitch, const double Y[],
                int Zpitch, double Z[])
{
    // Recursion base case. An odd N would lose its last row and column when
    // halved, so it is also handed to the conventional algorithm.
    if (N <= 20 || N % 2 != 0)
    {
        mmult(N, Xpitch, X, Ypitch, Y, Zpitch, Z);
        return;
    }

    const int n = N/2;                  // size of each quadrant

    // X = | A B |     Y = | E F |
    //     | C D |         | G H |
    const double *A = X;
    const double *B = X + n;
    const double *C = X + n*Xpitch;
    const double *D = C + n;

    const double *E = Y;
    const double *F = Y + n;
    const double *G = Y + n*Ypitch;
    const double *H = G + n;

    // Seven n x n products plus two n x n temporaries, all densely packed
    // (pitch == n).
    double *P[7];
    const size_t sz = (size_t)n * (size_t)n * sizeof(double);
    int ok = 1;
    for (int i = 0; i < 7; i++)
    {
        P[i] = (double *) malloc(sz);
        if (P[i] == NULL)
            ok = 0;
    }
    double *T = (double *) malloc(sz);
    double *U = (double *) malloc(sz);

    if (!ok || T == NULL || U == NULL)
    {
        // Not enough scratch memory: release what we got and fall back to
        // the conventional algorithm, which needs none.
        free(U);
        free(T);
        for (int i = 6; i >= 0; i--)
            free(P[i]);
        mmult(N, Xpitch, X, Ypitch, Y, Zpitch, Z);
        return;
    }

    // P0 = A*(F - H);
    msub(n, Ypitch, F, Ypitch, H, n, T);
    mmult_fast(n, Xpitch, A, n, T, n, P[0]);

    // P1 = (A + B)*H
    madd(n, Xpitch, A, Xpitch, B, n, T);
    mmult_fast(n, n, T, Ypitch, H, n, P[1]);

    // P2 = (C + D)*E
    madd(n, Xpitch, C, Xpitch, D, n, T);
    mmult_fast(n, n, T, Ypitch, E, n, P[2]);

    // P3 = D*(G - E);
    msub(n, Ypitch, G, Ypitch, E, n, T);
    mmult_fast(n, Xpitch, D, n, T, n, P[3]);

    // P4 = (A + D)*(E + H)
    madd(n, Xpitch, A, Xpitch, D, n, T);
    madd(n, Ypitch, E, Ypitch, H, n, U);
    mmult_fast(n, n, T, n, U, n, P[4]);

    // P5 = (B - D)*(G + H)
    msub(n, Xpitch, B, Xpitch, D, n, T);
    madd(n, Ypitch, G, Ypitch, H, n, U);
    mmult_fast(n, n, T, n, U, n, P[5]);

    // P6 = (A - C)*(E + F)
    msub(n, Xpitch, A, Xpitch, C, n, T);
    madd(n, Ypitch, E, Ypitch, F, n, U);
    mmult_fast(n, n, T, n, U, n, P[6]);

    // Z upper left  = A*E + B*G = (P3 + P4) + (P5 - P1)
    madd(n, n, P[4], n, P[3], n, T);
    msub(n, n, P[5], n, P[1], n, U);
    madd(n, n, T, n, U, Zpitch, Z);

    // Z lower left  = C*E + D*G = P2 + P3
    madd(n, n, P[2], n, P[3], Zpitch, Z + n*Zpitch);

    // Z upper right = A*F + B*H = P0 + P1
    madd(n, n, P[0], n, P[1], Zpitch, Z + n);

    // Z lower right = C*F + D*H = (P0 + P4) - (P2 + P6)
    madd(n, n, P[0], n, P[4], n, T);
    madd(n, n, P[2], n, P[6], n, U);
    msub(n, n, T, n, U, Zpitch, Z + n*(Zpitch + 1));

    free(U);
    free(T);
    for (int i = 6; i >= 0; i--)
        free(P[i]);
}
// Print an N x N row-major matrix to stdout, one matrix row per output line.
// Each element is written with an explicit sign and four decimals, followed
// by a single space.
//
//   N      number of rows/columns
//   pitch  doubles between the starts of consecutive rows of M
//   M      matrix to print (read only)
void mprint(int N, int pitch, const double M[])
{
    for (int row = 0; row < N; ++row)
    {
        const double *line = M + row*pitch;

        for (int col = 0; col < N; ++col)
            printf("%+0.4f ", line[col]);

        printf("\n");
    }
}
#ifdef MM_TEST1
// Small self-check, compiled only when MM_TEST1 is defined: multiplies two
// fixed 4x4 matrices with mmult() and with mmult_fast() and prints both
// results, separated by a ruler line, so they can be compared by eye.
int main(void)
{
    enum { DIM = 4 };

    double X[DIM*DIM] =
    {
         2, 4, 2,  5,
        -3, 1, 4,  2,
         1, 5, 6, -2,
         6, 2, 4,  2
    };
    double Y[DIM*DIM] =
    {
        5,  1,  4, 2,
        2, -6,  1, 3,
        4,  2,  4, 5,
        1,  3, -2, 1
    };
    double Z[DIM*DIM];
    double Zfast[DIM*DIM];

    // Reference result from the conventional algorithm.
    mmult(DIM, DIM, X, DIM, Y, DIM, Z);
    mprint(DIM, DIM, Z);

    printf("=========\n");

    // Same product through the fast entry point.
    mmult_fast(DIM, DIM, X, DIM, Y, DIM, Zfast);
    mprint(DIM, DIM, Zfast);

    return 0;
}
#endif
#include <sys/time.h>
// File-scope scratch buffer and timestamp holder.
// NOTE(review): neither is referenced by any code visible in this listing;
// presumably they belong to the timeval_print() helper - confirm.
char buffer[30];
time_t curtime;
// Matrix dimension used by the benchmark main() below. This macro must stay
// after the matrix routines, which use `N` as a parameter name.
#define N 1000
// Benchmark driver: fills N x N matrices with mrand(), then times mmult()
// and mmult_fast() on the same operands. For each run it prints the start
// and end timestamps via timeval_print() and then the elapsed time as
// "seconds.microseconds".
//
// mrand(), timeval_print() and timeval_subtract() are expected to be
// provided elsewhere in the program.
//
// Returns 0 on success, EXIT_FAILURE if the matrices cannot be allocated.
int main(void)
{
    struct timeval tvBegin, tvEnd, tvDiff;

    const size_t count = (size_t)N * N;         // elements per matrix
    double *X     = (double *) malloc(count * sizeof *X);
    double *Y     = (double *) malloc(count * sizeof *Y);
    double *Z     = (double *) malloc(count * sizeof *Z);
    double *Zfast = (double *) malloc(count * sizeof *Zfast);

    if (X == NULL || Y == NULL || Z == NULL || Zfast == NULL)
    {
        fprintf(stderr, "out of memory\n");
        free(Zfast);
        free(Z);
        free(Y);
        free(X);
        return EXIT_FAILURE;
    }

    mrand(N, N, X);
    mrand(N, N, Y);
    mrand(N, N, Z);
    mrand(N, N, Zfast);

    // Conventional algorithm.
    gettimeofday(&tvBegin, NULL);
    timeval_print(&tvBegin);
    mmult(N, N, X, N, Y, N, Z);
    gettimeofday(&tvEnd, NULL);
    timeval_print(&tvEnd);
    timeval_subtract(&tvDiff, &tvEnd, &tvBegin);
    printf("%ld.%06ld\n", (long int) tvDiff.tv_sec, (long int) tvDiff.tv_usec);

    // Strassen algorithm on the same operands.
    gettimeofday(&tvBegin, NULL);
    timeval_print(&tvBegin);
    mmult_fast(N, N, X, N, Y, N, Zfast);
    gettimeofday(&tvEnd, NULL);
    timeval_print(&tvEnd);
    timeval_subtract(&tvDiff, &tvEnd, &tvBegin);
    printf("%ld.%06ld\n", (long int) tvDiff.tv_sec, (long int) tvDiff.tv_usec);

    free(Zfast);
    free(Z);
    free(Y);
    free(X);
    return 0;
}