// Multiply two matrices A * B = C
//
// NOTE(review): this file was recovered from a copy that had been collapsed
// onto one line and had every "<...>" span stripped. The system include
// targets, the body of the kernel's accumulation loop, and the first half of
// main() (host allocation, initialisation, device allocation, host-to-device
// copies, launch configuration) were reconstructed from the identifiers that
// survived. Each reconstructed span is marked NOTE(review) below -- confirm
// against the original before relying on it.

// NOTE(review): the four system include targets were lost; these cover the
// names the surviving code uses (printf, malloc/free, clock, CUDA runtime).
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <cuda_runtime.h>

#include "MersenneTwister.h"
#include "book.h"

#define TILE 30            // tile size
#define NT 200             // number of tiles
#define MS ((TILE)*(NT))   // matrix size (parenthesised so it is safe in any expression)

// row-major storing
#define IDX2(i,j) (((i)*(MS))+(j))

// Matrices are stored in row-major order
typedef struct {
    int width;
    int height;
    double* elements;
} Matrix;

// Computes one element of C = A * B per thread.
//
// Launch layout: 2D blocks of TILE x TILE threads; the thread at
// (blockIdx, threadIdx) handles row = blockIdx.y*TILE + threadIdx.y and
// col = blockIdx.x*TILE + threadIdx.x. Uses no shared memory.
//
// Preconditions: A.elements, B.elements and C.elements must be device
// pointers, and because IDX2 hard-codes a row stride of MS, all three
// matrices must be laid out with MS elements per row.
__global__ void matrixMul(Matrix A, Matrix B, Matrix C){
    int tidx = threadIdx.x;
    int tidy = threadIdx.y;
    int bidx = blockIdx.x;
    int bidy = blockIdx.y;

    int row = bidy*TILE + tidy;
    int col = bidx*TILE + tidx;

    // Guard against launches whose grid overshoots the matrix.
    if (row >= C.height || col >= C.width) {
        return;
    }

    double S = 0.0;
    int i;
    // NOTE(review): the loop bound and body were lost in extraction; this is
    // the plain row-by-column dot product implied by IDX2 and the accumulator S.
    for (i = 0; i < A.width; i++) {
        S += A.elements[IDX2(row, i)] * B.elements[IDX2(i, col)];
    }
    C.elements[IDX2(row, col)] = S;
}

// Allocates MS x MS host matrices A, B, C, fills A and B with random values,
// multiplies them on the GPU, copies the product back into C, and prints the
// elapsed CPU-clock time. Returns 0 on success and 1 if a host allocation
// fails; CUDA failures are handled by HANDLE_ERROR from book.h.
int main(void){
    const size_t count = (size_t)MS * (size_t)MS;
    const size_t bytes = count * sizeof(double);

    // NOTE(review): everything from here to the kernel launch is
    // reconstructed; only the variable names are taken from surviving code.
    Matrix A, B, C;
    A.width = MS; A.height = MS;
    B.width = MS; B.height = MS;
    C.width = MS; C.height = MS;

    A.elements = (double*)malloc(bytes);
    B.elements = (double*)malloc(bytes);
    C.elements = (double*)malloc(bytes);
    if (A.elements == NULL || B.elements == NULL || C.elements == NULL) {
        fprintf(stderr, "Host allocation of %lu bytes failed.\n", (unsigned long)bytes);
        free(A.elements);   // free(NULL) is a no-op
        free(B.elements);
        free(C.elements);
        return 1;
    }

    // NOTE(review): assumes MersenneTwister.h provides a default-constructible
    // MTRand whose operator() returns a double -- TODO confirm against the
    // header actually shipped with this project.
    MTRand rng;
    for (size_t k = 0; k < count; k++) {
        A.elements[k] = rng();
        B.elements[k] = rng();
    }

    Matrix dev_A, dev_B, dev_C;
    dev_A.width = A.width; dev_A.height = A.height;
    dev_B.width = B.width; dev_B.height = B.height;
    dev_C.width = C.width; dev_C.height = C.height;

    HANDLE_ERROR(cudaMalloc((void**)&dev_A.elements, bytes));
    HANDLE_ERROR(cudaMalloc((void**)&dev_B.elements, bytes));
    HANDLE_ERROR(cudaMalloc((void**)&dev_C.elements, bytes));

    HANDLE_ERROR(cudaMemcpy(dev_A.elements, A.elements, bytes, cudaMemcpyHostToDevice));
    HANDLE_ERROR(cudaMemcpy(dev_B.elements, B.elements, bytes, cudaMemcpyHostToDevice));

    // One TILE x TILE block per output tile; NT x NT blocks cover the matrix.
    dim3 dimBlock(TILE, TILE);
    dim3 dimGrid(NT, NT);

    // NOTE(review): the original position of this timestamp is unknown; here
    // the measured interval covers the kernel and the device-to-host copy.
    clock_t start = clock();

    matrixMul<<<dimGrid, dimBlock>>>(dev_A, dev_B, dev_C);
    // A launch does not return a status; query it so a bad configuration is reported.
    HANDLE_ERROR(cudaGetLastError());

    // Blocking copy: waits for the kernel to finish before reading dev_C.
    HANDLE_ERROR(cudaMemcpy(C.elements, dev_C.elements, C.width*C.height*sizeof(double), cudaMemcpyDeviceToHost));

    printf("Time to compute: %f seconds.\n ", ((double)clock() - start)/CLOCKS_PER_SEC );

    free(A.elements);
    free(B.elements);
    free(C.elements);

    HANDLE_ERROR(cudaFree(dev_A.elements));
    HANDLE_ERROR(cudaFree(dev_B.elements));
    HANDLE_ERROR(cudaFree(dev_C.elements));

    // The original returned 1 here, which reports failure to the calling shell.
    return 0;
}