/*************************************************************************** CANNON'S ALGORITHM FOR PARALLEL MATRIX MULTIPLICATION computes square matrix multiplication of size upto 64x64. This memory efficient algorithm uses checkerboard partitioning such that every process stores only one block of each operand at anytime. Using dynamic allocation will surely exploit the memory efficiency of this algorithm. However, this program uses static arrays to make it easier to implement the algorithm (note: some "strange" problems with "drifting" pointers forces the static array implementation) The program also measures the computation time and the total running time by using ftime routine of libbsd.a library (uses -lbsd option). COMPILATION: mc -sync mpc -lbsd -o executable_name this_file_name PARAMETERS: -procs n => uses n processors to run the program. This parameter is added automatically by the compiler. The default value is set in MP_PROCS environment variable. -input inputfile => reading the data of the matrix from text file inputfile The first integer in the textfile is the number of rows in both operand matrices. -output outputfile => send the output to outputfile. If not given, output will be sent to output.dat -block b => uses block of size b in computation. The block size is limited to 16. -pro p => to tell the program to use p processors . This parameter is necessary because the compiler-added -procs isn't accessible to the program. If not given, MP_PROCS will be used. p should be the same number as the number given to -procs parameter. ****************************************************************************/ #include #include #include #include #include #include #include"maisie.h" #define MAXSIZE 64 /* maximum size of matrix is 64x64 */ #define MAXBLOCKSIZE 16 /* maximum block size is 16x16 */ /**************************************************************************** entity block_processor represents one process of the parallel matrix multiplication. INPUT: - myA and myB: the matrices - starting_row, starting_col:index of the the block in the big result matrix - num_of_blocks: total number of blocks that the matrix is divided into. - blocksize: the size of the block. - print_processor: printing entity. OUTPUT: a block of the result of the matrix multiplication, sent to print_processor ****************************************************************************/ entity block_processor{myA, myB, starting_row, starting_col, num_of_blocks, blocksize, print_processor} int myA[MAXBLOCKSIZE][MAXBLOCKSIZE]; int myB[MAXBLOCKSIZE][MAXBLOCKSIZE]; int starting_row; int starting_col; int num_of_blocks; int blocksize; ename print_processor; { int i, j, k, shifting, myC[MAXBLOCKSIZE][MAXBLOCKSIZE]; ename left_processor; ename up_processor; message row_shift {int new_A[MAXBLOCKSIZE][MAXBLOCKSIZE];} rs; message col_shift {int new_B[MAXBLOCKSIZE][MAXBLOCKSIZE];} cs; message your_neighbor {ename left_processor; ename up_processor;} initmsg; /* WAITING FOR THE NEIGHBORS' ADDRESS */ wait until mtype (your_neighbor) { left_processor = msg.your_neighbor.left_processor; up_processor = msg.your_neighbor.up_processor; } /* COMPUTATION OF INITIAL DATA */ for(i=0; i MAXSIZE) { print("ERROR: the size of the array is too large!"); terminate(); } if (row % blocksize) { print("ERROR: array size must be divisible by blocksize!"); terminate(); } print("%ix%i matrix multiplication with %ix%i blocks on %i processors", row, row, blocksize, blocksize, num_of_procs); col = row; num_of_blocks = row / blocksize; for(i=0; imost_digit) most_digit = digit; } /* RESULTS IS IN => SAVE THE END OF COMPUTATION TIME */ ftime(&Tp); finish_time = (double) Tp.time + (double) Tp.millitm / 1000; /* PRINTING THE RESULT TO A TEXT FILE */ print("PRINTING"); most_digit = most_digit + 2; sprintf(format, "%%%ii", most_digit); if (output) outputfile=fopen(argv[output], "wt"); else outputfile=fopen("output.dat", "wt"); /* default output file: output.dat */ for(i=0; i