subroutine trans (a, n) !* transpose square matrix a, dimension nxn !* Consider this as a black box for the MPI course !*/ integer i, j, ij, ji, l,n double precision tmp double precision a(*) ij = 0 l = -1 do i = 0, n-1 l = l+ n + 1 ji = l ij = ij+i + 1 do j=i+1,n-1 tmp = a(ij+1) a(ij+1) = a(ji+1) a(ji+1) = tmp ij = ij+1 ji = ji+ n enddo enddo return end subroutine trans !* !* This program demonstrates the use of MPI_Alltoall when !* transposing a square matrix. !* For simplicity, the number of processes is 4 and the dimension !* of the matrix is fixed to NROW !*/ program main implicit none include 'mpif.h' integer :: nprocs integer :: proc_me,proc_up,proc_down integer ierr integer,parameter::NROW=8192 integer,parameter::NP=64 integer,parameter::NBLK=NROW/NP double precision,parameter::ONEML=99999999999.0 double precision,allocatable:: a(:,:) double precision,allocatable:: b(:,:) integer i, j, rank,provided,itemp double precision r0 call MPI_INIT(ierr) call MPI_COMM_SIZE(MPI_COMM_WORLD,nprocs,ierr) call MPI_COMM_RANK(MPI_COMM_WORLD,proc_me,ierr) allocate( a(NBLK,NROW),b(NBLK,NROW)) if(proc_me.eq.0) write(*,*) 'Transposing a',NROW,'x',NROW,' matrix, divided among',NP,'processors' if (nprocs.ne.NP) then if (proc_me.eq.0) then write(*,*) 'Error, number of processes must be',NP end if call MPI_Finalize (ierr) stop endif r0 = MPI_Wtime() do j=1,NROW do i=1,NBLK a(i,j) = ONEML * j + i + NBLK *proc_me enddo enddo r0 = MPI_Wtime()-r0 if(proc_me.eq.0) write(*,*) 'Building matrix time (sec)',r0 !* do the MPI part of the transpose */ !/* Tricky here is the number of items to send and receive. ! * Not NROWxNBLK as one may guess, but the amount to send to one process ! * and the amount to receive from any process */ r0 = MPI_Wtime() ! MPI_Alltoall does not a transpose of the data received, we have to ! do this ourself: ! - use function MPI_Alltoall to redistribute data: --- r0 = MPI_Wtime()-r0 if(proc_me.eq.0) write(*,*) 'MPI_Alltoall time (sec)',r0 !* transpose NP square matrices, order NBLKxNBLK: */ r0 = MPI_Wtime() do i=1,NP call trans(b(1,(i-1) * NBLK+1),NBLK) enddo r0 = MPI_Wtime()-r0; if(proc_me.eq.0) write(*,*) 'Transpose block matrices time (sec)',r0 !* now check the result */ do j=1,NROW do i=1,NBLK if (b(i,j).ne. ONEML * (i + NBLK * proc_me) + j ) then write(*,*) 'process',proc_me,'b',b(i,j),'expected', & ONEML * (i + NBLK * proc_me) + j call MPI_Abort (MPI_COMM_WORLD,1, ierr) endif enddo enddo if (proc_me.eq.0) write(*,*) 'Transpose seems ok!' call MPI_Finalize (ierr) end program main