MPI¶
Introduction¶
MPI (Message Passing Interface) is a library, containing many subroutines and functions, that can be used by Fortran and C/C++. MPI is designed for distributed memory machines.
This section gives a quick introduction to MPI with Fortran example codes. For more information refer to mpi-forum.org.
The Fortran codes for the examples in this section can be found in the course repository in directory “MPI”.
Basic MPI subroutines and functions¶
We will use the following MPI subroutines and functions:
- MPI_INIT
- MPI_FINALIZE
- MPI_COMM_SIZE
- MPI_COMM_RANK
- MPI_BARRIER
- MPI_REDUCE
- MPI_SEND
- MPI_RECV
- MPI_SENDRECV
- MPI_WTIME
Example 1: Hello World!¶
program MPI_1
  ! Minimal MPI example: every rank reports its own id and the total
  ! number of ranks in MPI_COMM_WORLD.
  use mpi
  implicit none

  integer :: ierr    ! MPI return code (unchecked in this example)
  integer :: nprocs  ! total number of ranks
  integer :: myid    ! this rank's id (0-based)

  call mpi_init(ierr)
  call mpi_comm_size(MPI_COMM_WORLD, nprocs, ierr)
  call mpi_comm_rank(MPI_COMM_WORLD, myid, ierr)

  write(*,*) 'I am processor with ID no. ', myid, ' out of ',nprocs, " processors"

  call mpi_finalize(ierr)
end program MPI_1
Example 2: Wave equation in 1D¶
program wave1D_mpi
! Solve the 1D wave equation u_tt = u_xx + forcing on x in [-1,1] with a
! second-order central-difference (leapfrog) scheme, parallelized with MPI
! by splitting the grid in the x-direction with one ghost point per side.
! The solution is checked against a manufactured solution (subroutine mms),
! and each rank writes its grid and per-step solution to ./SOL/.
use mpi
implicit none
integer :: ierr, nprocs, myid
integer :: status(MPI_STATUS_SIZE)
integer, parameter :: nx = 200          ! total number of grid points
real(kind = 8) :: t_final = 2.d0        ! final simulation time
integer :: i,j,nt,it
integer :: px,ix_off ! the x-proc index and the offset
integer :: p_left,p_right,px_max        ! neighbour ranks and number of x-procs
integer :: nxl !this is the local size
integer :: remx                         ! leftover points when nx is not divisible by nprocs
real(kind = 8) :: hx,t,dt,max_err_loc,max_err
real(kind = 8), dimension(:), allocatable :: x,u,up,um,uxx,force,uex
integer :: int_sum                      ! global sum of local sizes (sanity check)
CHARACTER(7) :: char_step               ! zero-padded time-step number for filenames
CHARACTER(7) :: char_id                 ! zero-padded rank id for filenames
real(kind = 8) :: ums(1)                ! scratch for the manufactured solution
real(kind=8) :: mpi_t0, mpi_t1, mpi_dt  ! wall-clock timing
call mpi_init(ierr)
call mpi_comm_size(MPI_COMM_WORLD, nprocs, ierr)
call mpi_comm_rank(MPI_COMM_WORLD, myid, ierr)
! Synchronize before starting the timer so all ranks measure the same window
call MPI_BARRIER(MPI_COMM_WORLD,ierr)
mpi_t0=mpi_wtime()
! Uniform grid spacing on [-1,1] with nx points
hx = 2.d0/dble(nx-1)
! Label the processes from 1 to px_max
px = myid + 1
px_max = nprocs
! Split up the grid in the x-direction
! The first remx ranks get one extra point; ix_off is the global index offset
nxl = nx/px_max
remx = nx-nxl*px_max
if (px .le. remx) then
nxl = nxl + 1
ix_off = (px-1)*nxl
else
ix_off = (remx)*(nxl+1) + (px-(remx+1))*nxl
end if
call MPI_BARRIER(MPI_COMM_WORLD,ierr)
! Sanity check: the local sizes must add up to the global number of points
call MPI_Reduce(nxl,int_sum,1,&
MPI_INTEGER,MPI_SUM,&
0,MPI_COMM_WORLD,ierr)
if(myid == 0) then
if (nx .ne. int_sum) then
write(*,*) 'Something is wrong with the number of points in x-direction: ',&
nx,int_sum
end if
end if
! Determine the neighbours of processor px
! MPI_PROC_NULL turns the boundary sendrecvs into no-ops
p_left = px-1 - 1
p_right = px+1 - 1
if (px .eq. px_max) p_right = MPI_PROC_NULL
if (px .eq. 1) p_left = MPI_PROC_NULL
! Allocate memory for various arrays
! u/up/um carry ghost points 0 and nxl+1; the work arrays are interior-only
allocate(u(0:nxl+1),up(0:nxl+1),um(0:nxl+1))
allocate(x(0:nxl+1),uxx(1:nxl),force(1:nxl),uex(1:nxl))
do i = 1,nxl
x(i) = -1.d0 + dble(i-1+ix_off)*hx
end do
! Exchange ghost coordinates with the neighbours.
! NOTE(review): on the outermost ranks x(0) / x(nxl+1) are never received
! (MPI_PROC_NULL) and stay uninitialized; the values derived from them are
! always overwritten by the physical BC below — confirm this stays true.
! send to left receive from right
call MPI_Sendrecv(x(1),1,MPI_DOUBLE_PRECISION,p_left,123,&
x(nxl+1),1,MPI_DOUBLE_PRECISION,p_right,123,MPI_COMM_WORLD,status,ierr)
! send to right receive from left
call MPI_Sendrecv(x(nxl),1,MPI_DOUBLE_PRECISION,p_right,125,&
x(0),1,MPI_DOUBLE_PRECISION,p_left,125,MPI_COMM_WORLD,status,ierr)
call MPI_BARRIER(MPI_COMM_WORLD,ierr)
t = 0.d0
! Compute the timestep
dt = 0.9d0*hx !this must satisfy the CFL condition
! Round nt up, then shrink dt so an integer number of steps hits t_final exactly
nt = floor(t_final/dt)+1
dt = t_final/dble(nt)
WRITE(char_id,"(I7.7)") myid
call printdble1d(x(1:nxl),nxl,"./SOL/x"//trim(char_id)//".txt")
! Set up initial data including the ghost points
! Leapfrog needs two time levels: um at t-dt and u at t
do i = 0,nxl+1
call mms(ums,x(i),0.d0,t-dt)
um(i) = ums(1)
call mms(ums,x(i),0.d0,t)
u(i) = ums(1)
end do
! Do time stepping
do it = 1,nt
! Communicate between processors
! send to left receive from right
call MPI_Sendrecv(u(1),1,MPI_DOUBLE_PRECISION,p_left,123,&
u(nxl+1),1,MPI_DOUBLE_PRECISION,p_right,123,MPI_COMM_WORLD,status,ierr)
! send to right receive from left
call MPI_Sendrecv(u(nxl),1,MPI_DOUBLE_PRECISION,p_right,125,&
u(0),1,MPI_DOUBLE_PRECISION,p_left,125,MPI_COMM_WORLD,status,ierr)
call MPI_BARRIER(MPI_COMM_WORLD,ierr)
! Specify physical BC (Dirichlet data from the manufactured solution)
if (px .eq. px_max) then
call mms(ums,x(nxl),0.d0,t)
u(nxl) = ums(1)
end if
if (px .eq. 1) then
call mms(ums,x(1),0.d0,t)
u(1) = ums(1)
end if
! Compute the right hand side
call compute_uxx(uxx,u,nxl,hx,t,x)
call compute_forcing(force,nxl,t,x)
! Compute the new solution up from u and um (leapfrog update)
up(1:nxl) = 2.d0*u(1:nxl) - um(1:nxl) + dt**2*(uxx+force)
! Increment time and swap the solution at different time levels
t = t+dt
um = u
u = up
call MPI_BARRIER(MPI_COMM_WORLD,ierr)
! Update physical BC at the new time level
if (px .eq. px_max) then
call mms(ums,x(nxl),0.d0,t)
u(nxl) = ums(1)
end if
if (px .eq. 1) then
call mms(ums,x(1),0.d0,t)
u(1) = ums(1)
end if
! Each rank writes its local solution for this step
WRITE(char_id,"(I7.7)") myid
WRITE(char_step,"(I7.7)") it
call printdble1d(u(1:nxl),nxl,&
"./SOL/sol"//trim(char_step)//"_"//trim(char_id)//".txt")
! Compute the exact solution
do i = 1,nxl
call mms(ums,x(i),0.d0,t)
uex(i) = ums(1)
end do
! Compute the max error at time t (global max reduced onto rank 0)
max_err_loc = maxval(abs(u(1:nxl)-uex))
call MPI_Reduce(max_err_loc,max_err,1,&
MPI_DOUBLE_PRECISION,MPI_MAX,&
0,MPI_COMM_WORLD,ierr)
end do
! Only rank 0 holds the reduced error and reports timing
if(myid.eq.0) write(*,*) "At time t=", t, "the max error is ", max_err
call MPI_BARRIER(MPI_COMM_WORLD,ierr)
mpi_t1=mpi_wtime()
mpi_dt=mpi_t1-mpi_t0
if(myid.eq.0) write(*,*) "With", nprocs, " processors the wall clock time = ", mpi_dt, " sec."
call mpi_finalize(ierr)
end program wave1D_mpi
SUBROUTINE mms(u,x,y,t)
! This subroutine returns the manufactured solution
!   u(x,y,t) = cos(pi*x) * cos(pi*y) * cos(pi*t)
! in the size-1 array u at a given point (x,y=0,t).
!
! Fix: the original had no implicit none, so the temporaries t2..t7 were
! implicitly typed as default (single-precision) REALs and the "double
! precision" solution was actually evaluated in single precision.
implicit none
DOUBLE PRECISION, intent(in) :: x,y,t
DOUBLE PRECISION, intent(out) :: u(1)
DOUBLE PRECISION, parameter :: pi = 3.141592653589793D0
u(1) = cos(pi*x)*cos(pi*y)*cos(pi*t)
END SUBROUTINE mms
SUBROUTINE compute_forcing(force,nx,t,x)
! Fill force(1:nx) with the forcing term of the wave equation.
! For this manufactured solution the forcing is identically zero;
! t and x are kept in the interface for generality.
implicit none
integer :: nx
real(kind = 8) :: force(1:nx),t,x(0:nx+1)
force = 0.d0
END SUBROUTINE compute_forcing
SUBROUTINE compute_uxx(uxx,u,nx,hx,t,x)
! Approximate the second derivative u_xx on the interior points 1..nx
! with the standard second-order central difference, using the ghost
! points u(0) and u(nx+1). t and x are unused but kept in the interface.
implicit none
integer :: nx
real(kind = 8) :: u(0:nx+1),x(0:nx+1),hx,uxx(1:nx),t
real(kind = 8) :: inv_h2
inv_h2 = 1.d0/hx/hx
uxx(1:nx) = inv_h2*(u(2:nx+1) - 2.d0*u(1:nx) + u(0:nx-1))
END SUBROUTINE compute_uxx
SUBROUTINE printdble1d(u,nx,str)
! Write the nx doubles in u to the file named str, all on one line,
! each formatted as E24.16 (no separators, no trailing newline).
!
! Fix: the original hard-coded unit number 2, which can collide with a
! unit opened elsewhere (every rank of the MPI program calls this).
! F2008 newunit= hands out a unique, safe unit instead.
implicit none
integer, intent(in) :: nx
real(kind = 8), intent(in) :: u(nx)
character(len=*), intent(in) :: str
integer :: i, funit
open(newunit=funit,file=trim(str),status='unknown')
do i=1,nx,1
write(funit,fmt='(E24.16)',advance='no') u(i)
end do
close(funit)
END SUBROUTINE printdble1d