MPI

Introduction

MPI (Message Passing Interface) is a library of subroutines and functions that can be called from Fortran and C/C++ programs. MPI is designed for distributed-memory machines.

This section gives a quick introduction to MPI with Fortran example codes. For more information, refer to mpi-forum.org.

The Fortran codes for the examples in this section can be found in the course repository in directory “MPI”.

Basic MPI subroutines and functions

We will use the following MPI subroutines and functions (a minimal point-to-point sketch follows the list):

  • MPI_INIT
  • MPI_FINALIZE
  • MPI_COMM_SIZE
  • MPI_COMM_RANK
  • MPI_BARRIER
  • MPI_REDUCE
  • MPI_SEND
  • MPI_RECV
  • MPI_SENDRECV
  • MPI_WTIME
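
MPI_SEND and MPI_RECV are not called directly in the examples below (Example 2 uses the combined MPI_SENDRECV instead), so the following minimal sketch illustrates blocking point-to-point communication: rank 0 sends one double precision value to rank 1. It assumes the program is started with at least two processes; the program name and message tag are arbitrary.

program send_recv_sketch
  use mpi
  implicit none
  integer :: ierr, nprocs, myid
  integer :: status(MPI_STATUS_SIZE)
  real(kind = 8) :: val
  call mpi_init(ierr)
  call mpi_comm_size(MPI_COMM_WORLD, nprocs, ierr)
  call mpi_comm_rank(MPI_COMM_WORLD, myid, ierr)
  if (myid == 0) then
     ! Send one double precision value to rank 1 with message tag 99
     val = 3.14d0
     call mpi_send(val, 1, MPI_DOUBLE_PRECISION, 1, 99, MPI_COMM_WORLD, ierr)
  else if (myid == 1) then
     ! Receive the value from rank 0, matching the tag
     call mpi_recv(val, 1, MPI_DOUBLE_PRECISION, 0, 99, MPI_COMM_WORLD, status, ierr)
     write(*,*) 'Rank 1 received ', val
  end if
  call mpi_finalize(ierr)
end program send_recv_sketch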

Example 1: Hello World!

program MPI_1
  use mpi
  implicit none
  integer :: ierr, nprocs, myid
  call mpi_init(ierr)
  call mpi_comm_size(MPI_COMM_WORLD, nprocs, ierr)
  call mpi_comm_rank(MPI_COMM_WORLD, myid, ierr)
  
  write(*,*) 'I am processor with ID no. ', myid, ' out of ',nprocs, " processors"
  
  call mpi_finalize(ierr)
end program MPI_1
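
How the examples are compiled and launched depends on the MPI installation, but with common implementations such as Open MPI or MPICH they can typically be built with the compiler wrapper mpif90 and started with mpirun, e.g. mpif90 mpi_1.f90 -o mpi_1 followed by mpirun -np 4 ./mpi_1 (the file name is only illustrative; wrapper and launcher names vary between systems). Each of the 4 processes then prints its own rank and the total number of processes.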

Example 2: Wave equation in 1D
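
This example solves the one-dimensional wave equation u_tt = u_xx + f on the interval -1 <= x <= 1 up to time t_final = 2. The grid of nx = 200 points is split into contiguous blocks, one per process, and each block is padded with one ghost point on either side that is filled from the neighbouring process with MPI_SENDRECV before every time step. The spatial derivative is approximated with a second-order centered difference (compute_uxx) and the solution is advanced with the explicit central scheme up = 2*u - um + dt**2*(uxx + force). Initial and boundary data come from a manufactured solution (subroutine mms), which is also used to measure the maximum error; the error is collected on rank 0 with MPI_REDUCE, and MPI_WTIME measures the wall clock time.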

program wave1D_mpi
  use mpi
  implicit none
  integer :: ierr, nprocs, myid
  integer :: status(MPI_STATUS_SIZE)
  
  integer, parameter :: nx = 200
  real(kind = 8) :: t_final = 2.d0
  integer :: i,j,nt,it
  
  integer :: px,ix_off ! the x-proc index and the offset
  integer :: p_left,p_right,px_max
  integer :: nxl       !this is the local size
  integer :: remx
  
  real(kind = 8) :: hx,t,dt,max_err_loc,max_err
  real(kind = 8), dimension(:),  allocatable :: x,u,up,um,uxx,force,uex
  integer :: int_sum
  CHARACTER(7) :: char_step
  CHARACTER(7) :: char_id

  real(kind = 8) :: ums(1)
  
  real(kind=8) :: mpi_t0, mpi_t1, mpi_dt
  
  call mpi_init(ierr)
  call mpi_comm_size(MPI_COMM_WORLD, nprocs, ierr)
  call mpi_comm_rank(MPI_COMM_WORLD, myid, ierr)

  call MPI_BARRIER(MPI_COMM_WORLD,ierr)
  mpi_t0=mpi_wtime()

  hx = 2.d0/dble(nx-1)
  
  ! Label the processes from 1 to px_max
  px = myid + 1
  px_max = nprocs
  
  ! Split up the grid in the x-direction
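  ! Each process gets nx/px_max points; the first remx = nx - (nx/px_max)*px_max
  ! processes take one extra point. ix_off is the global index offset of the local block.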
  nxl = nx/px_max
  remx = nx-nxl*px_max
  if (px .le. remx) then
     nxl = nxl + 1
     ix_off = (px-1)*nxl
  else
     ix_off = (remx)*(nxl+1) + (px-(remx+1))*nxl
  end if
  call MPI_BARRIER(MPI_COMM_WORLD,ierr)
  call MPI_Reduce(nxl,int_sum,1,&
       MPI_INTEGER,MPI_SUM,&
       0,MPI_COMM_WORLD,ierr)
  if(myid == 0) then
     if (nx .ne. int_sum) then
        write(*,*) 'Something is wrong with the number of points in x-direction: ',&
             nx,int_sum
     end if
  end if
  
  ! Determine the neighbours of processor px
  p_left  = px-1 - 1
  p_right = px+1 - 1
  if (px .eq. px_max) p_right = MPI_PROC_NULL
  if (px .eq. 1) p_left = MPI_PROC_NULL  

  ! Allocate memory for various arrays
  allocate(u(0:nxl+1),up(0:nxl+1),um(0:nxl+1))
  allocate(x(0:nxl+1),uxx(1:nxl),force(1:nxl),uex(1:nxl))
  do i = 1,nxl
     x(i) = -1.d0 + dble(i-1+ix_off)*hx
  end do

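  ! Exchange ghost points: x(nxl+1) receives the right neighbour's x(1) and x(0)
  ! receives the left neighbour's x(nxl); sends/receives to MPI_PROC_NULL are no-ops
  ! at the physical boundaries.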
  ! send to the left, receive from the right
  call MPI_Sendrecv(x(1),1,MPI_DOUBLE_PRECISION,p_left,123,&
       x(nxl+1),1,MPI_DOUBLE_PRECISION,p_right,123,MPI_COMM_WORLD,status,ierr)
  ! send to the right, receive from the left
  call MPI_Sendrecv(x(nxl),1,MPI_DOUBLE_PRECISION,p_right,125,&
       x(0),1,MPI_DOUBLE_PRECISION,p_left,125,MPI_COMM_WORLD,status,ierr)
  call MPI_BARRIER(MPI_COMM_WORLD,ierr)

  t = 0.d0
  
  ! Compute the timestep
  dt = 0.9d0*hx      ! this must satisfy the CFL condition (dt <= hx for unit wave speed)
  nt = floor(t_final/dt)+1
  dt = t_final/dble(nt)
  
  WRITE(char_id,"(I7.7)") myid
  call printdble1d(x(1:nxl),nxl,"./SOL/x"//trim(char_id)//".txt")

  ! Set up initial data including the ghost points
  do i = 0,nxl+1
     call mms(ums,x(i),0.d0,t-dt)
     um(i) = ums(1)
     call mms(ums,x(i),0.d0,t)
     u(i) = ums(1)
  end do
  
  ! Do time stepping
  do it = 1,nt
     ! Communicate between processors
     ! send to the left, receive from the right
     call MPI_Sendrecv(u(1),1,MPI_DOUBLE_PRECISION,p_left,123,&
          u(nxl+1),1,MPI_DOUBLE_PRECISION,p_right,123,MPI_COMM_WORLD,status,ierr)
     ! send to the right, receive from the left
     call MPI_Sendrecv(u(nxl),1,MPI_DOUBLE_PRECISION,p_right,125,&
          u(0),1,MPI_DOUBLE_PRECISION,p_left,125,MPI_COMM_WORLD,status,ierr)
     call MPI_BARRIER(MPI_COMM_WORLD,ierr)
     ! Specify physical BC
     if (px .eq. px_max) then
        call mms(ums,x(nxl),0.d0,t)
        u(nxl) = ums(1)
     end if
     if (px .eq. 1) then
        call mms(ums,x(1),0.d0,t)
        u(1) = ums(1)
     end if
     ! Compute the right hand side
     call compute_uxx(uxx,u,nxl,hx,t,x)
     call compute_forcing(force,nxl,t,x)
     ! Compute the new solution up from u and um
     up(1:nxl) = 2.d0*u(1:nxl) - um(1:nxl) + dt**2*(uxx+force) 
     ! Increment time and swap the solution at different time levels
     t = t+dt
     um = u
     u  = up
     call MPI_BARRIER(MPI_COMM_WORLD,ierr)
     ! Update physical BC
     if (px .eq. px_max) then
        call mms(ums,x(nxl),0.d0,t)
        u(nxl) = ums(1)
     end if
     if (px .eq. 1) then
        call mms(ums,x(1),0.d0,t)
        u(1) = ums(1)
     end if

     WRITE(char_id,"(I7.7)") myid
     WRITE(char_step,"(I7.7)") it
     call printdble1d(u(1:nxl),nxl,&
          "./SOL/sol"//trim(char_step)//"_"//trim(char_id)//".txt")

     ! Compute the exact solution
     do i = 1,nxl
        call mms(ums,x(i),0.d0,t)
        uex(i) = ums(1)
     end do
     
     ! Compute the max error at time t
     max_err_loc = maxval(abs(u(1:nxl)-uex))
     call MPI_Reduce(max_err_loc,max_err,1,&
          MPI_DOUBLE_PRECISION,MPI_MAX,&
          0,MPI_COMM_WORLD,ierr)
  end do
  if(myid.eq.0) write(*,*) "At time t=", t, "the max error is ", max_err

  call MPI_BARRIER(MPI_COMM_WORLD,ierr)
  mpi_t1=mpi_wtime()
  mpi_dt=mpi_t1-mpi_t0
  if(myid.eq.0) write(*,*) "With", nprocs, "  processors the wall clock time = ", mpi_dt, " sec."

  call mpi_finalize(ierr)
  
end program wave1D_mpi


SUBROUTINE mms(u,x,y,t)
! This subroutine returns the manufactured solution
! u = cos(pi*x)*cos(pi*y)*cos(pi*t) in the array u at a given point (x,y=0,t)
      IMPLICIT NONE
      DOUBLE PRECISION x,y,t
      DOUBLE PRECISION u(1)
      DOUBLE PRECISION t2,t3,t4,t5,t6,t7
      t2 = 3.141592653589793D0*x
      t3 = cos(t2)
      t4 = 3.141592653589793D0*y
      t5 = cos(t4)
      t6 = 3.141592653589793D0*t
      t7 = cos(t6)
      u = t3*t5*t7
END SUBROUTINE mms



SUBROUTINE compute_forcing(force,nx,t,x)
  implicit none
  integer :: nx
  real(kind = 8) :: force(1:nx),t,x(0:nx+1)
  integer :: i
  
  do i = 1,nx
     ! The manufactured solution solves the homogeneous wave equation, so the forcing is zero
     force(i) = 0.d0
  end do
END SUBROUTINE compute_forcing



SUBROUTINE compute_uxx(uxx,u,nx,hx,t,x)
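! Approximate u_xx with the standard second-order centered difference,
! using the ghost points u(0) and u(nx+1) at the ends of the local block.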
  implicit none
  integer :: nx
  real(kind = 8) :: u(0:nx+1),x(0:nx+1),hx,uxx(1:nx),t
  integer :: i
  real(kind = 8) :: h2i

  h2i = 1.d0/hx/hx
  do i = 1,nx
     uxx(i) = h2i*(u(i+1)-2.d0*u(i)+u(i-1))
  end do
END SUBROUTINE compute_uxx



SUBROUTINE printdble1d(u,nx,str)
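! Write the nx entries of u to the file str on a single line.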
  implicit none
  integer, intent(in) :: nx
  real(kind = 8), intent(in) :: u(nx)
  character(len=*), intent(in) :: str
  integer :: i
  open(2,file=trim(str),status='unknown')
  do i=1,nx,1
     write(2,fmt='(E24.16)',advance='no') u(i)
  end do
  close(2)
END SUBROUTINE printdble1d