MPI Project Set 4
Project 1
Many algorithms can or must utilize more than one ghost zone in each dimension. Write a program in the language of your choice to send and receive the halo zones between two processes. You may assume the number of ghost zones is the same in each dimension. You do not have to use subarrays but they may simplify the code.
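Before the full solutions, here is a reduced sketch of the core idea (mpi4py, run on exactly 2 ranks): each rank pads its interior with nghosts ghost cells on each side and swaps nghosts-wide halos with its neighbor. The sizes and variable names are illustrative assumptions; the solutions below do the same exchange in two dimensions.

import numpy as np
from mpi4py import MPI

comm = MPI.COMM_WORLD
rank = comm.Get_rank()
if comm.Get_size() != 2:
    raise SystemExit("Run this sketch with exactly 2 ranks")

nghosts = 2                        # assumed halo width (same on both sides)
nlocal = 6                         # assumed number of interior cells per rank
u = np.full(nlocal + 2*nghosts, float(rank + 1))

# rank 0 is the left domain, rank 1 the right; missing neighbors are PROC_NULL
right = 1 if rank == 0 else MPI.PROC_NULL
left = 0 if rank == 1 else MPI.PROC_NULL

# send my last interior cells to the right neighbor, receive into my left ghosts
comm.Sendrecv(u[nlocal:nlocal+nghosts], dest=right, sendtag=0,
              recvbuf=u[:nghosts], source=left, recvtag=0)
# send my first interior cells to the left neighbor, receive into my right ghosts
comm.Sendrecv(u[nghosts:2*nghosts], dest=left, sendtag=1,
              recvbuf=u[nlocal+nghosts:], source=right, recvtag=1)
print(rank, u)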
Example Solutions
C++
Contents of mpi_ghostzones.cxx
#include <cstring>
#include <iostream>
#include <iomanip>
#include <mpi.h>
using namespace std;
/* This example exchanges data among six rectangular domains with halos.
Most real codes use squares, but we want to illustrate how to use different dimensions.
*/
int main (int argc, char *argv[]) {
int N, M;
// Added for MPI
int nrl, ncl;
int rank, grid_rank, nprocs;
int up, down, right, left;
int tag=0;
int nghosts, w_halo;
//Initialize MPI
MPI_Init(&argc, &argv);
MPI_Comm_size(MPI_COMM_WORLD,&nprocs);
MPI_Comm_rank(MPI_COMM_WORLD,&rank);
N = 8;
M = 12;
int rows=2;
int cols=3;
if (rows*cols != nprocs) {
cout<<"Number of rows times columns does not equal nprocs\n";
MPI_Finalize();
return 1;
}
if (N%rows==0 && M%cols==0) {
nrl = N/rows;
ncl = M/cols;
}
else {
cout<<"Number of ranks should divide the number of rows evenly.\n";
MPI_Finalize();
return 2;
}
nghosts=2;
w_halo=nghosts*2;
int nrl_total=nrl+w_halo;
int ncl_total=ncl+w_halo;
double **w=new double*[nrl_total];
double *wptr=new double[(nrl_total)*(ncl_total)];
for (int i=0;i<nrl_total;++i,wptr+=ncl_total) {
w[i] = wptr;
}
double counter=1.;
for ( int i = 0; i < nrl_total; i++ ) {
for (int j = 0; j < ncl_total; j++ ) {
w[i][j] = (rank+1)*counter;
counter++;
}
}
//Set up the topology assuming processes number left to right by row
const int ndims=2;
int dims[2]={rows,cols};
int periods[2]={1,0};
int reorder=0;
MPI_Comm grid_comm;
int grid_coords[2];
MPI_Cart_create(MPI_COMM_WORLD, ndims, dims, periods, reorder, &grid_comm);
MPI_Comm_rank(grid_comm, &grid_rank);
MPI_Cart_coords(grid_comm, grid_rank, ndims, grid_coords);
int direction=0;
int displ=1;
MPI_Cart_shift(grid_comm, direction, displ, &up, &down);
direction=1;
MPI_Cart_shift(grid_comm, direction, displ, &left, &right);
cout<<"topo "<<grid_rank<<" "<<up<<" "<<down<<" "<<left<<" "<<right<<endl;
//Boundary conditions
double topBC=100.;
double bottomBC=400.;
double rightBC=350.;
double leftBC=350.;
if (grid_coords[0]==0) {
for (int i=0; i<nghosts; ++i) {
for (int j=0;j<ncl_total;++j){
w[i][j]=topBC;
}
}
}
if (grid_coords[0]==rows-1) {
for (int i=nrl+nghosts; i<nrl_total; ++i) {
for (int j=0;j<ncl_total;++j){
w[i][j]=bottomBC;
}
}
}
if (grid_coords[1]==0) {
for (int i=0; i<nrl_total; ++i){
for (int j=0; j<nghosts; ++j) {
w[i][j]=leftBC;
}
}
}
if (grid_coords[1]==cols-1) {
for (int i=0;i<nrl_total;++i){
for (int j=ncl+nghosts; j<ncl_total; ++j) {
w[i][j]=rightBC;
}
}
}
int starts[ndims];
int subsizes[ndims];
int sizes[]={nrl_total,ncl_total};
MPI_Datatype sbuf_left,rbuf_right,sbuf_right,rbuf_left;
MPI_Datatype sbuf_up,rbuf_down,sbuf_down,rbuf_up;
//Set up the MPI type for row buffers
//
subsizes[0]=nghosts; subsizes[1]=ncl_total;
starts[0]=nghosts; starts[1]=0;
MPI_Type_create_subarray(ndims,sizes,subsizes,starts,MPI_ORDER_C,MPI_DOUBLE,&sbuf_up);
MPI_Type_commit(&sbuf_up);
starts[0]=nrl+nghosts; starts[1]=0;
MPI_Type_create_subarray(ndims,sizes,subsizes,starts,MPI_ORDER_C,MPI_DOUBLE,&rbuf_down);
MPI_Type_commit(&rbuf_down);
starts[0]=nrl; starts[1]=0;
MPI_Type_create_subarray(ndims,sizes,subsizes,starts,MPI_ORDER_C,MPI_DOUBLE,&sbuf_down);
MPI_Type_commit(&sbuf_down);
starts[0]=0; starts[1]=0;
MPI_Type_create_subarray(ndims,sizes,subsizes,starts,MPI_ORDER_C,MPI_DOUBLE,&rbuf_up);
MPI_Type_commit(&rbuf_up);
//Set up MPI types for columns
//
subsizes[0]=nrl; subsizes[1]=nghosts;
starts[0]=nghosts; starts[1]=ncl;
MPI_Type_create_subarray(ndims,sizes,subsizes,starts,MPI_ORDER_C,MPI_DOUBLE,&sbuf_right);
MPI_Type_commit(&sbuf_right);
starts[0]=nghosts; starts[1]=0;
MPI_Type_create_subarray(ndims,sizes,subsizes,starts,MPI_ORDER_C,MPI_DOUBLE,&rbuf_left);
MPI_Type_commit(&rbuf_left);
starts[0]=nghosts; starts[1]=nghosts;
MPI_Type_create_subarray(ndims,sizes,subsizes,starts,MPI_ORDER_C,MPI_DOUBLE,&sbuf_left);
MPI_Type_commit(&sbuf_left);
starts[0]=nghosts; starts[1]=ncl+nghosts;
MPI_Type_create_subarray(ndims,sizes,subsizes,starts,MPI_ORDER_C,MPI_DOUBLE,&rbuf_right);
MPI_Type_commit(&rbuf_right);
//Buffers set up
MPI_Status status;
//We declared our array as an array of pointers, so we must align the
//beginning pointer for MPI at the start of the actual data
MPI_Sendrecv(&(w[0][0]),1,sbuf_up,up,tag,&(w[0][0]),1,rbuf_down,down,tag,grid_comm,&status);
MPI_Sendrecv(&(w[0][0]),1,sbuf_down,down,tag,&(w[0][0]),1,rbuf_up,up,tag,grid_comm,&status);
MPI_Sendrecv(&(w[0][0]),1,sbuf_right,right,tag,&(w[0][0]),1,rbuf_left,left,tag,grid_comm,&status);
MPI_Sendrecv(&(w[0][0]),1,sbuf_left,left,tag,&(w[0][0]),1,rbuf_right,right,tag,grid_comm,&status);
//Verification. Usually done with plots in real codes.
//Simplified from earlier examples, we'll just spot-check
//
double *u=new double[(nrl_total)*(ncl_total)];
int uwsize=(nrl_total)*(ncl_total);
memset(u,0,uwsize*sizeof(double));
int position;
MPI_Barrier(grid_comm);
if ( grid_rank == 0 ) {
MPI_Recv(u,uwsize,MPI_DOUBLE,1,1,grid_comm,&status);
cout<<"Ranks 0 and 1 Check columns"<<endl;
position=0;
for (int i=0;i<nrl_total;i++) {
for (int j=0;j<ncl_total;j++) {
cout<<w[i][j]<<" ";
}
cout<<" | ";
for (int j=0;j<ncl_total;j++) {
cout<<u[position++]<<" ";
}
cout<<endl;
}
cout<<endl;
}
if ( grid_rank == 1 ) {
position=0;
for (int i=0; i<nrl_total; i++)
for (int j=0;j<ncl_total; j++)
u[position++]=w[i][j];
MPI_Send(u,uwsize,MPI_DOUBLE,0,1,grid_comm);
cout<<"Rank 1 sent to rank 0 "<<endl;
}
MPI_Barrier(grid_comm);
//Check columns on other side
//
memset(u,0,uwsize*sizeof(double));
if ( grid_rank == 1 ) {
MPI_Recv(u,uwsize,MPI_DOUBLE,2,2,grid_comm,&status);
cout<<"Ranks 1 and 2 Check columns"<<endl;
position=0;
for (int i=0;i<nrl_total;i++) {
for (int j=0;j<ncl_total;j++) {
cout<<w[i][j]<<" ";
}
cout<<" | ";
for (int j=0;j<ncl_total;j++) {
cout<<u[position++]<<" ";
}
cout<<endl;
}
cout<<endl;
}
if ( grid_rank == 2 ) {
position=0;
for (int i=0; i<nrl_total; i++)
for (int j=0;j<ncl_total; j++)
u[position++]=w[i][j];
MPI_Send(u,uwsize,MPI_DOUBLE,1,2,grid_comm);
}
MPI_Barrier(grid_comm);
//Check periodic row exchanges
//
memset(u,0,uwsize*sizeof(double));
if ( grid_rank == 0 ) {
MPI_Recv(u,uwsize,MPI_DOUBLE,3,3,grid_comm,&status);
cout<<"Ranks 0 and 3 Check rows including periodic exchange"<<endl;
for (int i=0;i<nrl_total;i++) {
for (int j=0;j<ncl_total;j++) {
cout<<w[i][j]<<" ";
}
cout<<endl;
}
cout<<"-=---------------------------------------------------"<<endl;
position=0;
for (int i=0;i<nrl_total;i++) {
for (int j=0;j<ncl_total;j++) {
cout<<u[position++]<<" ";
}
cout<<endl;
}
cout<<endl;
}
if ( grid_rank == 3 ) {
position=0;
for (int i=0; i<nrl_total; i++)
for (int j=0;j<ncl_total; j++)
u[position++]=w[i][j];
MPI_Send(u,uwsize,MPI_DOUBLE,0,3,grid_comm);
}
MPI_Barrier(grid_comm);
//end checking section
//Type_free not really necessary but good practice
MPI_Type_free(&sbuf_up);
MPI_Type_free(&sbuf_down);
MPI_Type_free(&rbuf_up);
MPI_Type_free(&rbuf_down);
MPI_Type_free(&sbuf_left);
MPI_Type_free(&sbuf_right);
MPI_Type_free(&rbuf_left);
MPI_Type_free(&rbuf_right);
MPI_Comm_free(&grid_comm);
MPI_Finalize();
return 0;
}
Download mpi_ghostzones.cxx file
Fortran
Contents of mpi_ghostplate.f90
program ghosts
use mpi_f08
implicit none
integer, parameter :: maxiters=10000000
integer :: iteration=0
integer :: N, M
integer :: i,j
integer :: numargs, diff_interval
integer :: lb, ubr, ubc
double precision, allocatable, dimension(:,:) :: u,w
double precision :: eps, diff = 1.0
character(len=80) :: arg, filename
character(len=4) :: char_row, char_col
double precision :: time0, time1
double precision :: rows
integer :: nrows, ncols, nrl, ncl
integer :: world_rank, nprocs, tag=0
integer, parameter :: root=0
integer :: left, right, up, down
integer :: nghosts, w_halo
integer :: nrl_gz,ncl_gz,nrl_total,ncl_total
integer :: nsubdims
integer, dimension(2) :: dims, grid_coords
logical, dimension(2) :: periods
integer, dimension(2) :: sizes, subsizes, starts
logical :: reorder
integer :: ndims, grid_rank
integer :: direction, displ
integer :: nrequests
type(MPI_Comm) :: grid_comm
type(MPI_Status) :: mpi_stat
type(MPI_Datatype) :: sbuf_up, sbuf_down, rbuf_up, rbuf_down
type(MPI_Datatype) :: sbuf_left, sbuf_right, rbuf_left, rbuf_right
type(MPI_Request), dimension(:), allocatable :: mpi_requests
double precision :: gdiff
character(len=24) :: fname
interface
subroutine set_bcs(lb, nghosts,nr,nc,nrows,ncols,coords,u)
implicit none
integer, intent(in) :: lb, nghosts, nr, nc, nrows, ncols
integer, dimension(:), intent(in) :: coords
double precision, dimension(0:,0:), intent(inout) :: u
end subroutine
end interface
!Initialize MPI, get the local number of columns
call MPI_INIT()
call MPI_COMM_SIZE(MPI_COMM_WORLD,nprocs)
call MPI_COMM_RANK(MPI_COMM_WORLD,world_rank)
! check number of parameters and read in epsilon, filename, optionally size
! all ranks do this
numargs=command_argument_count()
if (numargs .lt. 2) then
call MPI_Finalize()
stop 'USAGE: epsilon output-file <N> <M>'
else
call get_command_argument(1,arg)
read(arg,*) eps
call get_command_argument(2,filename)
if (numargs==2) then
N=500
M=500
endif
if (numargs==3) then
call get_command_argument(3,arg)
read(arg,*) N
M=N
endif
if (numargs==4) then
call get_command_argument(3,arg)
read(arg,*) N
call get_command_argument(4,arg)
read(arg,*) M
endif
endif
!For simplicity, we will limit this code to perfect square process count
rows=sqrt(dble(nprocs))
if ( rows /= dble(int(rows)) ) then
call MPI_Finalize()
stop "This code requires a perfect square number of processes"
else
nrows=int(rows)
ncols=nrows
endif
!Strong scaling
!Weak scaling would set nrl=N and ncl=M
if (mod(N,nrows)==0 .and. mod(M,ncols)==0) then
nrl = N/nrows
ncl = M/ncols
else
call MPI_Finalize()
stop "Number of ranks should divide the number of rows evenly."
endif
!create cartesian topology for processes
ndims=2
dims(1) = nrows ! number of rows
dims(2) = ncols ! number of columns
periods(1) = .false. ! not cyclic in this direction
periods(2) = .false. ! not cyclic in this direction
reorder = .false. ! do not allow MPI to reorder the ranks
call MPI_Cart_create(MPI_COMM_WORLD, ndims, dims, periods, &
reorder, grid_comm)
call MPI_Comm_rank(grid_comm, grid_rank)
call MPI_Cart_coords(grid_comm, grid_rank, ndims, grid_coords)
direction = 0 ! shift along the 1st index (0 or 1)
displ = 1 ! shift by 1
call MPI_Cart_shift(grid_comm, direction, displ, up, down)
direction = 1 ! shift along the 2nd index (0 or 1)
call MPI_Cart_shift(grid_comm, direction, displ, left, right)
! Set up values
! Assume number of ghost zones same in both dimensions, lower bounds same
nghosts=1 ! number of ghost zones
w_halo=2*nghosts ! total halo width in each dimension (including corners)
nrl_gz=nrl+nghosts
ncl_gz=ncl+nghosts
nrl_total=nrl+w_halo
ncl_total=ncl+w_halo
!Assume lower bound same for both dimensions
lb=0
ubr=nrl_total-1
ubc=ncl_total-1
allocate(u(lb:ubr,lb:ubc), w(lb:ubr,lb:ubc))
u=0.d0
w=0.d0
!Physical boundary conditions
call set_bcs(lb,nghosts,nrl,ncl,nrows,ncols,grid_coords,u)
! Set up sendrecv/buffers
nsubdims = 2
sizes = [nrl_total,ncl_total]
!Rows
subsizes = [nghosts,ncl_total]
! Start indices must begin at 0 like C regardless of declared lower bound
starts = [nghosts,0]
call MPI_Type_create_subarray(nsubdims, sizes, subsizes, starts, &
MPI_ORDER_FORTRAN,MPI_DOUBLE_PRECISION, sbuf_up)
call MPI_TYPE_COMMIT(sbuf_up)
starts = [nrl+nghosts,0]
call MPI_Type_create_subarray(nsubdims, sizes, subsizes, starts, &
MPI_ORDER_FORTRAN,MPI_DOUBLE_PRECISION,rbuf_down)
call MPI_TYPE_COMMIT(rbuf_down)
starts = [nrl,0]
call MPI_Type_create_subarray(nsubdims, sizes, subsizes, starts, &
MPI_ORDER_FORTRAN,MPI_DOUBLE_PRECISION,sbuf_down)
call MPI_TYPE_COMMIT(sbuf_down)
starts = [0,0]
call MPI_Type_create_subarray(nsubdims, sizes, subsizes, starts, &
MPI_ORDER_FORTRAN,MPI_DOUBLE_PRECISION, rbuf_up)
call MPI_TYPE_COMMIT(rbuf_up)
!Columns
subsizes = [nrl,nghosts]
starts = [nghosts,ncl]
call MPI_Type_create_subarray(nsubdims, sizes, subsizes, starts, &
MPI_ORDER_FORTRAN,MPI_DOUBLE_PRECISION,sbuf_right)
call MPI_TYPE_COMMIT(sbuf_right)
starts = [nghosts,0]
call MPI_Type_create_subarray(nsubdims, sizes, subsizes, starts, &
MPI_ORDER_FORTRAN,MPI_DOUBLE_PRECISION,rbuf_left)
call MPI_TYPE_COMMIT(rbuf_left)
starts = [nghosts,nghosts]
call MPI_Type_create_subarray(nsubdims, sizes, subsizes, starts, &
MPI_ORDER_FORTRAN,MPI_DOUBLE_PRECISION,sbuf_left)
call MPI_TYPE_COMMIT(sbuf_left)
starts = [nghosts,ncl+nghosts]
call MPI_Type_create_subarray(nsubdims, sizes, subsizes, starts, &
MPI_ORDER_FORTRAN,MPI_DOUBLE_PRECISION,rbuf_right)
call MPI_TYPE_COMMIT(rbuf_right)
nrequests=8
allocate(mpi_requests(nrequests))
diff_interval=1
! Walltime from process 0 is good enough for me
if (grid_rank .eq. 0) then
write (*,'(a,es15.8,a,i4,a,i4)') 'Running until the difference is <= ', &
eps,' for global size ',N,'x',M
time0=MPI_WTIME()
endif
! Compute steady-state solution
do while ( iteration<=maxiters )
! Exchange halo values
call MPI_Irecv(u, 1, rbuf_right, right, tag, grid_comm,mpi_requests(1))
call MPI_Irecv(u, 1, rbuf_left, left, tag, grid_comm,mpi_requests(2))
call MPI_Irecv(u, 1, rbuf_up, up, tag, grid_comm,mpi_requests(3))
call MPI_Irecv(u, 1, rbuf_down, down, tag, grid_comm,mpi_requests(4))
call MPI_Isend(u, 1, sbuf_left, left, tag, grid_comm,mpi_requests(5))
call MPI_Isend(u, 1, sbuf_right, right, tag, grid_comm,mpi_requests(6))
call MPI_Isend(u, 1, sbuf_down, down, tag, grid_comm,mpi_requests(7))
call MPI_Isend(u, 1, sbuf_up, up, tag, grid_comm,mpi_requests(8))
! complete communications
call MPI_Waitall(nrequests,mpi_requests,MPI_STATUSES_IGNORE)
do j=lb+nghosts,lb+ncl_gz-1
do i=lb+nghosts,lb+nrl_gz-1
w(i,j) = 0.25*(u(i-1,j) + u(i+1,j) + u(i,j-1) + u(i,j+1))
enddo
enddo
if (mod(iteration,diff_interval)==0) then
if (diff_interval==-1) continue !disable convergence test
diff=maxval(abs(w(nghosts:ubr-nghosts,nghosts:ubc-nghosts) &
-u(nghosts:ubr-nghosts,nghosts:ubc-nghosts)))
call MPI_ALLREDUCE(diff,gdiff,1,MPI_DOUBLE_PRECISION,MPI_MAX, &
grid_comm)
if (gdiff <= eps) then
exit
endif
endif
!Update u. Don't overwrite boundaries.
u(nghosts:nrl_gz-1,nghosts:ncl_gz-1) = w(nghosts:nrl_gz-1,nghosts:ncl_gz-1)
! Reset physical boundaries (they get overwritten in the halo exchange)
call set_bcs(lb,nghosts,nrl,ncl,nrows,ncols,grid_coords,u)
!If this happens we will exit at next conditional test.
!Avoids explicit break here
if (iteration==maxiters) then
if (grid_rank==0) then
write(*,*) "Warning: maximum iterations exceeded"
endif
endif
iteration = iteration + 1
enddo
if (grid_rank==root) then
time1 = MPI_WTIME()
write(*,*) 'completed; iterations = ', iteration,"in ",time1-time0," sec"
endif
! Write solution to output file by row. Must use correct stitching to get
! overall result, such as a numpy concatenate on axis=1.
! It would also work to output by columns, in which case stitching is more
! straightforward.
!Some compilers (Intel) like to add EOLs to line up columns in output. The
! Fortran standard permits this with * IO, but it complicates plotting.
! Use some F08 features to fix this.
write(char_row,'(i4)') grid_coords(1)
write(char_col,'(i4)') grid_coords(2)
fname=trim(filename)//trim(adjustl(char_row))//trim(adjustl(char_col))
open (unit=10,file=fname)
do i=lb+nghosts,lb+nghosts+nrl-1
write (10,'(*(g0,1x))') u(i,lb+nghosts:lb+nghosts+ncl-1)
enddo
call MPI_Comm_free(grid_comm)
call MPI_Finalize()
end program
subroutine set_bcs(lb,nghosts,nr,nc,nrows,ncols,coords,u)
implicit none
integer, intent(in) :: lb ,nghosts, nr, nc, nrows, ncols
integer, dimension(:), intent(in) :: coords
double precision, dimension(lb:,lb:), intent(inout) :: u
double precision :: topBC, bottomBC, leftBC, rightBC
! Set boundary values
! This has the ice bath on the bottom edge.
! Note: when plotting, 0,0 is the bottom.
topBC=0.d0
bottomBC=100.d0
rightBC=100.d0
leftBC=100.d0
!Set boundary conditions
if (coords(1)==0) then
u(lb:lb+nghosts-1,:)=topBC
else if (coords(1)==nrows-1) then
u(nr+nghosts+lb:,:)=bottomBC
endif
if (coords(2)==0) then
u(:,lb:lb+nghosts-1)=leftBC
else if (coords(2)==ncols-1) then
u(:,nc+nghosts+lb:)=rightBC
endif
end subroutine set_bcs
Download mpi_ghostplate.f90 file
Python
Contents of mpi_ghostzones.py
import sys
import numpy as np
from mpi4py import MPI
comm = MPI.COMM_WORLD
rank = comm.Get_rank()
nprocs = comm.Get_size()
#This example exchanges data among six rectangular domains with halos.
#Most real codes use squares, but we want to illustrate how to use different
#dimensions.
#Divide up the processes. Either we require a perfect square, or we
#must specify how to distribute by row/column. In a realistic program,
#the process distribution (either the total, for a perfect square, or
#the rows/columns) would be read in and we would need to check that the number
#of processes requested is consistent with the decomposition.
N = 8
M = 12
nrows=2
ncols=3
if nrows*ncols != nprocs:
    print("Number of rows times columns does not equal nprocs")
    sys.exit()
if N%nrows==0 and M%ncols==0:
    nrl = N//nrows
    ncl = M//ncols
else:
    print("Number of ranks should divide the number of rows evenly.")
    sys.exit()
#Set up the topology
ndims=2
dims=[nrows, ncols]
periods=[True, False]
grid_comm=comm.Create_cart(dims, periods)
grid_rank=grid_comm.Get_rank()
grid_coords=grid_comm.coords
direction=0
displ=1
(up,down)=grid_comm.Shift(direction, displ)
direction=1
(left,right)=grid_comm.Shift(direction,displ)
#print("Topo {:d} {:} {:d} {:d} {:d}".format(grid_rank,left,right,up,down))
#Assume same number of ghost zones for rows and columns
nghosts=2
w_halo=2*nghosts
nrl_total=nrl+w_halo
ncl_total=ncl+w_halo
w = np.zeros((nrl_total, ncl_total), dtype=np.double)
#Arbitrary values.
w[:,:]=np.reshape(np.arange(1,(nrl_total)*(ncl_total)+1),(nrl_total,ncl_total))
w=w*(grid_rank+1)
#Boundary conditions
#Make it easier to distinguish one side from another for checking
topBC=100
bottomBC=400.
edgeBC=350.
if grid_coords[0]==0:
    w[0:nghosts,:]=topBC
if grid_coords[0]==nrows-1:
    w[nrl+nghosts:,:]=bottomBC
if grid_coords[1]==0:
    w[:,0:nghosts]=edgeBC
if grid_coords[1]==ncols-1:
    w[:,ncl+nghosts:]=edgeBC
# set up MPI types for buffers
sizes = [nrl_total,ncl_total]
# Rows
subsizes = [nghosts,ncl_total]
starts = [nghosts,0]
sbuf_up=MPI.DOUBLE.Create_subarray(sizes, subsizes, starts)
sbuf_up.Commit()
starts = [nrl+nghosts,0]
rbuf_down=MPI.DOUBLE.Create_subarray(sizes, subsizes, starts)
rbuf_down.Commit()
starts = [nrl,0]
sbuf_down=MPI.DOUBLE.Create_subarray(sizes, subsizes, starts)
sbuf_down.Commit()
starts = [0,0]
rbuf_up=MPI.DOUBLE.Create_subarray(sizes, subsizes, starts)
rbuf_up.Commit()
# Columns
subsizes = [nrl,nghosts]
starts = [nghosts,ncl]
sbuf_right=MPI.DOUBLE.Create_subarray(sizes, subsizes, starts)
sbuf_right.Commit()
starts = [nghosts,0]
rbuf_left=MPI.DOUBLE.Create_subarray(sizes, subsizes, starts)
rbuf_left.Commit()
starts = [nghosts,nghosts]
sbuf_left=MPI.DOUBLE.Create_subarray(sizes, subsizes, starts)
sbuf_left.Commit()
starts = [nghosts,ncl+nghosts]
rbuf_right=MPI.DOUBLE.Create_subarray(sizes, subsizes, starts)
rbuf_right.Commit()
tag=0
# sending up and receiving down
grid_comm.Sendrecv([w,1,sbuf_up], up, tag, [w,1,rbuf_down], down, tag)
# sending down and receiving up
grid_comm.Sendrecv([w,1,sbuf_down], down, tag, [w,1,rbuf_up], up, tag)
# sending left and right
grid_comm.Sendrecv([w,1,sbuf_left], left, tag, [w,1,rbuf_right], right, tag)
grid_comm.Sendrecv([w,1,sbuf_right], right, tag, [w,1,rbuf_left], left, tag)
#Results
status=MPI.Status()
#Check row exchange
u=np.zeros_like(w)
grid_comm.Barrier()
if grid_rank==0:
    grid_comm.Recv([u,MPI.DOUBLE],source=1,tag=1,status=status)
    print("Ranks 0 and 1 check columns")
    for i in range(nrl_total):
        print(w[i,:],end='')
        print(" | ",end='')
        print(u[i,:])
    print()
if grid_rank==1:
    grid_comm.Send([w,MPI.DOUBLE],dest=0,tag=1)
grid_comm.Barrier()
u[:,:]=0.0
if grid_rank==1:
    grid_comm.Recv([u,MPI.DOUBLE],source=2,tag=2,status=status)
    print("Ranks 1 and 2 check columns")
    for i in range(nrl_total):
        print(w[i,:],end='')
        print(" | ",end='')
        print(u[i,:])
    print()
if grid_rank==2:
    grid_comm.Send([w,MPI.DOUBLE],dest=1,tag=2)
#Check rows including periodic
grid_comm.Barrier()
u[:,:]=0.0
if grid_rank==0:
    grid_comm.Recv([u,MPI.DOUBLE],source=3,tag=3,status=status)
    print("Ranks 0 and 3 check rows including periodic exchange")
    for i in range(nrl_total):
        print(w[i,:])
    print('-----------------------------------------')
    for i in range(nrl_total):
        print(u[i,:])
if grid_rank==3:
    grid_comm.Send([w,MPI.DOUBLE],dest=0,tag=3)
Download mpi_ghostzones.py file
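The Project 1 C++ and Python examples above hard-code a 2x3 process grid, so they must be launched on exactly 6 ranks, for example with mpirun -np 6 ./mpi_ghostzones or mpiexec -n 6 python mpi_ghostzones.py (the launcher name depends on the MPI installation). The Fortran listing shown here is the full plate solver: it requires a perfect-square number of ranks and takes the convergence tolerance and an output-file name as command-line arguments.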
Project 2
- Modify or write a two-dimensional Jacobi solver with a variable number of ghost zones in each dimension. As in Project 1, you may assume the same number of ghost zones in each dimension. Use subarrays for the halo communications. Make your sends and receives nonblocking. A sketch of the basic exchange pattern is shown below, ahead of the full solutions.
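The following is a minimal, runnable sketch (mpi4py) of the pattern the solutions use: a subarray type describing a halo face of width nghosts, posted with nonblocking receive and send and completed with Waitall. Only one matched face pair (send up, receive from below) is shown, and the sizes are illustrative assumptions; ranks on a boundary get MPI.PROC_NULL neighbors, for which the calls are no-ops.

import numpy as np
from mpi4py import MPI

comm = MPI.COMM_WORLD
nghosts = 2                                   # assumed halo width
nrl, ncl = 4, 6                               # assumed local interior size
nrl_total, ncl_total = nrl + 2*nghosts, ncl + 2*nghosts
u = np.zeros((nrl_total, ncl_total))

# 2-D process grid; neighbors that do not exist come back as MPI.PROC_NULL
dims = MPI.Compute_dims(comm.Get_size(), 2)
grid_comm = comm.Create_cart(dims, [False, False])
up, down = grid_comm.Shift(0, 1)

sizes = [nrl_total, ncl_total]
# sbuf_up: my first nghosts interior rows (what I send to the rank above)
sbuf_up = MPI.DOUBLE.Create_subarray(sizes, [nghosts, ncl_total], [nghosts, 0]).Commit()
# rbuf_down: my bottom ghost rows (where the rank below's data lands)
rbuf_down = MPI.DOUBLE.Create_subarray(sizes, [nghosts, ncl_total], [nrl + nghosts, 0]).Commit()

reqs = [grid_comm.Irecv([u, 1, rbuf_down], down),
        grid_comm.Isend([u, 1, sbuf_up], up)]
MPI.Request.Waitall(reqs)
# ...the Jacobi update of the interior would go here; the solutions repeat this
# for the down-to-up and left/right face pairs.
sbuf_up.Free(); rbuf_down.Free()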
Example Solutions
C++
Contents of mpi_ghostplate.cxx
#include <iostream>
#include <cmath>
#include <string>
#include <sstream>
#include <fstream>
#include <iomanip>
#include <mpi.h>
#define MAX_ITER 10000000
void set_bcs(int nghosts, int nr, int nc, int nrows, int ncols, int* coords, double **u);
using namespace std;
int main (int argc, char *argv[]) {
double diff; // Change in value
int N, M;
int iteration = 0;
int diffInterval;
double epsilon;
// Added for MPI
int nrows, ncols;
int nrl, ncl;
int rank, grid_rank, nprocs;
int up, down, right, left;
int tag=0;
int nghosts, w_halo;
int errcode=1;
double gdiff;
//Initialize MPI
MPI_Init(&argc, &argv);
MPI_Comm_size(MPI_COMM_WORLD,&nprocs);
MPI_Comm_rank(MPI_COMM_WORLD,&rank);
//check number of parameters and read in epsilon, filename, optionally size
if (argc < 3) {
printf ("USAGE: %s epsilon output-file <N> <M>\n", argv[0]);
MPI_Abort(MPI_COMM_WORLD,errcode);
return 1;
}
else {
stringstream ssepsilon;
ssepsilon<<argv[1];
if (ssepsilon.fail()) {
printf ("Error reading in epsilon value\n");
MPI_Abort(MPI_COMM_WORLD,errcode);
return 2;
}
ssepsilon>>epsilon;
if (argc==3) {
N=500;
M=500;
}
if (argc==4) {
stringstream ssnr;
ssnr<<argv[3];
if (ssnr.fail()) {
cout<<"Error converting row dimension \n";
MPI_Abort(MPI_COMM_WORLD,errcode);
return 2;
}
ssnr>>N;
M=N;
}
if (argc==5) {
stringstream ssnr;
ssnr<<argv[3];
if (ssnr.fail()) {
cout<<"Error converting row dimension \n";
MPI_Abort(MPI_COMM_WORLD,errcode);
return 2;
}
ssnr>>N;
stringstream ssnc;
ssnc<<argv[4];
if (ssnc.fail()) {
cout<<"Error converting row dimension \n";
MPI_Abort(MPI_COMM_WORLD,errcode);
return 2;
}
ssnc>>M;
}
}
//For simplicity, we will limit this code to perfect square process count
double rows=sqrt(nprocs);
nrows=(int)rows;
if ( nrows*nrows != nprocs ) {
cout<<"This code requires a perfect square number of processes\n";
MPI_Abort(MPI_COMM_WORLD,errcode);
return 1;
}
else {
ncols=nrows;
}
//Strong scaling
//Weak scaling would set nrl=N and ncl=M
if (N%nrows==0 && M%ncols==0) {
nrl = N/nrows;
ncl = M/ncols;
}
else {
cout<<"Number of ranks should divide the total number of rows/cols evenly.\n";
MPI_Abort(MPI_COMM_WORLD,errcode);
}
//Number of ghost zones
nghosts=1;
w_halo=nghosts*2;
int nrl_total=nrl+w_halo;
int ncl_total=ncl+w_halo;
double **u=new double*[nrl_total];
double *uptr=new double[(nrl_total)*(ncl_total)];
double **w=new double*[nrl_total];
double *wptr=new double[(nrl_total)*(ncl_total)];
for (int i=0;i<nrl_total;++i,wptr+=ncl_total) {
w[i] = wptr;
}
for (int i=0;i<nrl_total;++i,uptr+=ncl_total) {
u[i] = uptr;
}
//Declare one-d diff array for faster access.
int dsize = nrl*ncl;
double *diffs = new double[dsize];
for ( int i = 0; i < nrl_total; i++ ) {
for (int j = 0; j < ncl_total; j++ ) {
u[i][j] = 0.;
w[i][j] = 0.;
}
}
//Set up the topology assuming processes number left to right by row
const int ndims=2;
int dims[2]={nrows,ncols};
int periods[2]={0,0};
int reorder=0;
MPI_Comm grid_comm;
int grid_coords[2];
MPI_Cart_create(MPI_COMM_WORLD, ndims, dims, periods, reorder, &grid_comm);
MPI_Comm_rank(grid_comm, &grid_rank);
MPI_Cart_coords(grid_comm, grid_rank, ndims, grid_coords);
int direction=0;
int displ=1;
MPI_Cart_shift(grid_comm, direction, displ, &up, &down);
direction=1;
MPI_Cart_shift(grid_comm, direction, displ, &left, &right);
set_bcs(nghosts, nrl, ncl, nrows, ncols, grid_coords, u);
int starts[ndims];
int subsizes[ndims];
int sizes[]={nrl_total,ncl_total};
MPI_Datatype sbuf_left,rbuf_right,sbuf_right,rbuf_left;
MPI_Datatype sbuf_up,rbuf_down,sbuf_down,rbuf_up;
//Set up the MPI type for row buffers
//
subsizes[0]=nghosts; subsizes[1]=ncl_total;
starts[0]=nghosts; starts[1]=0;
MPI_Type_create_subarray(ndims,sizes,subsizes,starts,MPI_ORDER_C,MPI_DOUBLE,&sbuf_up);
MPI_Type_commit(&sbuf_up);
starts[0]=nrl+nghosts; starts[1]=0;
MPI_Type_create_subarray(ndims,sizes,subsizes,starts,MPI_ORDER_C,MPI_DOUBLE,&rbuf_down);
MPI_Type_commit(&rbuf_down);
starts[0]=nrl; starts[1]=0;
MPI_Type_create_subarray(ndims,sizes,subsizes,starts,MPI_ORDER_C,MPI_DOUBLE,&sbuf_down);
MPI_Type_commit(&sbuf_down);
starts[0]=0; starts[1]=0;
MPI_Type_create_subarray(ndims,sizes,subsizes,starts,MPI_ORDER_C,MPI_DOUBLE,&rbuf_up);
MPI_Type_commit(&rbuf_up);
//Set up MPI types for columns
//
subsizes[0]=nrl; subsizes[1]=nghosts;
starts[0]=nghosts; starts[1]=ncl;
MPI_Type_create_subarray(ndims,sizes,subsizes,starts,MPI_ORDER_C,MPI_DOUBLE,&sbuf_right);
MPI_Type_commit(&sbuf_right);
starts[0]=nghosts; starts[1]=0;
MPI_Type_create_subarray(ndims,sizes,subsizes,starts,MPI_ORDER_C,MPI_DOUBLE,&rbuf_left);
MPI_Type_commit(&rbuf_left);
starts[0]=nghosts; starts[1]=nghosts;
MPI_Type_create_subarray(ndims,sizes,subsizes,starts,MPI_ORDER_C,MPI_DOUBLE,&sbuf_left);
MPI_Type_commit(&sbuf_left);
starts[0]=nghosts; starts[1]=ncl+nghosts;
MPI_Type_create_subarray(ndims,sizes,subsizes,starts,MPI_ORDER_C,MPI_DOUBLE,&rbuf_right);
MPI_Type_commit(&rbuf_right);
//Buffers set up
MPI_Status status;
const int nrequests=8;
MPI_Request requests[nrequests];
diffInterval=1;
if (rank==0) {
cout<<"Running until the difference is <="<<epsilon<<" with size "<<N<<"x"<<M<<"\n";
}
// Compute steady-state solution
double time0=MPI_Wtime();
while ( iteration <= MAX_ITER ) {
MPI_Irecv(&u[0][0], 1, rbuf_up, up, tag, grid_comm, &requests[0]);
MPI_Irecv(&u[0][0], 1, rbuf_down, down, tag, grid_comm, &requests[1]);
MPI_Irecv(&u[0][0], 1, rbuf_left, left, tag, grid_comm, &requests[2]);
MPI_Irecv(&u[0][0], 1, rbuf_right, right, tag, grid_comm, &requests[3]);
MPI_Isend(&u[0][0], 1, sbuf_down, down, tag, grid_comm, &requests[5]);
MPI_Isend(&u[0][0], 1, sbuf_up, up, tag, grid_comm, &requests[4]);
MPI_Isend(&u[0][0], 1, sbuf_right, right, tag, grid_comm, &requests[6]);
MPI_Isend(&u[0][0], 1, sbuf_left, left, tag, grid_comm, &requests[7]);
MPI_Waitall(nrequests,requests,MPI_STATUSES_IGNORE);
for (int i=nghosts; i<nrl+nghosts;i++) {
for (int j=nghosts;j<ncl+nghosts;j++) {
w[i][j] = (u[i-1][j] + u[i+1][j] + u[i][j-1] + u[i][j+1])/4.0;
diffs[(i-nghosts)*ncl+j-nghosts] = abs(w[i][j] - u[i][j]);
}
}
if (iteration%diffInterval==0) {
diff=diffs[0];
for (int i=1;i<dsize;i++) {
if (diff<diffs[i]) {
diff=diffs[i];
}
}
}
//Find max of diff in all the processors.
MPI_Allreduce(&diff,&gdiff,1,MPI_DOUBLE,MPI_MAX,grid_comm);
if (gdiff <= epsilon)
break;
//Update u
for (int i=0; i<nrl_total; ++i){
for (int j=0; j<ncl_total; ++j) {
u[i][j]=w[i][j];
}
}
//Reset halo values
for (int i=0;i<nghosts;i++) {
for (int j=0; j<ncl_total; j++) {
w[i][j]=u[i][j];
w[nrl+nghosts+i][j]=u[nrl+nghosts+i][j];
}
}
for (int i=0;i<nrl_total;i++) {
for (int j=0;j<nghosts;j++) {
w[i][j]=u[i][j];
w[i][ncl+nghosts+j]=u[i][ncl+nghosts+j];
}
}
set_bcs(nghosts, nrl, ncl, nrows, ncols, grid_coords, u);
iteration++;
} //end of computation
if (iteration>MAX_ITER) {
if (rank==0) {
cout<<"Warning: maximum iteration exceeded\n";
}
}
double totalTime=(MPI_Wtime()-time0);
if (rank==0) {
cout << "Completed "<<iteration<<" iterations; time "<<totalTime<<endl;
}
// Write solution to output file
ofstream fp;
string fname=argv[2]+to_string(grid_coords[0])+to_string(grid_coords[1]);
fp.open(fname,ios::out);
for (int i = nghosts; i < nrl+nghosts; i++) {
for (int j = nghosts; j < ncl+nghosts; j++) {
fp<<u[i][j]<<" ";
}
fp<<"\n";
}
fp.close();
//Type_free not really necessary but good practice
MPI_Type_free(&sbuf_up);
MPI_Type_free(&sbuf_down);
MPI_Type_free(&rbuf_up);
MPI_Type_free(&rbuf_down);
MPI_Type_free(&sbuf_left);
MPI_Type_free(&sbuf_right);
MPI_Type_free(&rbuf_left);
MPI_Type_free(&rbuf_right);
MPI_Comm_free(&grid_comm);
MPI_Finalize();
return 0;
}
void set_bcs(int nghosts,int nr, int nc, int nrows, int ncols, int* coords, double **u) {
/* Set boundary values.
* This has an ice bath on the top edge.
* Note: when plotting, 0,0 is the bottom.
*/
int nr_total=nr+2*nghosts;
int nc_total=nc+2*nghosts;
double topBC=0.;
double bottomBC=100.;
double leftBC=100.;
double rightBC=100.;
if (coords[0]==0) {
for (int i=0; i<nghosts; ++i) {
for (int j=0;j<nc_total;++j){
u[i][j]=topBC;
}
}
} else if (coords[0]==nrows-1) {
for (int i=nr+nghosts; i<nr_total; ++i) {
for (int j=0;j<nc_total;++j){
u[i][j]=bottomBC;
}
}
}
if (coords[1]==0) {
for (int i=0; i<nr_total; ++i){
for (int j=0; j<nghosts; ++j) {
u[i][j]=leftBC;
}
}
} else if (coords[1]==ncols-1) {
for (int i=0; i<nr_total; ++i) {
for (int j=nc+nghosts;j<nc_total;++j){
u[i][j]=rightBC;
}
}
}
}
Download mpi_ghostplate.cxx file
Fortran
Contents of mpi_ghostplate.f90
program ghosts
use mpi_f08
implicit none
integer, parameter :: maxiters=10000000
integer :: iteration=0
integer :: N, M
integer :: i,j
integer :: numargs, diff_interval
integer :: lb, ubr, ubc
double precision, allocatable, dimension(:,:) :: u,w
double precision :: eps, diff = 1.0
character(len=80) :: arg, filename
character(len=4) :: char_row, char_col
double precision :: time0, time1
double precision :: rows
integer :: nrows, ncols, nrl, ncl
integer :: world_rank, nprocs, tag=0
integer, parameter :: root=0
integer :: left, right, up, down
integer :: nghosts, w_halo
integer :: nrl_gz,ncl_gz,nrl_total,ncl_total
integer :: nsubdims
integer, dimension(2) :: dims, grid_coords
logical, dimension(2) :: periods
integer, dimension(2) :: sizes, subsizes, starts
logical :: reorder
integer :: ndims, grid_rank
integer :: direction, displ
integer :: nrequests
type(MPI_Comm) :: grid_comm
type(MPI_Status) :: mpi_stat
type(MPI_Datatype) :: sbuf_up, sbuf_down, rbuf_up, rbuf_down
type(MPI_Datatype) :: sbuf_left, sbuf_right, rbuf_left, rbuf_right
type(MPI_Request), dimension(:), allocatable :: mpi_requests
double precision :: gdiff
character(len=24) :: fname
interface
subroutine set_bcs(lb, nghosts,nr,nc,nrows,ncols,coords,u)
implicit none
integer, intent(in) :: lb, nghosts, nr, nc, nrows, ncols
integer, dimension(:), intent(in) :: coords
double precision, dimension(0:,0:), intent(inout) :: u
end subroutine
end interface
!Initialize MPI, get the local number of columns
call MPI_INIT()
call MPI_COMM_SIZE(MPI_COMM_WORLD,nprocs)
call MPI_COMM_RANK(MPI_COMM_WORLD,world_rank)
! check number of parameters and read in epsilon, filename, optionally size
! all ranks do this
numargs=command_argument_count()
if (numargs .lt. 2) then
call MPI_Finalize()
stop 'USAGE: epsilon output-file <N> <M>'
else
call get_command_argument(1,arg)
read(arg,*) eps
call get_command_argument(2,filename)
if (numargs==2) then
N=500
M=500
endif
if (numargs==3) then
call get_command_argument(3,arg)
read(arg,*) N
M=N
endif
if (numargs==4) then
call get_command_argument(3,arg)
read(arg,*) N
call get_command_argument(4,arg)
read(arg,*) M
endif
endif
!For simplicity, we will limit this code to perfect square process count
rows=sqrt(dble(nprocs))
if ( rows /= dble(int(rows)) ) then
call MPI_Finalize()
stop "This code requires a perfect square number of processes"
else
nrows=int(rows)
ncols=nrows
endif
!Strong scaling
!Weak scaling would set nrl=N and ncl=M
if (mod(N,nrows)==0 .and. mod(M,ncols)==0) then
nrl = N/nrows
ncl = M/ncols
else
call MPI_Finalize()
stop "Number of ranks should divide the number of rows evenly."
endif
!create cartesian topology for processes
ndims=2
dims(1) = nrows ! number of rows
dims(2) = ncols ! number of columns
periods(1) = .false. ! not cyclic in this direction
periods(2) = .false. ! not cyclic in this direction
reorder = .false. ! do not allow MPI to reorder the ranks
call MPI_Cart_create(MPI_COMM_WORLD, ndims, dims, periods, &
reorder, grid_comm)
call MPI_Comm_rank(grid_comm, grid_rank)
call MPI_Cart_coords(grid_comm, grid_rank, ndims, grid_coords)
direction = 0 ! shift along the 1st index (0 or 1)
displ = 1 ! shift by 1
call MPI_Cart_shift(grid_comm, direction, displ, up, down)
direction = 1 ! shift along the 2nd index (0 or 1)
call MPI_Cart_shift(grid_comm, direction, displ, left, right)
! Set up values
! Assume number of ghost zones same in both dimensions, lower bounds same
nghosts=1 ! number of ghost zones
w_halo=2*nghosts ! total halo width in each dimension (including corners)
nrl_gz=nrl+nghosts
ncl_gz=ncl+nghosts
nrl_total=nrl+w_halo
ncl_total=ncl+w_halo
!Assume lower bound same for both dimensions
lb=0
ubr=nrl_total-1
ubc=ncl_total-1
allocate(u(lb:ubr,lb:ubc), w(lb:ubr,lb:ubc))
u=0.d0
w=0.d0
!Physical boundary conditions
call set_bcs(lb,nghosts,nrl,ncl,nrows,ncols,grid_coords,u)
! Set up sendrecv/buffers
nsubdims = 2
sizes = [nrl_total,ncl_total]
!Rows
subsizes = [nghosts,ncl_total]
! Start indices must begin at 0 like C regardless of declared lower bound
starts = [nghosts,0]
call MPI_Type_create_subarray(nsubdims, sizes, subsizes, starts, &
MPI_ORDER_FORTRAN,MPI_DOUBLE_PRECISION, sbuf_up)
call MPI_TYPE_COMMIT(sbuf_up)
starts = [nrl+nghosts,0]
call MPI_Type_create_subarray(nsubdims, sizes, subsizes, starts, &
MPI_ORDER_FORTRAN,MPI_DOUBLE_PRECISION,rbuf_down)
call MPI_TYPE_COMMIT(rbuf_down)
starts = [nrl,0]
call MPI_Type_create_subarray(nsubdims, sizes, subsizes, starts, &
MPI_ORDER_FORTRAN,MPI_DOUBLE_PRECISION,sbuf_down)
call MPI_TYPE_COMMIT(sbuf_down)
starts = [0,0]
call MPI_Type_create_subarray(nsubdims, sizes, subsizes, starts, &
MPI_ORDER_FORTRAN,MPI_DOUBLE_PRECISION, rbuf_up)
call MPI_TYPE_COMMIT(rbuf_up)
!Columns
subsizes = [nrl,nghosts]
starts = [nghosts,ncl]
call MPI_Type_create_subarray(nsubdims, sizes, subsizes, starts, &
MPI_ORDER_FORTRAN,MPI_DOUBLE_PRECISION,sbuf_right)
call MPI_TYPE_COMMIT(sbuf_right)
starts = [nghosts,0]
call MPI_Type_create_subarray(nsubdims, sizes, subsizes, starts, &
MPI_ORDER_FORTRAN,MPI_DOUBLE_PRECISION,rbuf_left)
call MPI_TYPE_COMMIT(rbuf_left)
starts = [nghosts,nghosts]
call MPI_Type_create_subarray(nsubdims, sizes, subsizes, starts, &
MPI_ORDER_FORTRAN,MPI_DOUBLE_PRECISION,sbuf_left)
call MPI_TYPE_COMMIT(sbuf_left)
starts = [nghosts,ncl+nghosts]
call MPI_Type_create_subarray(nsubdims, sizes, subsizes, starts, &
MPI_ORDER_FORTRAN,MPI_DOUBLE_PRECISION,rbuf_right)
call MPI_TYPE_COMMIT(rbuf_right)
nrequests=8
allocate(mpi_requests(nrequests))
diff_interval=1
! Walltime from process 0 is good enough for me
if (grid_rank .eq. 0) then
write (*,'(a,es15.8,a,i4,a,i4)') 'Running until the difference is <= ', &
eps,' for global size ',N,'x',M
time0=MPI_WTIME()
endif
! Compute steady-state solution
do while ( iteration<=maxiters )
! Exchange halo values
call MPI_Irecv(u, 1, rbuf_right, right, tag, grid_comm,mpi_requests(1))
call MPI_Irecv(u, 1, rbuf_left, left, tag, grid_comm,mpi_requests(2))
call MPI_Irecv(u, 1, rbuf_up, up, tag, grid_comm,mpi_requests(3))
call MPI_Irecv(u, 1, rbuf_down, down, tag, grid_comm,mpi_requests(4))
call MPI_Isend(u, 1, sbuf_left, left, tag, grid_comm,mpi_requests(5))
call MPI_Isend(u, 1, sbuf_right, right, tag, grid_comm,mpi_requests(6))
call MPI_Isend(u, 1, sbuf_down, down, tag, grid_comm,mpi_requests(7))
call MPI_Isend(u, 1, sbuf_up, up, tag, grid_comm,mpi_requests(8))
! complete communications
call MPI_Waitall(nrequests,mpi_requests,MPI_STATUSES_IGNORE)
do j=lb+nghosts,lb+ncl_gz-1
do i=lb+nghosts,lb+nrl_gz-1
w(i,j) = 0.25*(u(i-1,j) + u(i+1,j) + u(i,j-1) + u(i,j+1))
enddo
enddo
if (mod(iteration,diff_interval)==0) then
if (diff_interval==-1) continue !disable convergence test
diff=maxval(abs(w(nghosts:ubr-nghosts,nghosts:ubc-nghosts) &
-u(nghosts:ubr-nghosts,nghosts:ubc-nghosts)))
call MPI_ALLREDUCE(diff,gdiff,1,MPI_DOUBLE_PRECISION,MPI_MAX, &
grid_comm)
if (gdiff <= eps) then
exit
endif
endif
!Update u. Don't overwrite boundaries.
u(nghosts:nrl_gz-1,nghosts:ncl_gz-1) = w(nghosts:nrl_gz-1,nghosts:ncl_gz-1)
! Reset physical boundaries (they get overwritten in the halo exchange)
call set_bcs(lb,nghosts,nrl,ncl,nrows,ncols,grid_coords,u)
!If this happens we will exit at next conditional test.
!Avoids explicit break here
if (iteration==maxiters) then
if (grid_rank==0) then
write(*,*) "Warning: maximum iterations exceeded"
endif
endif
iteration = iteration + 1
enddo
if (grid_rank==root) then
time1 = MPI_WTIME()
write(*,*) 'completed; iterations = ', iteration,"in ",time1-time0," sec"
endif
! Write solution to output file by row. Must use correct stitching to get
! overall result, such as a numpy concatenate on axis=1.
! It would also work to output by columns, in which case stitching is more
! straightforward.
!Some compilers (Intel) like to add EOLs to line up columns in output. The
! Fortran standard permits this with * IO, but it complicates plotting.
! Use some F08 features to fix this.
write(char_row,'(i4)') grid_coords(1)
write(char_col,'(i4)') grid_coords(2)
fname=trim(filename)//trim(adjustl(char_row))//trim(adjustl(char_col))
open (unit=10,file=fname)
do i=lb+nghosts,lb+nghosts+nrl-1
write (10,'(*(g0,1x))') u(i,lb+nghosts:lb+nghosts+ncl-1)
enddo
call MPI_Comm_free(grid_comm)
call MPI_Finalize()
end program
subroutine set_bcs(lb,nghosts,nr,nc,nrows,ncols,coords,u)
implicit none
integer, intent(in) :: lb ,nghosts, nr, nc, nrows, ncols
integer, dimension(:), intent(in) :: coords
double precision, dimension(lb:,lb:), intent(inout) :: u
double precision :: topBC, bottomBC, leftBC, rightBC
! Set boundary values
! This has the ice bath on the bottom edge.
! Note: when plotting, 0,0 is the bottom.
topBC=0.d0
bottomBC=100.d0
rightBC=100.d0
leftBC=100.d0
!Set boundary conditions
if (coords(1)==0) then
u(lb:lb+nghosts-1,:)=topBC
else if (coords(1)==nrows-1) then
u(nr+nghosts+lb:,:)=bottomBC
endif
if (coords(2)==0) then
u(:,lb:lb+nghosts-1)=leftBC
else if (coords(2)==ncols-1) then
u(:,nc+nghosts+lb:)=rightBC
endif
end subroutine set_bcs
Download mpi_ghostplate.f90 file
Python
Contents of mpi_ghostplate.py
import sys
import time
import numpy as np
from mpi4py import MPI
def set_bcs(u,nrl,ncl,nrows,ncols,coords):
    # Set physical boundary values on the ranks that own a physical edge.
    # Due to Python pass-by-assignment, the mutable array u is modified in
    # place (so this acts as a subroutine). Uses the module-level nghosts.
    topBC=0.
    bottomBC=100.
    leftBC = 100.
    rightBC = 100.
    if coords[0]==0:
        u[0:nghosts,:]=topBC
    if coords[0]==nrows-1:
        u[nrl+nghosts:,:]=bottomBC
    if coords[1]==0:
        u[:,0:nghosts]=leftBC
    if coords[1]==ncols-1:
        u[:,ncl+nghosts:]=rightBC
# check number of parameters and read in parameters
#Better to use argparse but this is closer to compiled language code and simpler
#if one hasn't used argparse.
argv=sys.argv
if len(argv) > 2:
    try:
        epsilon=float(argv[1])
        filename=argv[2]
    except:
        print("Illegal input")
        sys.exit(1)
    if len(argv) == 3:
        N=500
        M=500
    if len(argv) == 4:
        try:
            N=int(argv[3])
            M=N
        except:
            print("Cannot convert grid dimension to integer")
            sys.exit(2)
    if len(argv) == 5:
        try:
            N=int(argv[3])
            M=int(argv[4])
        except:
            print("Cannot convert grid dimension to integer")
            sys.exit(2)
else:
    print('USAGE: epsilon output-file <N> <M>')
    sys.exit(1)
#Set up MPI
comm = MPI.COMM_WORLD
rank = comm.Get_rank()
nprocs = comm.Get_size()
root=0
tag=0
#Set number of rows and columns
#For simplicity, require a perfect square number of processes
nrows=np.sqrt(nprocs)
if nrows != int(nrows):
    print("This code requires a perfect square number of processes")
    sys.exit()
else:
    nrows=int(nrows)
    ncols=nrows
#Strong scaling
if N%nrows!=0 or M%ncols!=0:
    print("The process grid must divide the number of rows and columns evenly")
    sys.exit(0)
else:
    nrl = N//nrows
    ncl = M//ncols
#Set max iterations. 10 million should do it.
max_iterations=10000000
#Set up the topology
ndims=2
dims=[nrows, ncols]
periods=[False, False]
grid_comm=comm.Create_cart(dims, periods)
grid_rank=grid_comm.Get_rank()
grid_coords=grid_comm.coords
direction=0
displ=1
(up,down)=grid_comm.Shift(direction, displ)
direction=1
(left,right)=grid_comm.Shift(direction,displ)
#Assume same number of ghost zones for rows and columns
nghosts=2
w_halo=2*nghosts
nrl_total=nrl+w_halo
ncl_total=ncl+w_halo
#Solution arrays
u = np.zeros((nrl_total, ncl_total), dtype=np.double)
w = np.zeros((nrl_total, ncl_total), dtype=np.double)
set_bcs(u,nrl,ncl,nrows,ncols,grid_coords)
# set up MPI types for buffers
sizes = [nrl_total,ncl_total]
# Rows
subsizes = [nghosts,ncl_total]
starts = [nghosts,0]
sbuf_up=MPI.DOUBLE.Create_subarray(sizes, subsizes, starts)
sbuf_up.Commit()
starts = [nrl+nghosts,0]
rbuf_down=MPI.DOUBLE.Create_subarray(sizes, subsizes, starts)
rbuf_down.Commit()
starts = [nrl,0]
sbuf_down=MPI.DOUBLE.Create_subarray(sizes, subsizes, starts)
sbuf_down.Commit()
starts = [0,0]
rbuf_up=MPI.DOUBLE.Create_subarray(sizes, subsizes, starts)
rbuf_up.Commit()
# Columns
subsizes = [nrl,nghosts]
starts = [nghosts,ncl]
sbuf_right=MPI.DOUBLE.Create_subarray(sizes, subsizes, starts)
sbuf_right.Commit()
starts = [nghosts,0]
rbuf_left=MPI.DOUBLE.Create_subarray(sizes, subsizes, starts)
rbuf_left.Commit()
starts = [nghosts,nghosts]
sbuf_left=MPI.DOUBLE.Create_subarray(sizes, subsizes, starts)
sbuf_left.Commit()
starts = [nghosts,ncl+nghosts]
rbuf_right=MPI.DOUBLE.Create_subarray(sizes, subsizes, starts)
rbuf_right.Commit()
# Compute steady-state solution
iteration=0
diff_interval=1
start_time=MPI.Wtime()
my_diff = np.zeros(1)
global_diff=np.zeros(1)
nrl_gz=nrl+nghosts
ncl_gz=ncl+nghosts
if rank==root:
    print(f'Running until the difference is < {epsilon:}, global size {N:}x{M:}')
while iteration<=max_iterations:
    # Post the nonblocking halo exchange on the Cartesian communicator
    rcv_down =grid_comm.Irecv([u, 1, rbuf_down], down)
    rcv_up   =grid_comm.Irecv([u, 1, rbuf_up], up)
    rcv_left =grid_comm.Irecv([u, 1, rbuf_left], left)
    rcv_right=grid_comm.Irecv([u, 1, rbuf_right], right)
    snd_up   =grid_comm.Isend([u, 1, sbuf_up], up)
    snd_down =grid_comm.Isend([u, 1, sbuf_down], down)
    snd_right=grid_comm.Isend([u, 1, sbuf_right], right)
    snd_left =grid_comm.Isend([u, 1, sbuf_left], left)
    requests=[rcv_down,rcv_up,snd_up,snd_down,
              rcv_left,rcv_right,snd_right,snd_left]
    MPI.Request.Waitall(requests)
    # Jacobi update of the interior cells
    w[nghosts:nrl_gz,nghosts:ncl_gz]=0.25*(u[nghosts-1:nrl_gz-1,nghosts:ncl_gz]+
                                           u[nghosts+1:nrl_gz+1,nghosts:ncl_gz]+
                                           u[nghosts:nrl_gz,nghosts-1:ncl_gz-1]+
                                           u[nghosts:nrl_gz,nghosts+1:ncl_gz+1])
    if iteration%diff_interval==0:
        my_diff[0]=np.max(np.abs(w[nghosts:nrl_gz,nghosts:ncl_gz]-
                                 u[nghosts:nrl_gz,nghosts:ncl_gz]))
        grid_comm.Allreduce(my_diff,global_diff,op=MPI.MAX)
        if global_diff[0]<=epsilon:
            break
    #Update u
    u[nghosts:nrl_gz,nghosts:ncl_gz]=w[nghosts:nrl_gz,nghosts:ncl_gz]
    #Reapply physical boundary conditions
    set_bcs(u,nrl,ncl,nrows,ncols,grid_coords)
    iteration+=1
#This is what the "else" clause of Python for/while loops is for: the stopping
#criterion is exceeding max_iterations, so no break is needed here, but we do
#need to warn that it happened.
else:
    if grid_rank==0:
        print("Warning: maximum iterations exceeded")
total_time=MPI.Wtime()-start_time
if grid_rank==0:
    print(f'completed in {iteration:} iterations with time {total_time:.2f}')
# Write solution to output file (interior cells only, skipping the ghost zones)
filename = filename + str(grid_coords[0]) + str(grid_coords[1])
fout = open (filename,'w')
for i in range(nghosts,nrl+nghosts):
    line=" ".join(map(str,list(u[i,nghosts:ncl+nghosts])))
    row=line+"\n"
    fout.write(row)
fout.close()
# All done!
#print(f"wrote output file {filename:}")
Download mpi_ghostplate.py file
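The comments in the Fortran and Python solutions note that the per-rank output files have to be stitched back together for plotting. A minimal post-processing sketch, assuming the <output-file><row><col> naming used above and an illustrative 2x2 process grid (adjust base and the grid shape to match the actual run):

import numpy as np

base = "out"                 # assumed output-file prefix passed to the solver
nrows, ncols = 2, 2          # assumed process grid

row_blocks = []
for r in range(nrows):
    # join the blocks of one process row side by side (columns, axis=1)
    blocks = [np.loadtxt(f"{base}{r}{c}") for c in range(ncols)]
    row_blocks.append(np.concatenate(blocks, axis=1))
# stack the process rows on top of each other (axis=0)
full = np.concatenate(row_blocks, axis=0)
print(full.shape)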