MPI Two Dimensional Halo Exercise

We can now pull all of our new knowledge together to let MPI do more of the work for us in the heated-plate problem.

Exercise

In the language of your choice, write a heated-plate solution using two-dimensional data decomposition. Use MPI types and the MPI Cartesian communicator. Stay with blocking sendrecv for this exercise. Boundary conditions should be nonperiodic in both directions.

Check your results by plotting. If you wish, you can use the script contour2d.py:

contour2d.py source code

import sys
import os
import argparse
import re
import numpy as np
import matplotlib.pyplot as plt

parser = argparse.ArgumentParser()
parser.add_argument("-r", "--rows", help="Rows in process topology")
parser.add_argument("-c", "--cols", help="Columns in process topology")
#parser.add_argument("-f", "--fortran", help="Fortran ordering", action="store_true")
parser.add_argument("filename", help="Base name for data files")
args = parser.parse_args()
base = args.filename
nrows = int(args.rows)
ncols = int(args.cols)

files = [f for f in os.listdir('.') if re.match(base+r"\d*",f)]
files.sort()
for row in range(nrows):
    subdomains=[]
    for col in range(ncols):
        thisrow=base+str(row)+str(col)
        print(thisrow)
        data=np.loadtxt(thisrow,unpack=False)
        subdomains.append(data)
        rowimage=np.hstack(subdomains)
    image=np.vstack(rowimage)

fig=plt.figure()
plt.contourf(image)
plt.colorbar()
plt.show()


The usage is

python contour2d.py -r numrows -c numcols basefilename

The files should be written by row with the name fitting the pattern

base<coord[0]coord[1]>

(or coord(1)coord(2) for Fortran, whose coordinate indices are 1-based) e.g.

out00 out01 out10 out11 ...

Example Solutions

C++

#include <iostream> 
#include <cmath>
#include <string>
#include <sstream>
#include <fstream>
#include <iomanip> 
#include <mpi.h> 


#define MAX_ITER 10000000

void set_bcs(int nr, int nc, int nrows, int ncols, int* coords, double **u);

using namespace std; 

int main (int argc, char *argv[]) {

    double diff;            // Change in value
    int N, M;
    int iterations = 0;
    int diffInterval;
    double epsilon;

    // Added for MPI
    int nrows, ncols;
    int nrl, ncl;
    int rank, nprocs;
    int errcode;
    int up, down, right, left;
    int tag=0;
    double gdiff;

    //Initialize MPI
    MPI_Init(&argc, &argv);
    MPI_Comm_size(MPI_COMM_WORLD,&nprocs);
    MPI_Comm_rank(MPI_COMM_WORLD,&rank);

    /* check number of parameters and read in epsilon, filename, optionally size */
   if (argc < 3) {
     printf ("USAGE:  %s epsilon output-file <N> <M>\n", argv[0]);
     MPI_Abort(MPI_COMM_WORLD,errcode);
     return 1;
   }
   else {
      stringstream ssepsilon;
      ssepsilon<<argv[1];
      if (ssepsilon.fail()) {
         printf ("Error reading in epsilon value\n");
         MPI_Abort(MPI_COMM_WORLD,errcode);
         return 2;
      }
      ssepsilon>>epsilon;

      if (argc==3) {
        N=500;
        M=500;
      }
      if (argc==4) {
        stringstream ssnr;
        ssnr<<argv[3];
        if (ssnr.fail()) {
           cout<<"Error converting row dimension \n";
           MPI_Abort(MPI_COMM_WORLD,errcode);
           return 2;
         }
         ssnr>>N;
         M=N;
      }
      if (argc==5) {
        stringstream ssnr;
        ssnr<<argv[3];
        if (ssnr.fail()) {
            cout<<"Error converting row dimension \n";
            MPI_Abort(MPI_COMM_WORLD,errcode);
            return 2;
        }
        ssnr>>N;
        stringstream ssnc;
        ssnc<<argv[4];
        if (ssnc.fail()) {
            cout<<"Error converting row dimension \n";
            MPI_Abort(MPI_COMM_WORLD,errcode);
            return 2;
        }
        ssnc>>M;
     }
   }
   //For simplicity, we will limit this code to perfect square process count
   double rows=sqrt(nprocs);
   nrows=(int)rows;
   if ( nrows*nrows != nprocs ) {
       cout<<"This code requires a perfect square number of processes\n";
       MPI_Abort(MPI_COMM_WORLD,errcode);
       return 1;
   }
   else {
       ncols=nrows;
   }

   //Strong scaling
   //Weak scaling would set nrl=N and ncl=M
   if (N%nrows==0 && M%ncols==0) {
       nrl = N/nrows;
       ncl = M/ncols;
   }
   else {
       cout<<"Number of ranks should divide the total number of rows/cols evenly.\n";
       MPI_Abort(MPI_COMM_WORLD,errcode);
   }

    double **u=new double*[nrl+2];
    double **w=new double*[nrl+2];
    double **diffs=new double*[nrl+2];

    double *uptr=new double[(nrl+2)*(ncl+2)];
    double *wptr=new double[(nrl+2)*(ncl+2)];
    double *dptr=new double[(nrl+2)*(ncl+2)];

    for (int i=0;i<nrl+2;++i,uptr+=ncl+2) {
       u[i] = uptr;
    }
    for (int i=0;i<nrl+2;++i,wptr+=ncl+2) {
       w[i] = wptr;
    }
    for (int i=0;i<nrl+2;++i,dptr+=ncl+2) {
       diffs[i] = dptr;
    }

    //create Cartesian topology
    int ndims=2;
    int dims[2]={nrows,ncols};
    int periods[2]={0,0};
    int reorder=0;
    MPI_Comm grid_comm;
    int grid_rank;
    int grid_coords[2];

    MPI_Cart_create(MPI_COMM_WORLD, ndims, dims, periods, reorder, &grid_comm);
    MPI_Comm_rank(grid_comm, &grid_rank);
    MPI_Cart_coords(grid_comm, grid_rank, ndims, grid_coords);

    int direction=0;
    int displ=1;
    MPI_Cart_shift(grid_comm, direction, displ, &up, &down);

    direction=1;
    MPI_Cart_shift(grid_comm, direction, displ, &left, &right);

    cout<<"topo "<<grid_rank<<" "<<left<<" "<<right<<" "<<up<<" "<<down<<endl;

    //Set up the MPI type for columns
    MPI_Datatype col;
    MPI_Type_vector(nrl+2,1,ncl+2,MPI_DOUBLE,&col);
    MPI_Type_commit(&col);
    MPI_Status status;

    //Initialize solution matrices
    for ( int i = 0; i <= nrl+1; i++ ) {
         for (int j = 0; j <= ncl+1; j++ ) {
             u[i][j] = 0.;
             w[i][j] = 0.;
         }
    }
    set_bcs(nrl, ncl, nrows, ncols, grid_coords, u);

    diffInterval=1;

    double time0=MPI_Wtime();

  // Compute steady-state solution
  // The diffs array is mostly to make it simpler to extend the testing interval

  if (rank==0) {
      cout<<"Running until the difference is <="<<epsilon<<" with size "<<N<<"x"<<M<<"\n";
  }

  while ( iterations<=MAX_ITER ) {
     // reset diff each time through to get max abs diff
     // max_element doesn't work great for twod arrays and is often slow
     diff=.8*epsilon;

     MPI_Sendrecv(&u[1][1],ncl, MPI_DOUBLE,up,tag,&u[nrl+1][1],
                           ncl, MPI_DOUBLE,down,tag,grid_comm,&status);

     MPI_Sendrecv(&u[nrl][1],ncl,MPI_DOUBLE,down,tag,&u[0][1],
                             ncl,MPI_DOUBLE,up,tag,grid_comm,&status);

     MPI_Sendrecv(&u[0][ncl],1,col,right,tag,&u[0][0],1,col,left,                                                           tag, grid_comm, &status); 
     MPI_Sendrecv(&u[0][1],1,col,left,tag,&u[0][ncl+1],1,col,right,                                                         tag, grid_comm, &status);


     for (int i=1; i<=nrl;i++) {
        for (int j=1;j<=ncl;j++) {
            w[i][j] = (u[i-1][j] + u[i+1][j] + u[i][j-1] + u[i][j+1])/4.0;
            diffs[i][j] = abs(w[i][j] - u[i][j]);
         }
     }
     if (iterations%diffInterval==0) {
        for (int i=1; i<=nrl;i++) {
           for (int j=1;j<=ncl;j++) {
           if (diff<diffs[i][j]) {
           diff=diffs[i][j];
           }
       }
     }
         //Find max of diff in all the processors.
         MPI_Allreduce(&diff,&gdiff,1,MPI_DOUBLE,MPI_MAX,MPI_COMM_WORLD);
         if (gdiff <= epsilon)
             break;
     }

     //Set halo values
     /*
     */
     for (int j=0; j<=ncl+1; j++) {
         w[0][j]=u[0][j];
         w[nrl+1][j]=u[nrl+1][j];
     }
     for (int i=0;i<=nrl+1; i++) {
         w[i][0]=u[i][0];
         w[i][ncl+1]=u[i][ncl+1];
     }

     for (int i=0; i<=nrl+1;i++) {
        for (int j=0;j<=ncl+1;j++) {
            u[i][j] = w[i][j];
        }
     }

     set_bcs(nrl, ncl, nrows, ncols, grid_coords, u);

     iterations++;
  } //end of computation 

  if (iterations>MAX_ITER) {
     if (rank==0) {
        cout<<"Warning: maximum iterations exceeded\n";
     }
  }

  double totalTime=(MPI_Wtime()-time0);
  if (rank==0) {
      cout << "Completed "<<iterations<<" iterations; time "<<totalTime<<endl;
  }

  // Write solution to output file
  ofstream fp;
  string fname=argv[2]+to_string(grid_coords[0])+to_string(grid_coords[1]);
  fp.open(fname,ios::out);
  for (int i = 1; i <= nrl; i++) {
     for (int j = 1; j <= ncl; j++) {
        fp<<u[i][j]<<" ";
     }
     fp<<"\n";
  }
  fp.close();

  //Type_free not really necessary but good practice
   MPI_Type_free(&col);
   MPI_Comm_free(&grid_comm);
   MPI_Finalize();

   return 0;

}

// Apply the fixed physical boundary values on whichever global edges this
// rank's subdomain touches; interior ranks are left untouched.  The top
// edge is an ice bath at 0, the other three edges are held at 100.
// Note: when plotting, 0,0 is the bottom.
void set_bcs(int nr, int nc, int nrows, int ncols, int* coords, double **u) {

  const double topBC    = 0.;
  const double bottomBC = 100.;
  const double leftBC   = 100.;
  const double rightBC  = 100.;

  if (coords[0] == 0) {                 // first process row: global top edge
      for (int j = 0; j <= nc + 1; ++j)
          u[0][j] = topBC;
  }
  else if (coords[0] == nrows - 1) {    // last process row: global bottom edge
      for (int j = 0; j <= nc + 1; ++j)
          u[nr + 1][j] = bottomBC;
  }

  if (coords[1] == 0) {                 // first process column: left edge
      for (int i = 0; i <= nr + 1; ++i)
          u[i][0] = leftBC;
  }
  else if (coords[1] == ncols - 1) {    // last process column: right edge
      for (int i = 0; i <= nr + 1; ++i)
          u[i][nc + 1] = rightBC;
  }

}

Fortran

program sendrows
   ! 2-D domain-decomposed heated-plate (Laplace) solver.  Halo exchange
   ! uses blocking sendrecv on a nonperiodic Cartesian communicator; rows
   ! (noncontiguous in Fortran) use an MPI vector type.
   use mpi_f08

   implicit none   ! BUG FIX: added (see iterations note below)

   integer, parameter :: maxiter=10000000
   integer            :: N, M
   integer            :: i,j
   ! BUG FIX: the loop counter was declared as "interations" but used as
   ! "iterations" everywhere, so the loop ran on a separate, implicitly
   ! typed, uninitialized variable.
   integer            :: numargs, diff_interval, iterations=0
   double precision   :: eps, diff = 1.0
   character(len=80)  :: arg, filename
   character(len=4)   :: char_row, char_col
   double precision, allocatable, dimension(:,:)  :: u, w
   double precision   :: time0, time1

   double precision   :: rows
   integer            :: nrows, ncols, nrl, ncl
   integer            :: rank, nprocs, tag=0
   integer            :: left, right, up, down
   integer, dimension(2) :: dims, grid_coords
   logical, dimension(2) :: periods
   logical            :: reorder
   integer            :: ndims, grid_rank
   integer            :: direction, displ
   integer, parameter :: root=0
   type(MPI_Comm)     :: grid_comm
   type(MPI_Status)   :: mpi_stat
   type(MPI_Datatype) :: row
   double precision   :: gdiff
   character(len=24)  :: fname

   interface
     subroutine set_bcs(nr,nc,nrows,ncols,coords,u)
        implicit none
        integer,                intent(in)  :: nr, nc, nrows, ncols
        integer, dimension(:),  intent(in)                :: coords
        double precision, dimension(0:,0:), intent(inout) :: u
     end subroutine
   end interface

   !Initialize MPI
   call MPI_INIT()
   call MPI_COMM_SIZE(MPI_COMM_WORLD,nprocs)
   call MPI_COMM_RANK(MPI_COMM_WORLD,rank)

   ! check number of parameters and read in epsilon, filename, optionally size
   ! all ranks do this
   numargs=command_argument_count()
   if (numargs .lt. 2) then
      call MPI_Finalize()
      stop 'USAGE: epsilon output-file <N> <M>'
   else
      call get_command_argument(1,arg)
      read(arg,*) eps
      call get_command_argument(2,filename)

      if (numargs==2) then
         ! default global size
         N=500
         M=500
      endif
      if (numargs==3) then
         ! square N x N grid
         call get_command_argument(3,arg)
         read(arg,*) N
         M=N
      endif
      if (numargs==4) then
         ! rectangular N x M grid
         call get_command_argument(3,arg)
         read(arg,*) N
         call get_command_argument(4,arg)
         read(arg,*) M
      endif
   endif

   !For simplicity, we will limit this code to perfect square process count
   rows=sqrt(dble(nprocs))
   if ( rows /= dble(int(rows)) ) then
       call MPI_Finalize()
       stop "This code requires a perfect square number of processes"
   else
       nrows=int(rows)
       ncols=nrows
   endif

   !Strong scaling: each rank owns an (N/nrows) x (M/ncols) subdomain
   !Weak scaling would set nrl=N and ncl=M
   if (mod(N,nrows)==0 .and. mod(M,ncols)==0) then
       nrl = N/nrows
       ncl = M/ncols
   else
       call MPI_Finalize()
       stop "Number of ranks should divide the number of rows evenly."
   endif

   !create nonperiodic cartesian topology for processes
   ndims=2
   dims(1) = nrows      ! number of process rows
   dims(2) = ncols      ! number of process columns
   periods(1) = .false. ! not cyclic in this direction
   periods(2) = .false. ! not cyclic in this direction
   reorder    = .false. ! do not allow MPI to reorder ranks
   call MPI_Cart_create(MPI_COMM_WORLD, ndims, dims, periods,                 &
                                                     reorder, grid_comm)
   call MPI_Comm_rank(grid_comm, grid_rank)
   call MPI_Cart_coords(grid_comm, grid_rank, ndims, grid_coords)

   ! Neighbor ranks; ranks on a grid edge get MPI_PROC_NULL, making the
   ! corresponding sendrecv a no-op there.
   direction =  0   ! shift along the 1st index (0 or 1)
   displ =  1       ! shift by  1
   call MPI_Cart_shift(grid_comm, direction, displ, up, down)

   direction =  1   ! shift along the 2nd index (0 or 1)
   call MPI_Cart_shift(grid_comm, direction, displ, left, right)

   write(*,*) 'topo', grid_rank, up, down, left, right

   ! Local arrays with a one-cell halo/boundary ring
   allocate(u(0:nrl+1, 0:ncl+1), w(0:nrl+1,0:ncl+1))
   u=0.d0
   w=0.d0

   ! Boundary conditions
   call set_bcs(nrl,ncl,nrows,ncols,grid_coords,u)

   ! Vector type describing one full local row, including the corner halo
   ! cells: ncl+2 elements with stride nrl+2 (the column length).
   call MPI_Type_vector(ncl+2,1,nrl+2,MPI_DOUBLE_PRECISION,row)
   call MPI_TYPE_COMMIT(row)

   diff_interval=1

  ! Walltime from process 0 is good enough for me
   if (grid_rank .eq. 0) then
      write (*,'(a,es15.8,a,i4,a,i4)') 'Running until the difference is <= ', &
                                              eps,' for global size ',N,'x',M
     time0=MPI_WTIME()
   endif

  ! Compute steady-state solution by Jacobi iteration
   do while ( iterations<=maxiter )

     ! Exchange halo values
     ! Send left and right (column sections are contiguous in Fortran)

     call MPI_SENDRECV(u(1:nrl,1)    ,nrl,MPI_DOUBLE_PRECISION,left,tag,     &
                       u(1:nrl,ncl+1),nrl,MPI_DOUBLE_PRECISION,right,tag,    &
                                         grid_comm,mpi_stat)

     call MPI_SENDRECV(u(1:nrl,ncl)  ,nrl,MPI_DOUBLE_PRECISION,right,tag,    &
                       u(1:nrl,0)    ,nrl,MPI_DOUBLE_PRECISION,left,tag,     &
                                          grid_comm,mpi_stat)

     ! Send up and down using the row vector type.
     ! BUG FIX: the original sent row nrl up and row 1 down, filling each
     ! halo with the wrong neighbor row.  The first interior row must go to
     ! the rank above and the last interior row to the rank below (as the
     ! C++ and Python versions do).

     call MPI_SENDRECV(u(1,0)    ,1, row, up,tag,                            &
                       u(nrl+1,0),1, row, down,tag,                          &
                                         grid_comm,mpi_stat)

     call MPI_SENDRECV(u(nrl,0)  ,1, row, down,tag,                          &
                       u(0,0)    ,1, row, up,tag,                            &
                                         grid_comm,mpi_stat)

       ! Jacobi update of the interior
       do j=1,ncl
           do i=1,nrl
               w(i,j) = 0.25*(u(i-1,j) + u(i+1,j) + u(i,j-1) + u(i,j+1))
           enddo
       enddo

       ! Convergence test every diff_interval iterations.
       ! BUG FIX: the original wrote "if (diff_interval==-1) continue" to
       ! disable the test, but continue is a no-op statement in Fortran
       ! (not a C-style continue); guard the whole test instead.
       if (diff_interval /= -1) then
           if (mod(iterations,diff_interval)==0) then
               diff=maxval(abs(w(1:nrl,1:ncl)-u(1:nrl,1:ncl)))
               call MPI_ALLREDUCE(diff,gdiff,1,MPI_DOUBLE_PRECISION,MPI_MAX, &
                                                             MPI_COMM_WORLD)
               if (gdiff <= eps) then
                   exit
               endif
           endif
       endif

      !Copy halo values so the u = w assignment does not wipe them out
       w(0,:)=u(0,:)
       w(nrl+1,:)=u(nrl+1,:)
       w(:,0)=u(:,0)
       w(:,ncl+1)=u(:,ncl+1)

       u = w

     ! Reset physical boundaries (they get overwritten in the halo exchange)
       call set_bcs(nrl,ncl,nrows,ncols,grid_coords,u)

       iterations = iterations + 1

   enddo

   ! BUG FIX: this warning was previously tested inside the loop, where the
   ! loop condition (iterations<=maxiter) made it unreachable.
   if (iterations > maxiter) then
      if (grid_rank == 0) then
         write(*,*) "Warning: maximum iterations exceeded"
      endif
   endif

   if (grid_rank==root) then
     time1 = MPI_WTIME()
     write(*,*) 'completed; iterations = ', iterations,"in ",time1-time0," sec"
   endif

  ! Write solution to output file by row. Must use correct stitching to get
  ! overall result, such as a numpy concatenate on axis=1.
  ! It would also work to output by columns, in which case stitching is more
  ! straightforward.

  !Some compilers (Intel) like to add EOLs to line up columns in output. The
  ! Fortran standard permits this with * IO, but it complicates plotting.
  ! Use some F08 features to fix this.

  write(char_row,'(i4)')  grid_coords(1)
  write(char_col,'(i4)')  grid_coords(2)
  fname=trim(filename)//trim(adjustl(char_row))//trim(adjustl(char_col))

  open (unit=10,file=fname)
  do i=1,nrl
     write (10,'(*(g0,1x))') u(i,1:ncl)
  enddo
  close(10)   ! BUG FIX(minor): the unit was never closed

   call MPI_Type_free(row)   ! BUG FIX(minor): the vector type was never freed
   call MPI_Comm_free(grid_comm)

   call MPI_Finalize()

end program


subroutine set_bcs(nr,nc,nrows,ncols,coords,u)
  ! Apply the fixed physical boundary values on whichever global edges this
  ! rank's subdomain touches.  Row 0 is the ice bath at 0; the other three
  ! edges are held at 100.
  ! Note: when plotting, 0,0 is the bottom.
  implicit none
  integer, intent(in)                      :: nr, nc, nrows, ncols
  integer, dimension(:), intent(in)        :: coords
  double precision, dimension(0:,0:), intent(inout) :: u

  double precision, parameter :: topBC    = 0.d0
  double precision, parameter :: bottomBC = 100.d0
  double precision, parameter :: leftBC   = 100.d0
  double precision, parameter :: rightBC  = 100.d0

  ! First process row owns the top edge; the last owns the bottom edge
  if (coords(1) == 0) then
     u(0,:) = topBC
  else if (coords(1) == nrows-1) then
     u(nr+1,:) = bottomBC
  endif

  ! First process column owns the left edge; the last owns the right edge
  if (coords(2) == 0) then
     u(:,0) = leftBC
  else if (coords(2) == ncols-1) then
     u(:,nc+1) = rightBC
  endif

end subroutine set_bcs

Python

import sys
import time
import numpy as np
from mpi4py import MPI

def set_bcs(u,nrl,ncl,nrows,ncols,coords):
    """Impose the fixed physical boundary values in place.

    A rank whose coordinates put it on an edge of the process grid
    overwrites the matching edge of its local array u: the global top
    edge is an ice bath at 0 degrees, while the bottom, left, and right
    edges are held at 100.  Because u is mutated, the caller sees the
    change (subroutine semantics via pass-by-assignment).
    """
    topBC = 0.
    bottomBC = 100.
    leftBC = 100.
    rightBC = 100.

    on_top = coords[0] == 0
    on_bottom = coords[0] == nrows - 1
    on_left = coords[1] == 0
    on_right = coords[1] == ncols - 1

    if on_top:
        u[0, :] = topBC
    if on_bottom:
        u[nrl + 1, :] = bottomBC
    if on_left:
        u[:, 0] = leftBC
    if on_right:
        u[:, ncl + 1] = rightBC

# check number of parameters and read in parameters

#Better to use argparse but this is closer to compiled language code and simpler
#if one hasn't used argparse.
argv=sys.argv
if  len(argv) > 2:
   try:
      epsilon=float(argv[1])
      filename=argv[2]
   # BUG FIX: the original except blocks contained a bare string literal,
   # which is a no-op, so the error messages were never shown; also narrow
   # the bare except to ValueError (what float()/int() raise).
   except ValueError:
      print("Illegal input")
      sys.exit(1)
   if len(argv) == 3:
       # default global size
       N=500
       M=500
   if len(argv) == 4:
       # square N x N grid
       try:
          N=int(argv[3])
          M=N
       except ValueError:
           print("Cannot convert grid dimension to integer")
           sys.exit(2)
   if len(argv) == 5:
       # rectangular N x M grid
       try:
           N=int(argv[3])
           M=int(argv[4])
       except ValueError:
           print("Cannot convert grid dimension to integer")
           sys.exit(2)
else:
   print('USAGE: epsilon output-file <N> <M>')
   sys.exit(1)


# Initializing MPI
comm = MPI.COMM_WORLD
rank = comm.Get_rank()
nprocs = comm.Get_size()
root = 0
tag = 0

#Set max iterations. 10 million should do it.
max_iterations=10000000

#Set the number of rows and columns of the process grid.
#For simplicity, require a perfect square number of processes
nrows=np.sqrt(nprocs)
# Integer test via float comparison; exact for realistic process counts.
if nrows != int(nrows):
    print("This code requires a perfect square number of processes")
    # NOTE(review): sys.exit() ends only this rank; under mpiexec the other
    # ranks may hang.  comm.Abort() would be more robust -- confirm intent.
    sys.exit()
else:
    nrows=int(nrows)
    ncols=nrows

#Strong scaling: each rank owns an (N/nrows) x (M/ncols) tile of the grid.
# BUG FIX: the original tested only M%nrows and computed ncl = N//ncols,
# which is wrong whenever N != M; test both dimensions and use M for the
# local column count.  Also exit with a nonzero status on error.
if N%nrows!=0 or M%ncols!=0:
    print("Grid dimensions must be evenly divisible by the process-grid dimensions")
    sys.exit(1)
else:
    nrl = N//nrows
    ncl = M//ncols

#Set up a 2-D nonperiodic Cartesian topology
ndims=2
dims=[nrows, ncols]
periods=[False, False]
grid_comm=comm.Create_cart(dims, periods)

grid_rank=grid_comm.Get_rank()
grid_coords=grid_comm.coords

# Shift along axis 0 gives the neighbor ranks above and below; axis 1
# gives the neighbors to the left and right.  Edge ranks presumably get
# MPI.PROC_NULL (nonperiodic grid), which makes Sendrecv a no-op there.
direction=0
displ=1
(up,down)=grid_comm.Shift(direction, displ)
direction=1
(left,right)=grid_comm.Shift(direction,displ)

# Local solution arrays with a one-cell halo/boundary ring.
# BUG FIX(minor): u and w were each allocated twice; the duplicate
# assignments have been removed.
u=np.zeros((nrl+2,ncl+2))
w=np.zeros((nrl+2,ncl+2))

set_bcs(u,nrl,ncl,nrows,ncols,grid_coords)

#Set up subarray types for the (noncontiguous) column halo exchanges.
#Each one describes an nrl x 1 column of the (nrl+2) x (ncl+2) array,
#starting at row 1 of the indicated column.
column_zero=MPI.DOUBLE.Create_subarray([nrl+2,ncl+2],[nrl,1],[1,0])       # left halo column
column_zero.Commit()

column_one=MPI.DOUBLE.Create_subarray([nrl+2,ncl+2],[nrl,1],[1,1])        # first interior column
column_one.Commit()

column_end=MPI.DOUBLE.Create_subarray([nrl+2,ncl+2],[nrl,1],[1,ncl])      # last interior column
column_end.Commit()

column_endbc=MPI.DOUBLE.Create_subarray([nrl+2,ncl+2],[nrl,1],[1,ncl+1])  # right halo column
column_endbc.Commit()


# Compute steady-state solution by Jacobi iteration
iterations=0
diff_interval=1

start_time=MPI.Wtime()
# One-element arrays to serve as buffers for the buffer-based Allreduce
my_diff = np.zeros(1)
global_diff=np.zeros(1)

if rank==root:
    print(f'Running until the difference is < {epsilon:}, global size {N:}x{M:}')

while iterations<=max_iterations:

    # Halo exchange.
    # Send up and receive down (interior rows are contiguous slices)
    grid_comm.Sendrecv([u[1,1:ncl+1], MPI.DOUBLE], up, tag, [u[nrl+1,1:ncl+1], MPI.DOUBLE], down, tag)
    # Send down and receive up
    grid_comm.Sendrecv([u[nrl, 1:ncl+1], MPI.DOUBLE], down, tag, [u[0,1:ncl+1], MPI.DOUBLE], up, tag)

    #Send and receive left and right using the subarray column types
    grid_comm.Sendrecv([u,1,column_one], left, tag, [u,1,column_endbc], right, tag)
    grid_comm.Sendrecv([u,1,column_end], right, tag, [u,1,column_zero], left, tag)

    # Vectorized Jacobi update: each interior cell becomes the average of
    # its four neighbors
    w[1:-1,1:-1]=0.25*(u[:-2,1:-1]+u[2:,1:-1]+u[1:-1,:-2]+u[1:-1,2:])


    #Copy halo values so the u=w copy below does not wipe them out
    w[0,:]=u[0,:]
    w[nrl+1,:]=u[nrl+1,:]
    w[:,0]=u[:,0]
    w[:,ncl+1]=u[:,ncl+1]


    # Convergence test every diff_interval iterations: local max |w-u| over
    # the interior, reduced to a global max across all ranks
    if iterations%diff_interval==0:
        my_diff[0]=np.max(np.abs(w[1:-1,1:-1]-u[1:-1,1:-1]))
        comm.Allreduce(my_diff,global_diff,op=MPI.MAX)

        if global_diff[0]<=epsilon:
            break

    u[:,:]=w[:,:]
    #Reapply physical boundary conditions
    set_bcs(u,nrl,ncl,nrows,ncols,grid_coords)

    iterations+=1
#This is what the weird "else" clause in Python for/while loops is used to do.
#Our stopping criterion is now on exceeding max_iterations so don't need
# to break, but we need to warn that it happened.  (The else body runs only
# when the loop ends without a break, i.e. convergence was never reached.)
else:
     if grid_rank==0:
         print("Warning: maximum iterations exceeded")

total_time=MPI.Wtime()-start_time

if grid_rank==0:
    print(f'completed in {iterations:} iterations with time {total_time:.2f}')

# Write this rank's interior tile to its own output file, named with the
# rank's grid coordinates so the plotting script can stitch the tiles.
filename = filename + str(grid_coords[0]) + str(grid_coords[1])
# BUG FIX(minor): the original never closed the file; use a context manager
# so it is flushed and closed deterministically.
with open(filename, 'w') as fout:
    for i in range(1, nrl + 1):
        line = " ".join(map(str, list(u[i, 1:ncl + 1])))
        fout.write(line + "\n")

# All done!
#print(f"wrote output file {filename:}")

Previous
RC Logo RC Logo © 2026 The Rector and Visitors of the University of Virginia