
Program Listing

The compile command used for the SP2 is:
mpxlf90 jacmpi.f -O3 -qstrict -qarch=pwr2 -qtune=pwr2 -qhot -o jacmpi
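Note that NPROC_X, NPROC_Y, NGRID_X, and NGRID_Y in the parameter statements of the listing are placeholders for the processor-grid and mesh dimensions; they are presumably substituted with the actual values for each run (for example 2, 2, 1000, and 1000) by a preprocessor or a simple text substitution before the file is compiled.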
!******************************************************************
! FILE: jacmpi.f90
!
! DESCRIPTION:
!     This code solves the Laplace equation
!     using the Jacobi method, domain decomposition,
!     and MPI.  The boundary conditions used here are:
!------------------------------------------------------------------
!       f(0,y) = 0            0   < y < 5
!       f(x,5) = 0            0   < x < 5
!       f(x,0) = 0            0   < x < 1
!       f(x,0) = linear       1   < x < 1.2
!       f(x,0) = 100          1.2 < x < 5
!       f(5,y) = 100          0   < y < 3
!       f(5,y) = linear       3   < y < 3.2
!       f(5,y) = 0            3.2 < y < 5
!------------------------------------------------------------------
!     The initial guess is a random constant value for f.
!
! You may realize that certain portions of this code could
! be done in a simpler manner, but it is written the way
! it is for pedagogical reasons.  There are a number of
! things that this code does that you may find useful 
! later on.
!   
! ORIGINAL AUTHOR: Lyle Long, lnl@psu.edu, Penn State University
! LAST REVISED BY: Anirudh Modi, anirudh-modi@psu.edu (4/7/98-Tue)
!******************************************************************

Program jacobi

implicit none
include 'mpif.h'
integer mpierr,status(MPI_STATUS_SIZE)

integer nodex, nodey, nnodes, nx,ny,master,myeast, mywest, iter, &
        iters, myj,nabor,myi, mynorth, mysouth, nxw,nyw,nn, &
        cont, ntotal, request,i1,i2,j1,j2,ih1,ih2,jh1,jh2

! NODEX*NODEY gives the number of domains
parameter (nodex = NPROC_X, nodey = NPROC_Y)

! total no. of processors:
parameter (Nnodes = nodex*nodey)

! these are the total number of cells in each direction:
parameter (NX = NGRID_X, ny=NGRID_Y)

! total no. of cells:
parameter (ntotal = nx*ny )

! no. of cells on each processor
! (each local array carries one layer of ghost cells on every side,
!  hence the +2 in each direction)
parameter (nxw=nx/nodex+2, nyw=ny/nodey+2 )
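! (e.g. nx = ny = 1000 on a 2 x 2 processor grid gives nxw = nyw = 502:
!  500 interior cells plus one ghost cell on each side)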

parameter (MASTER = 0)

integer   numtasks,taskid,mtype,i,j,ncont,indgr

real*8    f(nxw,nyw),fold(nxw,nyw)
real*8    x(nxw,nyw),y(nxw,nyw)
real*8    fn(nxw), fe(nyw), fs(nxw), fw(nyw)
real*8    time,mflop,tottime,totmflop,mfloptmp,von,ranf
real*8    tstart, tend, second, xrange, yrange, xhole1, xhole2, &
                yhole1, yhole2, T1, T2, tcomm1, tcomm2, tcomm

character*30 pltitl, myfile

! find out how many processors there are (numtasks) and what
! 'my' taskid is:
call MPI_INIT(mpierr)
call MPI_COMM_SIZE(MPI_COMM_WORLD,numtasks,mpierr)
call MPI_COMM_RANK(MPI_COMM_WORLD,taskid,mpierr)

if ( taskid == master ) then
        ! this code reads in the number of iterations from a file:
        open(11,file='input.jacmpi',form='formatted',status='unknown')
        read(11,*) iters
        close(11)
endif

! broadcast iters to all processors
call MPI_BCAST(iters,1,MPI_INTEGER,0,MPI_COMM_WORLD,mpierr)

tcomm = 0.0                     ! Initialise communication time
T1 = 0.0                        ! Temperature BC 1
T2 = 100.0                      ! Temperature BC 2

! x-direction
xrange = 5.0                    ! x-range (origin at 0.0)
xhole1 = 1.0                    ! location of beginning of hole
xhole2 = 1.2                    ! location of ending of hole
ih1 = int(nx*xhole1/xrange)     ! global index of beginning of hole
ih2 = int(nx*xhole2/xrange)     ! global index of ending of hole

! y-direction
yrange = 5.0                    ! y-range (origin at 0.0)
yhole1 = 3.0                    ! location of beginning of hole 
yhole2 = 3.2                    ! location of ending of hole
jh1 = int(ny*yhole1/yrange)     ! global index of beginning of hole
jh2 = int(ny*yhole2/yrange)     ! global index of ending of hole

if (taskid == 0) then
        write(6,*) ' This is node', taskid, ' (node 0)'
else
        write(6,*) ' This is node', taskid, ' (other nodes)'
endif

if ( nodex*nodey .ne. numtasks ) then
        if ( taskid == master ) then
                print *, ' must have nodex*nodey = numtasks !'
                print *, ' nodex=',nodex 
                print *, ' nodey=',nodey 
                print *, ' numtasks=',numtasks
        endif
        call mpi_abort( mpi_comm_world, 1, mpierr )
        stop
endif
! ********************* master task ******************************
if (taskid == MASTER) then

        print *, 'Number of tasks     = ', numtasks       
        print *, 'Total cells         = ', ntotal
        print *, 'Cells per processor = ', nxw*nyw

        print *, 'No. of iterations   = ', iters

        print *, 'Data sent/node/step =', &
               (nyw*2.0 + nxw*2.0 )*8.0/1.0e6, &
             ' mbytes'

        print *, 'Total data on each node =', &
               (nxw*nyw*8.0*5.0)/1.0e6 , ' mbytes'

endif

! find i,j processor id nos.:
! (in addition to having taskid, I also label the processors
!  with i,j indices; this makes it easier to determine who
!  the neighboring processors are)
myj = taskid / nodex + 1
myi = taskid - (myj-1)*nodex  + 1
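! (e.g. with nodex = 4, taskid = 6 gives myi = 3, myj = 2)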

print *,'task, i, j = ',taskid,myi,myj

! find taskid numbers of neighbor processors :
mynorth = nabor(myi,   myj+1, nodex, nodey)
mysouth = nabor(myi,   myj-1, nodex, nodey)
myeast  = nabor(myi+1, myj,   nodex, nodey)
mywest  = nabor(myi-1, myj,   nodex, nodey)
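! (nabor wraps around periodically at the edges of the processor grid,
!  so every processor gets four neighbor ids)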

print *, 'task,n,s,e,w=',taskid,mynorth,mysouth,myeast,mywest

! -- initialize f and edge arrays on each processor --------
! initialize f with a random value (ranf() returns a scalar, so the
! whole array gets the same value)
fn = 0.0
fs = 0.0
f = 10.0 * ranf()
fe = 0.0
fw = 0.0

! let's also put a spike in the initial guess
if ( taskid == master ) f(nxw/2,nyw/2) = 100.0

!----- initialize x,y on each processor, note these are global x,y's
x(1,:) = 0.0
x(nxw,:) = 0.0
y(:,1) = 0.0
y(:,nyw) = 0.0

do i=2,nxw-1
  x(i,:) = dble((dble(myi-1)+dble(i-2)/dble(nxw-2))*dble(xrange/nodex))
enddo
do j=2,nyw-1
  y(:,j) = dble((dble(myj-1)+dble(j-2)/dble(nyw-2))*dble(yrange/nodey))
enddo

! -----begin iterations-----------------------------------------
if (taskid == master) print *,'Begin iterating...'
tstart = mpi_wtime()

do iter = 1, iters
        if ( taskid == master ) print *, 'iter = ',iter
        
        ! apply BOUNDARY CONDITIONS on edges of domain:
        ! note that only certain processors need to do this.
        
        if ( myi == 1 )  f(2,2:nyw-1) = T1      ! x = 0.0
        
        if ( myi == nodex ) then                ! x = 5.0
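                ! map the global hole indices into this subdomain and
                ! clamp them to the local range [1, nyw-1]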
                j1 = max( min(jh1 - nyw * (myj-1), nyw-1), 1)
                j2 = max( min(jh2 - nyw * (myj-1), nyw-1), 1)
                if (j1 > 1) f(nxw-1,2:j1) = T2
                do j = j1+1,j2
                        f(nxw-1,j) = dble((j-j2-1)*(T1-T2)/(j2-j1))
                enddo
                if (j2 < nyw-1) f(nxw-1,j2+1:nyw-1) = T1
        endif
        
        if ( myj == 1 ) then                    ! y = 0.0
                i1 = max( min(ih1 - nxw * (myi-1), nxw-1), 1)
                i2 = max( min(ih2 - nxw * (myi-1), nxw-1), 1)
                if (i1 > 1) f(2:i1,2) = T1
                do i = i1+1,i2
                        f(i,2) = dble((i-i1-1)*(T2-T1)/(i2-i1))
                enddo
                if (i2 < nxw-1) f(i2+1:nxw-1,2) = T2
        endif
        
        if ( myj == nodey ) f(1:nxw,nyw-1) = T1 ! y = 5.0
        
        ! copy edge data to smaller arrays for transfer:
        
        !---- North & South---------- 
        fs(2:nxw-1) = f(2:nxw-1,2)
        fn(2:nxw-1) = f(2:nxw-1,nyw-1)
        
        !--------East & west
        fw(2:nyw-1) = f(2,2:nyw-1)
        fe(2:nyw-1) = f(nxw-1,2:nyw-1)
        
        tcomm1 = mpi_wtime()            ! Start of communication time
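        ! NOTE: each exchange below posts a non-blocking send and then
        ! receives into the same buffer without waiting on the request;
        ! this relies on the MPI library copying the short message out
        ! eagerly.  A strictly portable version would use separate send
        ! and receive buffers (or mpi_sendrecv) and call mpi_wait.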

        mtype = 111
        ! send to north 
        call mpi_isend(fn,nxw,MPI_DOUBLE_PRECISION,mynorth,mtype, &
                MPI_COMM_WORLD, request,mpierr)
        
        ! receive from south
        call mpi_recv(fn,nxw,MPI_DOUBLE_PRECISION,mysouth,mtype, &
                MPI_COMM_WORLD,status,mpierr)
        
        mtype = 222
        ! send to south
        call mpi_isend(fs,nxw,MPI_DOUBLE_PRECISION,mysouth,mtype, &
                MPI_COMM_WORLD,request,mpierr)

        ! receive from north
        call mpi_recv(fs,nxw,MPI_DOUBLE_PRECISION,mynorth,mtype, &
                MPI_COMM_WORLD,status,mpierr)
        
        mtype = 333
        ! send to east
        call mpi_isend(fe,nyw,MPI_DOUBLE_PRECISION,myeast,mtype, &
                MPI_COMM_WORLD,request,mpierr)

        ! receive from west
        call mpi_recv(fe,nyw,MPI_DOUBLE_PRECISION,mywest,mtype, &
                MPI_COMM_WORLD,status,mpierr)
        
        mtype = 444
        ! send to west
        call mpi_isend(fw,nyw,MPI_DOUBLE_PRECISION,mywest,mtype, &
                MPI_COMM_WORLD,request,mpierr)

        ! receive from east
        call mpi_recv(fw,nyw,MPI_DOUBLE_PRECISION,myeast,mtype, &
                MPI_COMM_WORLD,status,mpierr)
        
        tcomm2 = mpi_wtime()            ! End of communication time
        tcomm = tcomm + tcomm2 - tcomm1
        
        ! copy transferred arrays into ghost cells in main f arrays:
        
        !---- North & South---------- 
        f(2:nxw-1,1) = fn(2:nxw-1)
        f(2:nxw-1,nyw) = fs(2:nxw-1)
        
        !--------East & west
        f(1,2:nyw-1) = fe(2:nyw-1)
        f(nxw,2:nyw-1) = fw(2:nyw-1)
        
        fold = f
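        ! Jacobi update: every interior point becomes the average of its
        ! four neighbors from the previous iterate (fold)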
        f(2:nxw-1,2:nyw-1) = 0.25 * (fold(3:nxw,2:nyw-1) &
                                + fold(1:nxw-2,2:nyw-1)  &
                                + fold(2:nxw-1,3:nyw) &
                                + fold(2:nxw-1,1:nyw-2))
enddo
!   ---end iterations---------------------------------------------

tend = mpi_wtime()

time = tend - tstart
mflop = (4.0*iters*(nxw-2)*(nyw-2))/time/1.0e6
time = time - tcomm
print *, 'comp time=', time,' mflops/node=',mflop,' on task=',taskid
print *, 'comm time=', tcomm,' mflops/node=',mflop,' on task=',taskid
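! NOTE: the send/recv loop below gathers the per-node MFLOP numbers
! explicitly; a single collective would compute the same total, e.g.
!   call mpi_reduce(mflop,totmflop,1,MPI_DOUBLE_PRECISION,MPI_SUM, &
!                   0,MPI_COMM_WORLD,mpierr)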

mtype = 20
call mpi_send(mflop,1,MPI_DOUBLE_PRECISION,0,mtype,MPI_COMM_WORLD,mpierr)

if ( taskid == MASTER ) then
        print *,'Receiving mflops nos. back to Master'

        mtype=20
        totmflop=0.0
        do nn=0,numtasks-1
                call mpi_recv(mfloptmp,1,MPI_DOUBLE_PRECISION,nn,mtype, &
                        MPI_COMM_WORLD,status,mpierr)
                totmflop = totmflop + mfloptmp
        enddo

        print *,' TOTAL MFLOPS = ',totmflop            
endif

! have each node write its own output file:
! the internal write below builds a processor-specific file name
! in the character variable myfile:

write(myfile,900) 'fort.', taskid+1
900      format(a,i3.3) 
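! (e.g. taskid 5 writes its subdomain to the file fort.006)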

if ( ntotal < 5000000 ) then
        print *, ' my file = ',myfile
        open(22,file=myfile,form='formatted',status='unknown')
        write(22,*) 'title = "Jacobi"'
        write(22,*) 'variables = "x","y","T"'
        write(22,*) 'zone i = ',nxw-2,' j = ',nyw-2
        do i=2,nxw-1
                do j=2,nyw-1
                        write(22,*) x(i,j), y(i,j), f(i,j)
                enddo
                write(22,*)
        enddo
endif

if (taskid == master) print *, ' Program Done.'

! quit MPI
call MPI_FINALIZE(mpierr)

stop
end
!******************************************************************
! returns the taskid of the processor at position (i,j) in the
! nx x ny processor grid, with periodic wraparound in both directions
function nabor(i,j,nx,ny)

implicit none
integer i,j,nx,ny,nabor,ii,jj

! work on local copies so the caller's arguments are never modified
ii = i
jj = j

if ( ii <  1 ) ii = nx
if ( ii >  nx) ii = 1

if ( jj <  1 ) jj = ny
if ( jj >  ny) jj = 1

nabor = (ii-1) + (jj-1)*nx

return
end

!******************************************************************
real*8 function ranf (  )

!    **************************************************************
!    ** RETURNS A UNIFORM RANDOM VARIATE IN THE RANGE 0 TO 1.    **
!    **                                                          **
!    **                 ***************                          **
!    **                 **  WARNING  **                          **
!    **                 ***************                          **
!    **                                                          **
!    ** GOOD RANDOM NUMBER GENERATORS ARE MACHINE SPECIFIC.      **
!    ** PLEASE USE THE ONE RECOMMENDED FOR YOUR MACHINE.         **
!    **************************************************************

integer     l, c, m
parameter ( l = 1029, c = 221591, m = 1048576 )
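! a simple linear congruential generator: seed is updated as
! mod(seed*l + c, m)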

integer     seed
double precision dummy
save        seed
data        seed / 0 /

seed = mod ( seed * l + c, m )
ranf = dble ( seed ) / dble ( m )

return
end
!******************************************************************


