      Program Jacobi

!----------------------------------------------
!  this is a problem taken from Hoffman's book
!  on CFD.   It is governed by Laplace's equation.
!
!  The boundary conditions are :
!       a = 0   at x=0

!       a = 0   at y=5

!       a = 0   at y=0 and     0 < x < 1
!     da/dy=0   at y=0 and     1 < x < 1.2 
!       a = 100 at y=0 and   1.2 < x < 5

!       a = 100 at x=5 and     0 < y < 3
!     da/dx=0   at x=5 and   3.0 < x < 3.2 
!       a = 0   at x=5 and   3.2 < y < 5
!
!----------------------------------------------

! YOU MUST USE THIS (implicit none) IN YOUR CODE !
      implicit none

      integer n, m1, m2
!      parameter ( n = 8 )
       parameter ( n = 400 )
!        parameter ( n = 1000 )
!        parameter ( n = 2000 )

      real*8, dimension(n,n) ::  a, aold, b1,b2,b3,b4,da, x, y

      real*8     rms, rms2, rms0, dx, xx
      real*8 time0,time1,time2,time3,mflops
      real*8 time0i,time1i,time2i,time3i

      integer  i,j, niter, nprocset,mclock
      character*5   nproc
      logical  mask(n,n), mask2(n), mask3(n)


! other than the following 3 lines of code, this is a purely
! standard fortran code.    The following are simply
! compiler directives that tell the computer how to arrange
! the data in memory.    You should try different forms here
! if you have time  (e.g. 
!         procs(8,4)
! for the 32 processor case

!hpf$ processors procs(NUMBER_OF_PROCESSORS(),1)
!hpf$ distribute(block,block) onto procs::a
!hpf$  align with a :: aold,b1,b2,b3,b4,da,x,y,mask

!  should use allocate to set size of arrays     

      time0 = 0.0
      time1 = 0.0
      time2 = 0.0
      time3 = 0.0

      time0i = 0.0
      time1i = 0.0
      time2i = 0.0
      time3i = 0.0


      time3i = real(mclock())/100.0

      mask  = .false.
      mask2 = .false.
      mask3 = .false.

      rms  = 1.0
      rms0 = 1.0



! this sets the grid size
      dx   = 5.0 / ( real(n) - 1.0 )

      do  i=1,n
        do j=1,n 
           x(i,j) = dx * ( i - 1.0 )
           y(i,j) = dx * ( j - 1.0 )
        enddo
      enddo


! this sets the mask for the y=0 and 1<x<1.2 boundary conditions
      do i = 1, n
        xx = dx * ( i - 1.0 )
        if (  xx .gt. 1.0 .and. xx .lt. 1.2)  mask2(i)= .true.
      enddo

! this sets the mask for the x=5 and 3<y<3.2 boundary conditions
      do i = 1, n
        xx = dx * ( i - 1.0 )
        if (  xx .gt. 3.0 .and. xx .lt. 3.2)  mask3(i)= .true.
      enddo

!      print *, ' enter no. of iterations :'
!      read *, niter
      niter = 1000

      print *, ' Will perform ', niter, ' iterations'

! this defines all the 'inner' points
      mask( 2:n-1, 2:n-1 ) = .true.

! initialize entire field
! lets pick a bad guess for a  (ie a=x)
      a = x

! initialize at x=0
      a(1,:) = 0.0

! initialize at x=5
      a(n,:) = 100.0
      do j = 1, n
        xx = dx * ( j - 1.0 )
        if (  xx .gt. 3.2 ) a(n,j) = 0.0
      enddo

! initialize at y=0
      a(:,1) = 100.0
      do i = 1, n
        xx = dx * ( i - 1.0 )
        if (  xx .lt. 1.0 ) a(i,1) = 0.0
      enddo


! initialize at y=5
      a(:,n) = 0.0


      time3 = real(mclock())/100.0 - time3i


! --------this is the start of the iterative scheme--------
      do j = 1, niter 

        aold = a

! get neighbor values :
      time0i = real(mclock())/100.0
        b1 = cshift( a,  1,  1 )
        b2 = cshift( a, -1, 1 )
        b3 = cshift( a,  1, 2 )
        b4 = cshift( a, -1, 2 )
      time0 = time0 + real(mclock())/100.0 - time0i

! iterate Laplace's eqtn. using Jacobi method:
      time1i = real(mclock())/100.0 
        where ( mask ) 
         a = (b1 + b2 + b3 + b4) * 0.25
        endwhere
      time1 = time1 + real(mclock())/100.0 - time1i

! apply boundary conditions:
! da/dx = 0 and da/dy = 0
      time2i = real(mclock())/100.0

        where ( mask2 ) 
          a(:,1) = 0.3333333 * ( 4.0 * a(:,2) - a(:,3) )
        endwhere

        where ( mask3 ) 
          a(n,:) = 0.3333333 * ( 4.0 * a(n-1,:) - a(n-2,:) )
        endwhere

      time2 = time2 + real(mclock())/100.0 - time2i

! keep track of convergence data
        if ( mod(j,10) .eq. 0 .or. j .eq. 1)  then
          da = abs(a-aold)
          rms = sum( da ) / real(n)
          rms2 = sqrt ( sum( da**2 ) ) / real(n)
          if ( j .eq. 1 ) rms0 = rms
          print *, j, rms/rms0, rms2
        endif

      enddo

      print *, ' '
      print *, ' '
      print *, ' '
      print *, ' Time for Setting up:', time3

      print *, ' Time for communications :' , time0

      print *, ' Time for Computations :', time1

      print *, ' Time for Boundary Conditions:', time2

      print *, ' Total time = ', time0+time1+time2+time3
      print *, ' '

      print *, ' No. of F.P. Ops =  ', n*n*4.0*niter
      print *, ' '

      mflops = n*n*4.0*niter / (time0+time1+time3) / 1.0e6
      print *, ' megaflops  =  ', mflops
      print *, ' '

      print *, ' No. of Grid Points = ', n, ' x ', n
      print *, ' '

! write out a few lines of data
      do i = 1, n
          write(33,*)  i, a(i,1)
      enddo

      m1 = 1.5/dx + 1
      do i = 1, n
          write(34,*)  i, a(i,m1)
      enddo

      m2 = 3.1/dx + 1
      do i = 1, n
          write(35,*)  i, a(i,m2)
      enddo

      do i = 1, n
          write(36,*)  i, a(i,n)
      enddo


! write out data to read into gnuplot :

      if ( n .le. 1280 ) then

        do j = 1, n
        do i = 1, n
          write(40,*)  a(i,j)
          write(41,*)  x(i,j), y(i,j), a(i,j)
        enddo
          write(40,*) 
          write(41,*) 
      enddo

      else

        print *, 'not storing data to disk...'

      endif      

      stop
      end
