! "$Id$";
!
!  Program usage: mpirun -np <proc> plate2f [all TAO options] 
!
!  This example demonstrates use of the TAO package to solve a bound constrained
!  minimization problem.  This example is based on a problem from the
!  MINPACK-2 test suite.  Given a rectangular 2-D domain and boundary values
!  along the edges of the domain, the objective is to find the surface
!  with the minimal area that satisfies the boundary conditions.
!  The command line options are:
!    -mx <xg>, where <xg> = number of grid points in the 1st coordinate direction
!    -my <yg>, where <yg> = number of grid points in the 2nd coordinate direction
!    -bmx <bxg>, where <bxg> = number of grid points under plate in 1st direction
!    -bmy <byg>, where <byg> = number of grid points under plate in 2nd direction
!    -bheight <ht>, where <ht> = height of the plate
!
!/*T
!   Concepts: TAO - Solving a bound constrained minimization problem
!   Routines: TaoInitialize(); TaoFinalize();
!   Routines: TaoCreate(); TaoDestroy();
!   Routines: TaoAppSetObjectiveAndGradientRoutine(); 
!   Routines: TaoAppSetHessianMat(); TaoAppSetHessianRoutine();
!   Routines: TaoAppSetInitialSolutionVec(); TaoAppSetVariableBounds();
!   Routines: TaoSetOptions();
!   Routines: TaoApplicationCreate(); TaoSolve();
!   Routines: TaoView(); TaoAppDestroy();
!   Processors: n
!T*/



      implicit none

#include "plate2f.h"

! - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
!                   Variable declarations
! - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
!
!  Variables:
!    (common from plate2f.h):
!    Nx, Ny           number of processors in x- and y- directions
!    mx, my           number of grid points in x,y directions
!    N    global dimension of vector

      integer          info          ! used to check for functions returning nonzeros
      Vec              x             ! solution vector
      Vec              xl, xu        ! lower and upper bounds vectorsp
      integer          m             ! number of local elements in vector
      TAO_SOLVER       tao           ! TAO_SOLVER solver context
      TAO_APPLICATION  plateapp      ! PETSc application
      Mat              H             ! Hessian matrix
      ISLocalToGlobalMapping isltog  ! local to global mapping object
      PetscTruth       flg


      external FormFunctionGradient
      external FormHessian
      external MSA_BoundaryConditions
      external MSA_Plate
      external MSA_InitialPoint
! Initialize Tao
      call PetscInitialize(PETSC_NULL_CHARACTER,info)
      call TaoInitialize(PETSC_NULL_CHARACTER,info)
      

! Specify default dimensions of the problem
      mx = 10
      my = 10
      bheight = 0.1

! Check for any command line arguments that override defaults      
      
      call PetscOptionsGetInt(TAO_NULL_CHARACTER,"-mx",mx,flg,info)
      call PetscOptionsGetInt(TAO_NULL_CHARACTER,"-my",my,flg,info)
      
      bmx = mx/2
      bmy = my/2

      call PetscOptionsGetInt(TAO_NULL_CHARACTER,"-bmx",bmx,flg,info)
      call PetscOptionsGetInt(TAO_NULL_CHARACTER,"-bmy",bmy,flg,info)
      call PetscOptionsGetReal(TAO_NULL_CHARACTER,"-bheight",bheight,   &
     &      flg,info)
      

! Calculate any derived values from parameters
      N = mx*my

! Let Petsc determine the dimensions of the local vectors 
      Nx = PETSC_DECIDE
      NY = PETSC_DECIDE

! A two dimensional distributed array will help define this problem, which
! derives from an elliptic PDE on a two-dimensional domain.  From the 
! distributed array, create the vectors

      call DACreate2d(MPI_COMM_WORLD,DA_NONPERIODIC,DA_STENCIL_BOX,         &
     &     mx,my,Nx,Ny,1,1,PETSC_NULL_INTEGER,PETSC_NULL_INTEGER,           &
     &     da,info)
      

! Extract global and local vectors from DA; The local vectors are
! used solely as work space for the evaluation of the function, 
! gradient, and Hessian.  Duplicate for remaining vectors that are 
! the same types.

      call DACreateGlobalVector(da,x,info)
      call DACreateLocalVector(da,localX,info)
      call VecDuplicate(localX,localV,info)

! Create a matrix data structure to store the Hessian.
! Here we (optionally) also associate the local numbering scheme
! with the matrix so that later we can use local indices for matrix
! assembly

      call VecGetLocalSize(x,m,info)
      call MatCreateMPIAIJ(MPI_COMM_WORLD,m,m,N,N,7,PETSC_NULL_INTEGER,  &
     &     3,PETSC_NULL_INTEGER,H,info)

      call MatSetOption(H,MAT_SYMMETRIC,info)
      call DAGetISLocalToGlobalMapping(da,isltog,info)
      call MatSetLocalToGlobalMapping(H,isltog,info)
      

! The Tao code begins here
! Create TAO solver and set desired solution method.
! This problems uses bounded variables, so the
! method must either be 'tao_tron' or 'tao_blmvm'

      call TaoCreate(MPI_COMM_WORLD,'tao_blmvm',tao,info)
      call TaoApplicationCreate(MPI_COMM_WORLD,plateapp,info)
      

!     Set minimization function and gradient, hessian evaluation functions

!     TaoAppSetObjectiveAndGradientRoutine is shortened to 31 chars to comply with some compilers
      call TaoAppSetObjectiveAndGradientRo(plateapp,                    &
     &     FormFunctionGradient,PETSC_NULL_OBJECT,info)

      call TaoAppSetHessianMat(plateapp,H,H,info)
      call TaoAppSetHessianRoutine(plateapp,FormHessian,                 &
     &     PETSC_NULL_OBJECT, info)
      

! Set Variable bounds 
      call MSA_BoundaryConditions(info)
      call VecDuplicate(x,xl,info)
      call VecDuplicate(x,xu,info)
      call MSA_Plate(xl,xu,info)
      call TaoAppSetVariableBounds(plateapp,xl,xu,info)

! Set the initial solution guess
      call MSA_InitialPoint(x, info)
      call TaoAppSetInitialSolutionVec(plateapp,x,info)

! Check for any tao command line options
      call TaoSetOptions(plateapp,tao,info)

! Solve the application
      call TaoSolveApplication(plateapp,tao,info)

!     View TAO solver information
!      call TaoView(tao,info)

! Free TAO data structures
      call TaoDestroy(tao,info)
      call TaoAppDestroy(plateapp,info)  

! Free PETSc data structures
      call VecDestroy(x,info)
      call VecDestroy(xl,info)
      call VecDestroy(xu,info)
      call VecDestroy(Top,info)
      call VecDestroy(Bottom,info)
      call VecDestroy(Left,info)
      call VecDestroy(Right,info)
      call MatDestroy(H,info)
      call VecDestroy(localX,info)
      call VecDestroy(localV,info)
      call DADestroy(da,info)

! Finalize TAO 

      call TaoFinalize(info)
      call PetscFinalize(info)

      end

! ---------------------------------------------------------------------
!
!  FormFunctionGradient - Evaluates function f(X). 
!    
!  Input Parameters:
!  tao   - the TAO_SOLVER context
!  X     - the input vector 
!  dummy - optional user-defined context, as set by TaoSetFunction()
!          (not used here)
!
!  Output Parameters:
!  fcn     - the newly evaluated function
!  G       - the gradient vector
!  info  - error code
!


      subroutine FormFunctionGradient(tao,X,fcn,G,dummy,info)
      implicit none

! da, localX, localG, Top, Bottom, Left, Right defined in plate2f.h
#include "plate2f.h"
      
! Input/output variables

      TAO_SOLVER       tao
      PetscScalar fcn
      Vec              X, G
      integer          dummy, info
      
      integer          i,j,row
      integer          xs, xm, gxs, gxm, ys, ym, gys, gym
      PetscScalar      ft,zero,hx,hy,hydhx,hxdhy
      PetscScalar      area,rhx,rhy
      PetscScalar      f1,f2,f3,f4,f5,f6,d1,d2,d3
      PetscScalar      d4,d5,d6,d7,d8
      PetscScalar      df1dxc,df2dxc,df3dxc,df4dxc
      PetscScalar      df5dxc,df6dxc
      PetscScalar      xc,xl,xr,xt,xb,xlt,xrb


! PETSc's VecGetArray acts differently in Fortran than it does in C.
! Calling VecGetArray((Vec) X, (PetscScalar) x_array(0:1), (PetscOffset) x_index, info)
! will return an array of doubles referenced by x_array offset by x_index.
!  i.e.,  to reference the kth element of X, use x_array(k + x_index).
! Notice that by declaring the arrays with range (0:1), we are using the C 0-indexing practice.
      PetscScalar      g_v(0:1),x_v(0:1)
      PetscScalar      top_v(0:1),left_v(0:1)
      PetscScalar      right_v(0:1),bottom_v(0:1)
      PetscOffset      g_i,left_i,right_i
      PetscOffset      bottom_i,top_i,x_i

      ft = 0.0d0
      zero = 0.0d0
      hx = 1.0d0/(mx + 1)
      hy = 1.0d0/(my + 1)
      hydhx = hy/hx
      hxdhy = hx/hy
      area = 0.5d0 * hx * hy
      rhx = mx + 1.0d0
      rhy = my + 1.0d0


! Get local mesh boundaries
      call DAGetCorners(da,xs,ys,PETSC_NULL_INTEGER,xm,ym,              &
     &                  PETSC_NULL_INTEGER,info)
      call DAGetGhostCorners(da,gxs,gys,PETSC_NULL_INTEGER,             &
     &                       gxm,gym,PETSC_NULL_INTEGER,info)

! Scatter ghost points to local vector
      call DAGlobalToLocalBegin(da,X,INSERT_VALUES,localX,info)
      call DAGlobalToLocalEnd(da,X,INSERT_VALUES,localX,info)

! Initialize the vector to zero
      call VecSet(localV,zero,info)

! Get arrays to vector data (See note above about using VecGetArray in Fortran)
      call VecGetArray(localX,x_v,x_i,info)
      call VecGetArray(localV,g_v,g_i,info)
      call VecGetArray(Top,top_v,top_i,info)
      call VecGetArray(Bottom,bottom_v,bottom_i,info)
      call VecGetArray(Left,left_v,left_i,info)
      call VecGetArray(Right,right_v,right_i,info)

! Compute function over the locally owned part of the mesh       
      do j = ys,ys+ym-1
         do i = xs,xs+xm-1
            row = (j-gys)*gxm + (i-gxs)
            xc = x_v(row+x_i)
            xt = xc
            xb = xc
            xr = xc
            xl = xc
            xrb = xc
            xlt = xc

            if (i .eq. 0) then !left side
               xl = left_v(j - ys + 1 + left_i)
               xlt = left_v(j - ys + 2 + left_i)
            else
               xl = x_v(row - 1 + x_i)
            endif

            if (j .eq. 0) then !bottom side
               xb = bottom_v(i - xs + 1 + bottom_i)
               xrb = bottom_v(i - xs + 2 + bottom_i)
            else
               xb = x_v(row - gxm + x_i)
            endif

            if (i + 1 .eq. gxs + gxm) then !right side
               xr = right_v(j - ys + 1 + right_i)
               xrb = right_v(j - ys + right_i)
            else
               xr = x_v(row + 1 + x_i)
            endif

            if (j + 1 .eq. gys + gym) then !top side
               xt = top_v(i - xs + 1 + top_i)
               xlt = top_v(i - xs + top_i)
            else
               xt = x_v(row + gxm + x_i)
            endif

            if ((i .gt. gxs ) .and. (j + 1 .lt. gys + gym)) then
               xlt = x_v(row - 1 + gxm + x_i)
            endif

            if ((j .gt. gys) .and. (i + 1 .lt. gxs + gxm)) then
               xrb = x_v(row + 1 - gxm + x_i)
            endif

            d1 = xc-xl
            d2 = xc-xr
            d3 = xc-xt
            d4 = xc-xb
            d5 = xr-xrb
            d6 = xrb-xb
            d7 = xlt-xl
            d8 = xt-xlt

            df1dxc = d1 * hydhx
            df2dxc = d1 * hydhx + d4 * hxdhy
            df3dxc = d3 * hxdhy
            df4dxc = d2 * hydhx + d3 * hxdhy
            df5dxc = d2 * hydhx
            df6dxc = d4 * hxdhy

            d1 = d1 * rhx
            d2 = d2 * rhx
            d3 = d3 * rhy
            d4 = d4 * rhy
            d5 = d5 * rhy
            d6 = d6 * rhx
            d7 = d7 * rhy
            d8 = d8 * rhx

            f1 = sqrt(1.0d0 + d1*d1 + d7*d7)
            f2 = sqrt(1.0d0 + d1*d1 + d4*d4)
            f3 = sqrt(1.0d0 + d3*d3 + d8*d8)
            f4 = sqrt(1.0d0 + d3*d3 + d2*d2)
            f5 = sqrt(1.0d0 + d2*d2 + d5*d5)
            f6 = sqrt(1.0d0 + d4*d4 + d6*d6)

            ft = ft + f2 + f4

            df1dxc = df1dxc / f1
            df2dxc = df2dxc / f2
            df3dxc = df3dxc / f3
            df4dxc = df4dxc / f4
            df5dxc = df5dxc / f5
            df6dxc = df6dxc / f6

            g_v(row + g_i) = 0.5 * (df1dxc + df2dxc + df3dxc + df4dxc +  &
     &                              df5dxc + df6dxc)
         enddo
      enddo

! Compute triangular areas along the border of the domain. 
      if (xs .eq. 0) then  ! left side
         do j=ys,ys+ym-1
            d3 = (left_v(j-ys+1+left_i) - left_v(j-ys+2+left_i))         &
     &                 * rhy
            d2 = (left_v(j-ys+1+left_i) - x_v((j-gys)*gxm + x_i))        &
     &                 * rhx
            ft = ft + sqrt(1.0d0 + d3*d3 + d2*d2)
         enddo
      endif

      
      if (ys .eq. 0) then !bottom side
         do i=xs,xs+xm-1
            d2 = (bottom_v(i+1-xs+bottom_i)-bottom_v(i-xs+2+bottom_i))    &
     &                    * rhx
            d3 = (bottom_v(i-xs+1+bottom_i)-x_v(i-gxs+x_i))*rhy
            ft = ft + sqrt(1.0 + d3*d3 + d2*d2)
         enddo
      endif

      
      if (xs + xm .eq. mx) then ! right side
         do j=ys,ys+ym-1
            d1 = (x_v((j+1-gys)*gxm-1+x_i)-right_v(j-ys+1+right_i))*rhx
            d4 = (right_v(j-ys+right_i) - right_v(j-ys+1+right_i))*rhy
            ft = ft + sqrt(1.0d0 + d1*d1 + d4*d4)
         enddo
      endif

      
      if (ys + ym .eq. my) then
         do i=xs,xs+xm-1
            d1 = (x_v((gym-1)*gxm+i-gxs+x_i) - top_v(i-xs+1+top_i))*rhy
            d4 = (top_v(i-xs+1+top_i) - top_v(i-xs+top_i))*rhx
            ft = ft + sqrt(1.0d0 + d1*d1 + d4*d4)
         enddo
      endif

      
      if ((ys .eq. 0) .and. (xs .eq. 0)) then
         d1 = (left_v(0 + left_i) - left_v(1 + left_i)) * rhy
         d2 = (bottom_v(0+bottom_i)-bottom_v(1+bottom_i))*rhx
         ft = ft + sqrt(1.0d0 + d1*d1 + d2*d2)
      endif

      if ((ys + ym .eq. my) .and. (xs + xm .eq. mx)) then
         d1 = (right_v(ym+1+right_i) - right_v(ym+right_i))*rhy
         d2 = (top_v(xm+1+top_i) - top_v(xm + top_i))*rhx
         ft = ft + sqrt(1.0d0 + d1*d1 + d2*d2)
      endif

      ft = ft * area
      call MPI_Allreduce(ft,fcn,1,MPI_DOUBLE_PRECISION,                  &
     &             MPI_SUM,MPI_COMM_WORLD,info)



! Restore vectors
      call VecRestoreArray(localX,x_v,x_i,info)
      call VecRestoreArray(localV,g_v,g_i,info)
      call VecRestoreArray(Left,left_v,left_i,info)
      call VecRestoreArray(Top,top_v,top_i,info)
      call VecRestoreArray(Bottom,bottom_v,bottom_i,info)
      call VecRestoreArray(Right,right_v,right_i,info)

! Scatter values to global vector
      call DALocalToGlobal(da,localV,INSERT_VALUES,G,info)

      call PetscLogFlops(70*xm*ym,info)

      return
      end  !FormFunctionGradient
      




! ----------------------------------------------------------------------------
! 
!/*
!   FormHessian - Evaluates Hessian matrix.
!
!   Input Parameters:
!.  tao  - the TAO_SOLVER context
!.  X    - input vector
!.  dummy  - not used 
!
!   Output Parameters:
!.  Hessian    - Hessian matrix
!.  Hpc    - optionally different preconditioning matrix
!.  flag - flag indicating matrix structure
!
!   Notes:
!   Due to mesh point reordering with DAs, we must always work
!   with the local mesh points, and then transform them to the new
!   global numbering with the local-to-global mapping.  We cannot work
!   directly with the global numbers for the original uniprocessor mesh!  
!
!   Two methods are available for imposing this transformation
!   when setting matrix entries:
!     (A) MatSetValuesLocal(), using the local ordering (including
!         ghost points!)
!         - Do the following two steps once, before calling TaoSolve()
!           - Use DAGetISLocalToGlobalMapping() to extract the
!             local-to-global map from the DA
!           - Associate this map with the matrix by calling
!             MatSetLocalToGlobalMapping() 
!         - Then set matrix entries using the local ordering
!           by calling MatSetValuesLocal()
!     (B) MatSetValues(), using the global ordering 
!         - Use DAGetGlobalIndices() to extract the local-to-global map
!         - Then apply this map explicitly yourself
!         - Set matrix entries using the global ordering by calling
!           MatSetValues()
!   Option (A) seems cleaner/easier in many cases, and is the procedure
!   used in this example.
*/
      subroutine FormHessian(tao, X, Hessian, Hpc, flg, dummy, info)
      implicit none

! da,Top,Left,Right,Bottom,mx,my,localX defined in plate2f.h
#include "plate2f.h"
      
      TAO_SOLVER     tao
      Vec            X
      Mat            Hessian,Hpc
      MatStructure   flg
      integer        dummy,info

      integer        i,j,k,row
      integer        xs,xm,gxs,gxm,ys,ym,gys,gym,col(0:6)
      PetscScalar    hx,hy,hydhx,hxdhy,rhx,rhy
      PetscScalar    f1,f2,f3,f4,f5,f6,d1,d2,d3
      PetscScalar    d4,d5,d6,d7,d8
      PetscScalar    xc,xl,xr,xt,xb,xlt,xrb
      PetscScalar    hl,hr,ht,hb,hc,htl,hbr

! PETSc's VecGetArray acts differently in Fortran than it does in C.
! Calling VecGetArray((Vec) X, (PetscScalar) x_array(0:1), (PetscOffset) x_index, info)
! will return an array of doubles referenced by x_array offset by x_index.
!  i.e.,  to reference the kth element of X, use x_array(k + x_index).
! Notice that by declaring the arrays with range (0:1), we are using the C 0-indexing practice.
      PetscScalar   right_v(0:1),left_v(0:1)
      PetscScalar   bottom_v(0:1),top_v(0:1)
      PetscScalar   x_v(0:1)
      PetscOffset   x_i,right_i,left_i
      PetscOffset   bottom_i,top_i
      PetscScalar   v(0:6)
      PetscTruth    assembled
      
! Set various matrix options 
      call MatSetOption(Hessian,MAT_IGNORE_OFF_PROC_ENTRIES,info)
      call MatSetOption(Hessian,MAT_COLUMNS_SORTED,info)
      call MatSetOption(Hessian,MAT_ROWS_SORTED,info)

! Get local mesh boundaries
      call DAGetCorners(da,xs,ys,PETSC_NULL_INTEGER,xm,ym,              &
     &                  PETSC_NULL_INTEGER,info)
      call DAGetGhostCorners(da,gxs,gys,PETSC_NULL_INTEGER,gxm,gym,     &
     &                       PETSC_NULL_INTEGER,info)

! Scatter ghost points to local vectors
      call DAGlobalToLocalBegin(da,X,INSERT_VALUES,localX,info)
      call DAGlobalToLocalEnd(da,X,INSERT_VALUES,localX,info)

! Get pointers to vector data (see note on Fortran arrays above)
      call VecGetArray(localX,x_v,x_i,info)
      call VecGetArray(Top,top_v,top_i,info)
      call VecGetArray(Bottom,bottom_v,bottom_i,info)
      call VecGetArray(Left,left_v,left_i,info)
      call VecGetArray(Right,right_v,right_i,info)

! Initialize matrix entries to zero
      call MatAssembled(Hessian,assembled,info)
      if (assembled .eq. PETSC_TRUE) call MatZeroEntries(Hessian,info)


      rhx = mx + 1.0
      rhy = my + 1.0
      hx = 1.0/rhx
      hy = 1.0/rhy
      hydhx = hy/hx
      hxdhy = hx/hy
! compute Hessian over the locally owned part of the mesh

      do  i=xs,xs+xm-1
         do  j=ys,ys+ym-1
            row = (j-gys)*gxm + (i-gxs)
            
            xc = x_v(row + x_i)
            xt = xc
            xb = xc
            xr = xc
            xl = xc
            xrb = xc
            xlt = xc

            if (i .eq. gxs) then   ! Left side
               xl = left_v(left_i + j - ys + 1)
               xlt = left_v(left_i + j - ys + 2)
            else
               xl = x_v(x_i + row -1 )
            endif

            if (j .eq. gys) then ! bottom side
               xb = bottom_v(bottom_i + i - xs + 1)
               xrb = bottom_v(bottom_i + i - xs + 2)
            else
               xb = x_v(x_i + row - gxm)
            endif

            if (i+1 .eq. gxs + gxm) then !right side
               xr = right_v(right_i + j - ys + 1)
               xrb = right_v(right_i + j - ys)
            else
               xr = x_v(x_i + row + 1)
            endif

            if (j+1 .eq. gym+gys) then !top side
               xt = top_v(top_i +i - xs + 1)
               xlt = top_v(top_i + i - xs)
            else
               xt = x_v(x_i + row + gxm)
            endif

            if ((i .gt. gxs) .and. (j+1 .lt. gys+gym)) then
               xlt = x_v(x_i + row - 1 + gxm)
            endif
            
            if ((i+1 .lt. gxs+gxm) .and. (j .gt. gys)) then
               xrb = x_v(x_i + row + 1 - gxm)
            endif

            d1 = (xc-xl)*rhx
            d2 = (xc-xr)*rhx
            d3 = (xc-xt)*rhy
            d4 = (xc-xb)*rhy
            d5 = (xrb-xr)*rhy
            d6 = (xrb-xb)*rhx
            d7 = (xlt-xl)*rhy
            d8 = (xlt-xt)*rhx
            
            f1 = sqrt( 1.0d0 + d1*d1 + d7*d7)
            f2 = sqrt( 1.0d0 + d1*d1 + d4*d4)
            f3 = sqrt( 1.0d0 + d3*d3 + d8*d8)
            f4 = sqrt( 1.0d0 + d3*d3 + d2*d2)
            f5 = sqrt( 1.0d0 + d2*d2 + d5*d5)
            f6 = sqrt( 1.0d0 + d4*d4 + d6*d6)
            
            
            hl = (-hydhx*(1.0+d7*d7)+d1*d7)/(f1*f1*f1)+                 &
     &              (-hydhx*(1.0+d4*d4)+d1*d4)/(f2*f2*f2)

            hr = (-hydhx*(1.0+d5*d5)+d2*d5)/(f5*f5*f5)+                 &
     &            (-hydhx*(1.0+d3*d3)+d2*d3)/(f4*f4*f4)

            ht = (-hxdhy*(1.0+d8*d8)+d3*d8)/(f3*f3*f3)+                 &
     &                (-hxdhy*(1.0+d2*d2)+d2*d3)/(f4*f4*f4)

            hb = (-hxdhy*(1.0+d6*d6)+d4*d6)/(f6*f6*f6)+                 &
     &              (-hxdhy*(1.0+d1*d1)+d1*d4)/(f2*f2*f2)
            
            hbr = -d2*d5/(f5*f5*f5) - d4*d6/(f6*f6*f6)
            htl = -d1*d7/(f1*f1*f1) - d3*d8/(f3*f3*f3)
            
            hc = hydhx*(1.0+d7*d7)/(f1*f1*f1) +                         &
     &              hxdhy*(1.0+d8*d8)/(f3*f3*f3) +                      &
     &              hydhx*(1.0+d5*d5)/(f5*f5*f5) +                      &
     &              hxdhy*(1.0+d6*d6)/(f6*f6*f6) +                      &
     &              (hxdhy*(1.0+d1*d1)+hydhx*(1.0+d4*d4)-               &
     &              2*d1*d4)/(f2*f2*f2) +  (hxdhy*(1.0+d2*d2)+          &
     &              hydhx*(1.0+d3*d3)-2*d2*d3)/(f4*f4*f4)               
            
            hl = hl * 0.5
            hr = hr * 0.5
            ht = ht * 0.5
            hb = hb * 0.5
            hbr = hbr * 0.5
            htl = htl * 0.5
            hc = hc * 0.5 

            k = 0

            if (j .gt. 0) then
               v(k) = hb
               col(k) = row - gxm
               k=k+1
            endif

            if ((j .gt. 0) .and. (i .lt. mx-1)) then
               v(k) = hbr
               col(k) = row-gxm+1
               k=k+1
            endif

            if (i .gt. 0) then
               v(k) = hl
               col(k) = row - 1
               k = k+1
            endif

            v(k) = hc
            col(k) = row
            k=k+1

            if (i .lt. mx-1) then
               v(k) = hr
               col(k) = row + 1
               k=k+1
            endif

            if ((i .gt. 0) .and. (j .lt. my-1)) then
               v(k) = htl
               col(k) = row + gxm - 1
               k=k+1
            endif

            if (j .lt. my-1) then
               v(k) = ht
               col(k) = row + gxm
               k=k+1
            endif

! Set matrix values using local numbering, defined earlier in main routine
            call MatSetValuesLocal(Hessian,1,row,k,col,v,INSERT_VALUES,       &
     &                              info)

            

         enddo
      enddo
      
! restore vectors
      call VecRestoreArray(localX,x_v,x_i,info)
      call VecRestoreArray(Left,left_v,left_i,info)
      call VecRestoreArray(Right,right_v,right_i,info)
      call VecRestoreArray(Top,top_v,top_i,info)
      call VecRestoreArray(Bottom,bottom_v,bottom_i,info)


! Assemble the matrix
      call MatAssemblyBegin(Hessian,MAT_FINAL_ASSEMBLY,info)
      call MatAssemblyEnd(Hessian,MAT_FINAL_ASSEMBLY,info)

      call PetscLogFlops(199*xm*ym,info)

      return
      end  
      
      



! Top,Left,Right,Bottom,bheight,mx,my,bmx,bmy,H, defined in plate2f.h

! ----------------------------------------------------------------------------
!
!/*
!     MSA_BoundaryConditions - calculates the boundary conditions for the region
!
!
!*/

      subroutine MSA_BoundaryConditions(info)
      implicit none

! Top,Left,Right,Bottom,bheight,mx,my,bmx,bmy defined in plate2f.h
#include "plate2f.h"

      integer i,j,k,limit,info,maxits
      integer          xs, xm, gxs, gxm, ys, ym, gys, gym
      integer bsize, lsize, tsize, rsize
      PetscScalar      one,two,three,tol
      PetscScalar      scl,fnorm,det,xt
      PetscScalar      yt,hx,hy,u1,u2,nf1,nf2
      PetscScalar      njac11,njac12,njac21,njac22
      PetscScalar      b, t, l, r
      PetscScalar      boundary_v(0:1)
      PetscOffset      boundary_i
      logical exitloop
      TaoTruth flg

      limit=0
      maxits = 5
      tol=1e-10
      b=-0.5d0
      t= 0.5d0
      l=-0.5d0
      r= 0.5d0
      xt=0
      yt=0
      one=1.0d0
      two=2.0d0
      three=3.0d0


      call DAGetCorners(da,xs,ys,PETSC_NULL_INTEGER,xm,ym,               &
     &                  PETSC_NULL_INTEGER,info)
      call DAGetGhostCorners(da,gxs,gys,PETSC_NULL_INTEGER,              &
     &                       gxm,gym,PETSC_NULL_INTEGER,info)

      bsize = xm + 2
      lsize = ym + 2
      rsize = ym + 2
      tsize = xm + 2
      

      call VecCreateMPI(MPI_COMM_WORLD,bsize,PETSC_DECIDE,Bottom,info)
      call VecCreateMPI(MPI_COMM_WORLD,tsize,PETSC_DECIDE,Top,info)
      call VecCreateMPI(MPI_COMM_WORLD,lsize,PETSC_DECIDE,Left,info)
      call VecCreateMPI(MPI_COMM_WORLD,rsize,PETSC_DECIDE,Right,info)

      hx= (r-l)/(mx+1)
      hy= (t-b)/(my+1)

      do j=0,3
         
         if (j.eq.0) then
            yt=b
            xt=l+hx*xs
            limit=bsize
            call VecGetArray(Bottom,boundary_v,boundary_i,info)
            

         elseif (j.eq.1) then
            yt=t
            xt=l+hx*xs
            limit=tsize
            call VecGetArray(Top,boundary_v,boundary_i,info)

         elseif (j.eq.2) then
            yt=b+hy*ys
            xt=l
            limit=lsize
            call VecGetArray(Left,boundary_v,boundary_i,info)

         elseif (j.eq.3) then
            yt=b+hy*ys
            xt=r
            limit=rsize
            call VecGetArray(Right,boundary_v,boundary_i,info)
         endif
         

         do i=0,limit-1
            
            u1=xt
            u2=-yt
            k = 0
            exitloop = .false.
            do while (k .lt. maxits .and. (.not. exitloop) )

               nf1=u1 + u1*u2*u2 - u1*u1*u1/three-xt
               nf2=-u2 - u1*u1*u2 + u2*u2*u2/three-yt
               fnorm=sqrt(nf1*nf1+nf2*nf2)
               if (fnorm .gt. tol) then
                  njac11=one+u2*u2-u1*u1
                  njac12=two*u1*u2
                  njac21=-two*u1*u2
                  njac22=-one - u1*u1 + u2*u2
                  det = njac11*njac22-njac21*njac12
                  u1 = u1-(njac22*nf1-njac12*nf2)/det
                  u2 = u2-(njac11*nf2-njac21*nf1)/det
               else 
                  exitloop = .true.
               endif
               k=k+1
            enddo

            boundary_v(i + boundary_i) = u1*u1-u2*u2
            if ((j .eq. 0) .or. (j .eq. 1)) then
               xt = xt + hx
            else
               yt = yt + hy
            endif

         enddo
               

         if (j.eq.0) then
            call VecRestoreArray(Bottom,boundary_v,boundary_i,info)
         elseif (j.eq.1) then
            call VecRestoreArray(Top,boundary_v,boundary_i,info)
         elseif (j.eq.2) then
            call VecRestoreArray(Left,boundary_v,boundary_i,info)
         elseif (j.eq.3) then
            call VecRestoreArray(Right,boundary_v,boundary_i,info)
         endif
         
      enddo


! Scale the boundary if desired
      call PetscOptionsGetReal(PETSC_NULL_CHARACTER,"-bottom",            &
     &                         scl,flg,info)
      if (flg .eq. PETSC_TRUE) then
         call VecScale(scl,Bottom,info)
      endif

      call PetscOptionsGetReal(PETSC_NULL_CHARACTER,"-top",               &
     &                         scl,flg,info)
      if (flg .eq. PETSC_TRUE) then
         call VecScale(scl,Top,info)
      endif

      call PetscOptionsGetReal(PETSC_NULL_CHARACTER,"-right",             &
     &                         scl,flg,info)
      if (flg .eq. PETSC_TRUE) then
         call VecScale(scl,Right,info)
      endif

      call PetscOptionsGetReal(PETSC_NULL_CHARACTER,"-left",              &
     &                         scl,flg,info)
      if (flg .eq. PETSC_TRUE) then
         call VecScale(scl,Left,info)
      endif
         
      
      return
      end

! ----------------------------------------------------------------------------
!
!/*
!     MSA_Plate - Calculates an obstacle for surface to stretch over
!
!     Output Parameter:
!.    xl - lower bound vector
!.    xu - upper bound vector
!
!*/

      subroutine MSA_Plate(xl,xu,info)
      implicit none

! mx,my,bmx,bmy,da,bheight defined in plate2f.h
#include "plate2f.h"
      Vec              xl,xu
      integer          i,j,row,info
      integer          xs, xm, ys, ym
      PetscScalar      lb,ub

! PETSc's VecGetArray acts differently in Fortran than it does in C.
! Calling VecGetArray((Vec) X, (PetscScalar) x_array(0:1), (PetscOffset) x_index, info)
! will return an array of doubles referenced by x_array offset by x_index.
!  i.e.,  to reference the kth element of X, use x_array(k + x_index).
! Notice that by declaring the arrays with range (0:1), we are using the C 0-indexing practice.
      PetscScalar      xl_v(0:1)
      PetscOffset      xl_i


      lb = -1.0d300
      ub = 1.0d300

      if (bmy .lt. 0) bmy = 0
      if (bmy .gt. my) bmy = my
      if (bmx .lt. 0) bmx = 0
      if (bmx .gt. mx) bmx = mx
      

      call DAGetCorners(da,xs,ys,PETSC_NULL_INTEGER,xm,ym,               &
     &             PETSC_NULL_INTEGER,info)

      call VecSet(xl,lb,info)
      call VecSet(xu,ub,info)

      call VecGetArray(xl,xl_v,xl_i,info)
      

      do i=xs,xs+xm-1

         do j=ys,ys+ym-1
            
            row=(j-ys)*xm + (i-xs)

            if (i.ge.((mx-bmx)/2) .and. i.lt.(mx-(mx-bmx)/2) .and.           &
     &          j.ge.((my-bmy)/2) .and. j.lt.(my-(my-bmy)/2)) then  
               xl_v(xl_i+row) = bheight

            endif

         enddo
      enddo


      call VecRestoreArray(xl,xl_v,xl_i,info)
      
      return
      end



      
      
! ----------------------------------------------------------------------------
!
!/*
!     MSA_InitialPoint - Calculates an obstacle for surface to stretch over
!
!     Output Parameter:
!.    X - vector for initial guess
!
!*/

      subroutine MSA_InitialPoint(X, info)
      implicit none

! mx,my,localX,da,Top,Left,Bottom,Right defined in plate2f.h
#include "plate2f.h"
      Vec               X

      integer           start,i,j,info
      integer           row,xs,xm,gxs,gxm,ys,ym,gys,gym
      PetscScalar       zero, np5

! PETSc's VecGetArray acts differently in Fortran than it does in C.
! Calling VecGetArray((Vec) X, (PetscScalar) x_array(0:1), (integer) x_index, info)
! will return an array of doubles referenced by x_array offset by x_index.
!  i.e.,  to reference the kth element of X, use x_array(k + x_index).
! Notice that by declaring the arrays with range (0:1), we are using the C 0-indexing practice.
      PetscScalar   left_v(0:1),right_v(0:1)
      PetscScalar   bottom_v(0:1),top_v(0:1)
      PetscScalar   x_v(0:1)
      PetscOffset   left_i, right_i, top_i
      PetscOffset   bottom_i,x_i
      PetscTruth    flg
      PetscRandom   rctx
      
      zero = 0.0d0
      np5 = -0.5d0

      call PetscOptionsGetInt(PETSC_NULL_CHARACTER,"-start",            &
     &                        start,flg,info)

      if ((flg .eq. PETSC_TRUE) .and. (start .eq. 0)) then  ! the zero vector is reasonable
         call VecSet(X,zero,info)

      elseif ((flg .eq. PETSC_TRUE) .and. (start .gt. 0)) then  ! random start -0.5 < xi < 0.5 
         call PetscRandomCreate(MPI_COMM_WORLD,rctx,info)
         do i=0,start-1
            call VecSetRandom(X,rctx,info)
         enddo

         call PetscRandomDestroy(rctx,info)
         call VecShift(X,np5,info)

      else   ! average of boundary conditions
         
!        Get Local mesh boundaries
         call DAGetCorners(da,xs,ys,PETSC_NULL_INTEGER,xm,ym,             &
     &                     PETSC_NULL_INTEGER,info) 
         call DAGetGhostCorners(da,gxs,gys,PETSC_NULL_INTEGER,gxm,gym,    &
     &                     PETSC_NULL_INTEGER,info)



!        Get pointers to vector data
         call VecGetArray(Top,top_v,top_i,info)
         call VecGetArray(Bottom,bottom_v,bottom_i,info)
         call VecGetArray(Left,left_v,left_i,info)
         call VecGetArray(Right,right_v,right_i,info)
         
         call VecGetArray(localX,x_v,x_i,info)
      
!        Perform local computations 
         do  j=ys,ys+ym-1
            do i=xs,xs+xm-1
               row = (j-gys)*gxm  + (i-gxs)
               x_v(x_i + row) = ((j+1)*bottom_v(bottom_i +i-xs+1)/my        &
     &             + (my-j+1)*top_v(top_i+i-xs+1)/(my+2) +                  &
     &              (i+1)*left_v(left_i+j-ys+1)/mx       +                  &
     &              (mx-i+1)*right_v(right_i+j-ys+1)/(mx+2))*0.5
            enddo
         enddo

!        Restore vectors
         call VecRestoreArray(localX,x_v,x_i,info)

         call VecRestoreArray(Left,left_v,left_i,info)
         call VecRestoreArray(Top,top_v,top_i,info)
         call VecRestoreArray(Bottom,bottom_v,bottom_i,info)
         call VecRestoreArray(Right,right_v,right_i,info)

         call DALocalToGlobal(da,localX,INSERT_VALUES,X,info)

      endif

      return
      end