Home

Context Navigation

← Previous Change
Next Change →

poisfft.f90

Timestamp:

Aug 15, 2013 8:46:27 AM (11 years ago)

Author:

raasch

Message:

tridia-solver moved to seperate module; the tridiagonal matrix coefficients of array tri are calculated only once at the beginning

File:

: 1 edited

palm/trunk/SOURCE/poisfft.f90 (modified) (5 diffs)

Legend:

: Unmodified
: Added
: Removed

palm/trunk/SOURCE/poisfft.f90

-                      r1209
+                      r1212
 ! Current revisions:
 ! -----------------
+!
+! tridia routines moved to seperate module tridia_solver
+!
 ! Former revisions:
 …
     USE indices
     USE transpose_indices
+    USE tridia_solver
     IMPLICIT NONE
     LOGICAL, SAVE ::  poisfft_initialized = .FALSE.
-    REAL, DIMENSION(:,:), ALLOCATABLE ::  ddzuw
     PRIVATE
 …
        CALL fft_init
+       ALLOCATE( ddzuw(0:nz-1,3) )
+       DO  k = 0, nz-1
+          ddzuw(k,1) = ddzu_pres(k+1) * ddzw(k+1)
+          ddzuw(k,2) = ddzu_pres(k+2) * ddzw(k+1)
+          ddzuw(k,3) = -1.0 * &
+                       ( ddzu_pres(k+2) * ddzw(k+1) + ddzu_pres(k+1) * ddzw(k+1) )
+       ENDDO
+!
+!--    Calculate constant coefficients of the tridiagonal matrix
+#if ! defined ( __check )
+       CALL maketri
+#endif
+       CALL tridia_init
        poisfft_initialized = .TRUE.
 …
 !--       Solve the tridiagonal equation system along z
           CALL cpu_log( log_point_s(6), 'tridia', 'start' )
           CALL tridia( ar )
+          CALL tridia_substi( ar )
           CALL cpu_log( log_point_s(6), 'tridia', 'stop' )
 …
     END SUBROUTINE ffty_tri_ffty
-    SUBROUTINE tridia_1dd( ddx2, ddy2, nx, ny, j, ar, tri )
-!------------------------------------------------------------------------------!
-! Solves the linear system of equations for a 1d-decomposition along x (see
-! tridia)
+!
-! Attention:  when using the intel compilers older than 12.0, array tri must
-!             be passed as an argument to the contained subroutines. Otherwise
-!             addres faults will occur. This feature can be activated with
-!             cpp-switch __intel11
-!             On NEC, tri should not be passed (except for routine substi_1dd)
-!             because this causes very bad performance.
-!------------------------------------------------------------------------------!
-       USE arrays_3d
-       USE control_parameters
-       USE pegrid
-       IMPLICIT NONE
-       INTEGER ::  i, j, k, nnyh, nx, ny, omp_get_thread_num, tn
-       REAL    ::  ddx2, ddy2
-       REAL, DIMENSION(0:nx,1:nz)     ::  ar
-       REAL, DIMENSION(5,0:nx,0:nz-1) ::  tri
-       nnyh = ( ny + 1 ) / 2
+!
-!--    Define constant elements of the tridiagonal matrix.
-!--    The compiler on SX6 does loop exchange. If 0:nx is a high power of 2,
-!--    the exchanged loops create bank conflicts. The following directive
-!--    prohibits loop exchange and the loops perform much better.
-!       tn = omp_get_thread_num()
-!       WRITE( 120+tn, * ) '+++ id=',myid,' nx=',nx,' thread=', omp_get_thread_num()
-!       CALL local_flush( 120+tn )
-!CDIR NOLOOPCHG
-       DO  k = 0, nz-1
-          DO  i = 0,nx
-             tri(2,i,k) = ddzu_pres(k+1) * ddzw(k+1)
-             tri(3,i,k) = ddzu_pres(k+2) * ddzw(k+1)
-          ENDDO
-       ENDDO
-!       WRITE( 120+tn, * ) '+++ id=',myid,' end of first tridia loop   thread=', omp_get_thread_num()
-!       CALL local_flush( 120+tn )
-       IF ( j <= nnyh )  THEN
-#if defined( __intel11 )
-          CALL maketri_1dd( j, tri )
-#else
-          CALL maketri_1dd( j )
 #endif
-       ELSE
-#if defined( __intel11 )
-          CALL maketri_1dd( ny+1-j, tri )
-#else
-          CALL maketri_1dd( ny+1-j )
-#endif
-       ENDIF
-#if defined( __intel11 )
-       CALL split_1dd( tri )
-#else
-       CALL split_1dd
-#endif
-       CALL substi_1dd( ar, tri )
-    CONTAINS
-#if defined( __intel11 )
-       SUBROUTINE maketri_1dd( j, tri )
-#else
-       SUBROUTINE maketri_1dd( j )
-#endif
-!------------------------------------------------------------------------------!
-! computes the i- and j-dependent component of the matrix
-!------------------------------------------------------------------------------!
-          USE constants
-          IMPLICIT NONE
-          INTEGER ::  i, j, k, nnxh
-          REAL    ::  a, c
-          REAL, DIMENSION(0:nx) ::  l
-#if defined( __intel11 )
-          REAL, DIMENSION(5,0:nx,0:nz-1) ::  tri
-#endif
-          nnxh = ( nx + 1 ) / 2
+!
-!--       Provide the tridiagonal matrix for solution of the Poisson equation in
-!--       Fourier space. The coefficients are computed following the method of
-!--       Schmidt et al. (DFVLR-Mitteilung 84-15), which departs from Stephan
-!--       Siano's original version by discretizing the Poisson equation,
-!--       before it is Fourier-transformed
-          DO  i = 0, nx
-             IF ( i >= 0 .AND. i <= nnxh ) THEN
-                l(i) = 2.0 * ( 1.0 - COS( ( 2.0 * pi * i ) / &
-                                         REAL( nx+1 ) ) ) * ddx2 + &
-.0 * ( 1.0 - COS( ( 2.0 * pi * j ) / &
-                                         REAL( ny+1 ) ) ) * ddy2
-             ELSE
-                l(i) = 2.0 * ( 1.0 - COS( ( 2.0 * pi * ( nx+1-i ) ) / &
-                                         REAL( nx+1 ) ) ) * ddx2 + &
-.0 * ( 1.0 - COS( ( 2.0 * pi * j ) / &
-                                         REAL( ny+1 ) ) ) * ddy2
-             ENDIF
-          ENDDO
-          DO  k = 0, nz-1
-             DO  i = 0, nx
-                a = -1.0 * ddzu_pres(k+2) * ddzw(k+1)
-                c = -1.0 * ddzu_pres(k+1) * ddzw(k+1)
-                tri(1,i,k) = a + c - l(i)
-             ENDDO
-          ENDDO
-          IF ( ibc_p_b == 1 )  THEN
-             DO  i = 0, nx
-                tri(1,i,0) = tri(1,i,0) + tri(2,i,0)
-             ENDDO
-          ENDIF
-          IF ( ibc_p_t == 1 )  THEN
-             DO  i = 0, nx
-                tri(1,i,nz-1) = tri(1,i,nz-1) + tri(3,i,nz-1)
-             ENDDO
-          ENDIF
-       END SUBROUTINE maketri_1dd
-#if defined( __intel11 )
-       SUBROUTINE split_1dd( tri )
-#else
-       SUBROUTINE split_1dd
-#endif
-!------------------------------------------------------------------------------!
-! Splitting of the tridiagonal matrix (Thomas algorithm)
-!------------------------------------------------------------------------------!
-          IMPLICIT NONE
-          INTEGER ::  i, k
-#if defined( __intel11 )
-          REAL, DIMENSION(5,0:nx,0:nz-1) ::  tri
-#endif
+!
-!--       Splitting
-          DO  i = 0, nx
-             tri(4,i,0) = tri(1,i,0)
-          ENDDO
-          DO  k = 1, nz-1
-             DO  i = 0, nx
-                tri(5,i,k) = tri(2,i,k) / tri(4,i,k-1)
-                tri(4,i,k) = tri(1,i,k) - tri(3,i,k-1) * tri(5,i,k)
-             ENDDO
-          ENDDO
-       END SUBROUTINE split_1dd
-       SUBROUTINE substi_1dd( ar, tri )
-!------------------------------------------------------------------------------!
-! Substitution (Forward and Backward) (Thomas algorithm)
-!------------------------------------------------------------------------------!
-          IMPLICIT NONE
-          INTEGER ::  i, k
-          REAL, DIMENSION(0:nx,nz)       ::  ar
-          REAL, DIMENSION(0:nx,0:nz-1)   ::  ar1
-          REAL, DIMENSION(5,0:nx,0:nz-1) ::  tri
+!
-!--       Forward substitution
-          DO  i = 0, nx
-             ar1(i,0) = ar(i,1)
-          ENDDO
-          DO  k = 1, nz-1
-             DO  i = 0, nx
-                ar1(i,k) = ar(i,k+1) - tri(5,i,k) * ar1(i,k-1)
-             ENDDO
-          ENDDO
+!
-!--       Backward substitution
-!--       Note, the add of 1.0E-20 in the denominator is due to avoid divisions
-!--       by zero appearing if the pressure bc is set to neumann at the top of
-!--       the model domain.
-          DO  i = 0, nx
-             ar(i,nz) = ar1(i,nz-1) / ( tri(4,i,nz-1) + 1.0E-20 )
-          ENDDO
-          DO  k = nz-2, 0, -1
-             DO  i = 0, nx
-                ar(i,k+1) = ( ar1(i,k) - tri(3,i,k) * ar(i,k+2) ) &
-                            / tri(4,i,k)
-             ENDDO
-          ENDDO
+!
-!--       Indices i=0, j=0 correspond to horizontally averaged pressure.
-!--       The respective values of ar should be zero at all k-levels if
-!--       acceleration of horizontally averaged vertical velocity is zero.
-          IF ( ibc_p_b == 1  .AND.  ibc_p_t == 1 )  THEN
-             IF ( j == 0 )  THEN
-                DO  k = 1, nz
-                   ar(0,k) = 0.0
-                ENDDO
-             ENDIF
-          ENDIF
-       END SUBROUTINE substi_1dd
-    END SUBROUTINE tridia_1dd
-    SUBROUTINE tridia( ar )
-!------------------------------------------------------------------------------!
-! solves the linear system of equations:
+!
-! -(4 pi^2(i^2/(dx^2*nnx^2)+j^2/(dy^2*nny^2))+
-!   1/(dzu(k)*dzw(k))+1/(dzu(k-1)*dzw(k)))*p(i,j,k)+
-! 1/(dzu(k)*dzw(k))*p(i,j,k+1)+1/(dzu(k-1)*dzw(k))*p(i,j,k-1)=d(i,j,k)
+!
-! by using the Thomas algorithm
-!------------------------------------------------------------------------------!
-       USE arrays_3d
-       IMPLICIT NONE
-       INTEGER ::  i, j, k
-       !$acc declare create( tri )
-       REAL, DIMENSION(nxl_z:nxr_z,nys_z:nyn_z,0:nz-1,2) ::  tri
-       REAL    ::  ar(nxl_z:nxr_z,nys_z:nyn_z,1:nz)
-       CALL split( tri )
-       CALL substi( ar, tri )
-    END SUBROUTINE tridia
-    SUBROUTINE maketri
-!------------------------------------------------------------------------------!
-! Computes the i- and j-dependent component of the matrix
-!------------------------------------------------------------------------------!
-          USE arrays_3d,  ONLY: tric
-          USE constants
-          USE control_parameters
-          USE grid_variables
-          IMPLICIT NONE
-          INTEGER ::  i, j, k, nnxh, nnyh
-          !$acc declare create( ll )
-          REAL    ::  ll(nxl_z:nxr_z,nys_z:nyn_z)
-          nnxh = ( nx + 1 ) / 2
-          nnyh = ( ny + 1 ) / 2
+!
-!--       Provide the constant coefficients of the tridiagonal matrix for solution
-!--       of the Poisson equation in Fourier space.
-!--       The coefficients are computed following the method of
-!--       Schmidt et al. (DFVLR-Mitteilung 84-15), which departs from Stephan
-!--       Siano's original version by discretizing the Poisson equation,
-!--       before it is Fourier-transformed.
-          !$acc kernels present( tric )
-          !$acc loop vector( 32 )
-          DO  j = nys_z, nyn_z
-             DO  i = nxl_z, nxr_z
-                IF ( j >= 0  .AND.  j <= nnyh )  THEN
-                   IF ( i >= 0  .AND.  i <= nnxh )  THEN
-                      ll(i,j) = 2.0 * ( 1.0 - COS( ( 2.0 * pi * i ) / &
-                                                  REAL( nx+1 ) ) ) / ( dx * dx ) + &
-.0 * ( 1.0 - COS( ( 2.0 * pi * j ) / &
-                                                  REAL( ny+1 ) ) ) / ( dy * dy )
-                   ELSE
-                      ll(i,j) = 2.0 * ( 1.0 - COS( ( 2.0 * pi * ( nx+1-i ) ) / &
-                                                  REAL( nx+1 ) ) ) / ( dx * dx ) + &
-.0 * ( 1.0 - COS( ( 2.0 * pi * j ) / &
-                                                  REAL( ny+1 ) ) ) / ( dy * dy )
-                   ENDIF
-                ELSE
-                   IF ( i >= 0  .AND.  i <= nnxh )  THEN
-                      ll(i,j) = 2.0 * ( 1.0 - COS( ( 2.0 * pi * i ) / &
-                                                  REAL( nx+1 ) ) ) / ( dx * dx ) + &
-.0 * ( 1.0 - COS( ( 2.0 * pi * ( ny+1-j ) ) / &
-                                                  REAL( ny+1 ) ) ) / ( dy * dy )
-                   ELSE
-                      ll(i,j) = 2.0 * ( 1.0 - COS( ( 2.0 * pi * ( nx+1-i ) ) / &
-                                                  REAL( nx+1 ) ) ) / ( dx * dx ) + &
-.0 * ( 1.0 - COS( ( 2.0 * pi * ( ny+1-j ) ) / &
-                                                  REAL( ny+1 ) ) ) / ( dy * dy )
-                   ENDIF
-                ENDIF
-             ENDDO
-          ENDDO
-          !$acc loop
-          DO  k = 0, nz-1
-             DO  j = nys_z, nyn_z
-                !$acc loop vector( 32 )
-                DO  i = nxl_z, nxr_z
-                   tric(i,j,k) = ddzuw(k,3) - ll(i,j)
-                ENDDO
-             ENDDO
-          ENDDO
-          !$acc end kernels
-          IF ( ibc_p_b == 1 )  THEN
-             !$acc kernels present( tric )
-             !$acc loop
-             DO  j = nys_z, nyn_z
-                DO  i = nxl_z, nxr_z
-                   tric(i,j,0) = tric(i,j,0) + ddzuw(0,1)
-                ENDDO
-             ENDDO
-             !$acc end kernels
-          ENDIF
-          IF ( ibc_p_t == 1 )  THEN
-             !$acc kernels present( tric )
-             !$acc loop
-             DO  j = nys_z, nyn_z
-                DO  i = nxl_z, nxr_z
-                   tric(i,j,nz-1) = tric(i,j,nz-1) + ddzuw(nz-1,2)
-                ENDDO
-             ENDDO
-             !$acc end kernels
-          ENDIF
-    END SUBROUTINE maketri
-    SUBROUTINE substi( ar, tri )
-!------------------------------------------------------------------------------!
-! Substitution (Forward and Backward) (Thomas algorithm)
-!------------------------------------------------------------------------------!
-          USE control_parameters
-          IMPLICIT NONE
-          INTEGER ::  i, j, k
-          REAL    ::  ar(nxl_z:nxr_z,nys_z:nyn_z,1:nz)
-          REAL, DIMENSION(nxl_z:nxr_z,nys_z:nyn_z,0:nz-1,2) ::  tri
-          !$acc declare create( ar1 )
-          REAL, DIMENSION(nxl_z:nxr_z,nys_z:nyn_z,0:nz-1)   ::  ar1
+!
-!--       Forward substitution
-          DO  k = 0, nz - 1
-             !$acc kernels present( ar, tri )
-             !$acc loop
-             DO  j = nys_z, nyn_z
-                DO  i = nxl_z, nxr_z
-                   IF ( k == 0 )  THEN
-                      ar1(i,j,k) = ar(i,j,k+1)
-                   ELSE
-                      ar1(i,j,k) = ar(i,j,k+1) - tri(i,j,k,2) * ar1(i,j,k-1)
-                   ENDIF
-                ENDDO
-             ENDDO
-             !$acc end kernels
-          ENDDO
+!
-!--       Backward substitution
-!--       Note, the 1.0E-20 in the denominator is due to avoid divisions
-!--       by zero appearing if the pressure bc is set to neumann at the top of
-!--       the model domain.
-          DO  k = nz-1, 0, -1
-             !$acc kernels present( ar, tri )
-             !$acc loop
-             DO  j = nys_z, nyn_z
-                DO  i = nxl_z, nxr_z
-                   IF ( k == nz-1 )  THEN
-                      ar(i,j,k+1) = ar1(i,j,k) / ( tri(i,j,k,1) + 1.0E-20 )
-                   ELSE
-                      ar(i,j,k+1) = ( ar1(i,j,k) - ddzuw(k,2) * ar(i,j,k+2) ) &
-                              / tri(i,j,k,1)
-                   ENDIF
-                ENDDO
-             ENDDO
-             !$acc end kernels
-          ENDDO
+!
-!--       Indices i=0, j=0 correspond to horizontally averaged pressure.
-!--       The respective values of ar should be zero at all k-levels if
-!--       acceleration of horizontally averaged vertical velocity is zero.
-          IF ( ibc_p_b == 1  .AND.  ibc_p_t == 1 )  THEN
-             IF ( nys_z == 0  .AND.  nxl_z == 0 )  THEN
-                !$acc kernels loop present( ar )
-                DO  k = 1, nz
-                   ar(nxl_z,nys_z,k) = 0.0
-                ENDDO
-             ENDIF
-          ENDIF
-    END SUBROUTINE substi
-    SUBROUTINE split( tri )
-!------------------------------------------------------------------------------!
-! Splitting of the tridiagonal matrix (Thomas algorithm)
-!------------------------------------------------------------------------------!
-          USE arrays_3d,  ONLY: tric
-          IMPLICIT NONE
-          INTEGER ::  i, j, k
-          REAL, DIMENSION(nxl_z:nxr_z,nys_z:nyn_z,0:nz-1,2) ::  tri
+!
-!--       Splitting
-          !$acc kernels present( tri, tric )
-          !$acc loop
-          DO  j = nys_z, nyn_z
-             !$acc loop vector( 32 )
-             DO  i = nxl_z, nxr_z
-                tri(i,j,0,1) = tric(i,j,0)
-             ENDDO
-          ENDDO
-          !$acc end kernels
-          DO  k = 1, nz-1
-             !$acc kernels present( tri, tric )
-             !$acc loop
-             DO  j = nys_z, nyn_z
-                !$acc loop vector( 32 )
-                DO  i = nxl_z, nxr_z
-                   tri(i,j,k,2) = ddzuw(k,1) / tri(i,j,k-1,1)
-                   tri(i,j,k,1) = tric(i,j,k) - ddzuw(k-1,2) * tri(i,j,k,2)
-                ENDDO
-             ENDDO
-             !$acc end kernels
-          ENDDO
-    END SUBROUTINE split
-#endif
  END MODULE poisfft_mod

Note: See TracChangeset for help on using the changeset viewer.

Context Navigation

Changeset 1212 for palm/trunk/SOURCE/poisfft.f90

Legend:

palm/trunk/SOURCE/poisfft.f90

Download in other formats: