Home

Context Navigation

← Previous Change
Next Change →

fft_xy_mod.f90

Timestamp:

Aug 24, 2020 4:02:40 PM (4 years ago)

Author:

raasch

Message:

files re-formatted to follow the PALM coding standard

File:

: 1 edited

palm/trunk/SOURCE/fft_xy_mod.f90 (modified) (67 diffs)

Legend:

: Unmodified
: Added
: Removed

palm/trunk/SOURCE/fft_xy_mod.f90

-                      r4370
+                      r4646
 !> @file fft_xy_mod.f90
 !------------------------------------------------------------------------------!
+!--------------------------------------------------------------------------------------------------!
 ! This file is part of the PALM model system.
+!
+! PALM is free software: you can redistribute it and/or modify it under the
+! terms of the GNU General Public License as published by the Free Software
+! Foundation, either version 3 of the License, or (at your option) any later
+! version.
+!
+! PALM is distributed in the hope that it will be useful, but WITHOUT ANY
+! WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
+! A PARTICULAR PURPOSE.  See the GNU General Public License for more details.
+!
+! You should have received a copy of the GNU General Public License along with
+! PALM. If not, see <http://www.gnu.org/licenses/>.
+! PALM is free software: you can redistribute it and/or modify it under the terms of the GNU General
+! Public License as published by the Free Software Foundation, either version 3 of the License, or
+! (at your option) any later version.
+!
+! PALM is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the
+! implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+! Public License for more details.
+!
+! You should have received a copy of the GNU General Public License along with PALM. If not, see
+! <http://www.gnu.org/licenses/>.
+!
 ! Copyright 1997-2020 Leibniz Universitaet Hannover
 !------------------------------------------------------------------------------!
+!--------------------------------------------------------------------------------------------------!
+!
 ! Current revisions:
 ! -----------------
+!
+!
+!
+!
 ! Former revisions:
 ! -----------------
 ! $Id$
+! file re-formatted to follow the PALM coding standard
+!
+! 4370 2020-01-10 14:00:44Z raasch
 ! bugfix for Temperton-fft usage on GPU
+!
+!
 ! 4366 2020-01-09 08:12:43Z raasch
 ! Vectorized Temperton-fft added
+!
+!
 ! 4360 2020-01-07 11:25:50Z suehring
 ! Corrected "Former revisions" section
+!
+!
 ! 4069 2019-07-01 14:05:51Z Giersch
 ! Code added to avoid compiler warnings
+!
+!
 ! 3655 2019-01-07 16:51:22Z knoop
 ! OpenACC port for SPEC
 …
 !------------------------------------------------------------------------------!
  MODULE fft_xy
     USE control_parameters,                                                    &
+    USE control_parameters,                                                                        &
         ONLY:  fft_method, loop_optimization, message_string
     USE cuda_fft_interfaces
     USE indices,                                                               &
+    USE indices,                                                                                   &
         ONLY:  nx, ny, nz
 …
     USE kinds
     USE singleton,                                                             &
+    USE singleton,                                                                                 &
         ONLY: fftn
     USE temperton_fft
     USE transpose_indices,                                                     &
+    USE transpose_indices,                                                                         &
         ONLY:  nxl_y, nxr_y, nyn_x, nys_x, nzb_x, nzb_y, nzt_x, nzt_y
 …
     PRIVATE
     PUBLIC fft_x, fft_x_1d, fft_y, fft_y_1d, fft_init, fft_x_m, fft_y_m, f_vec_x, temperton_fft_vec
+    PUBLIC fft_init, f_vec_x, fft_x, fft_x_1d, fft_x_m, fft_y, fft_y_1d, fft_y_m, temperton_fft_vec
     INTEGER(iwp), DIMENSION(:), ALLOCATABLE, SAVE ::  ifax_x  !<
 …
     REAL(wp), SAVE ::  sqr_dnx  !<
     REAL(wp), SAVE ::  sqr_dny  !<
     REAL(wp), DIMENSION(:), ALLOCATABLE, SAVE ::  trigs_x  !<
+    REAL(wp), DIMENSION(:), ALLOCATABLE, SAVE ::  trigs_x  !<
     REAL(wp), DIMENSION(:), ALLOCATABLE, SAVE ::  trigs_y  !<
 …
 #if defined( __ibm )
     INTEGER(iwp), PARAMETER ::  nau1 = 20000  !<
+    INTEGER(iwp), PARAMETER ::  nau1 = 20000  !<
     INTEGER(iwp), PARAMETER ::  nau2 = 22000  !<
+!
+!-- The following working arrays contain tables and have to be "save" and
+!-- shared in OpenMP sense
+    REAL(wp), DIMENSION(nau1), SAVE ::  aux1  !<
+!-- The following working arrays contain tables and have to be "save" and shared in OpenMP sense
+    REAL(wp), DIMENSION(nau1), SAVE ::  aux1  !<
     REAL(wp), DIMENSION(nau1), SAVE ::  auy1  !<
     REAL(wp), DIMENSION(nau1), SAVE ::  aux3  !<
+    REAL(wp), DIMENSION(nau1), SAVE ::  aux3  !<
     REAL(wp), DIMENSION(nau1), SAVE ::  auy3  !<
 #elif defined( __nec_fft )
     INTEGER(iwp), SAVE ::  nz1  !<
     REAL(wp), DIMENSION(:), ALLOCATABLE, SAVE ::  trig_xb  !<
     REAL(wp), DIMENSION(:), ALLOCATABLE, SAVE ::  trig_xf  !<
+    REAL(wp), DIMENSION(:), ALLOCATABLE, SAVE ::  trig_xf  !<
     REAL(wp), DIMENSION(:), ALLOCATABLE, SAVE ::  trig_yb  !<
     REAL(wp), DIMENSION(:), ALLOCATABLE, SAVE ::  trig_yf  !<
 #elif defined( __cuda_fft )
     INTEGER(C_INT), SAVE ::  plan_xf  !<
 …
 #if defined( __fftw )
     INCLUDE  'fftw3.f03'
+    COMPLEX(KIND=C_DOUBLE_COMPLEX), DIMENSION(:), ALLOCATABLE, SAVE ::  x_out  !<
+    COMPLEX(KIND=C_DOUBLE_COMPLEX), DIMENSION(:), ALLOCATABLE, SAVE ::  y_out  !<
     INTEGER(KIND=C_INT) ::  nx_c  !<
     INTEGER(KIND=C_INT) ::  ny_c  !<
+    COMPLEX(KIND=C_DOUBLE_COMPLEX), DIMENSION(:), ALLOCATABLE, SAVE ::  x_out  !<
+    COMPLEX(KIND=C_DOUBLE_COMPLEX), DIMENSION(:), ALLOCATABLE, SAVE ::         &
+       y_out  !<
+    REAL(KIND=C_DOUBLE), DIMENSION(:), ALLOCATABLE, SAVE ::                    &
+       x_in   !<
+    REAL(KIND=C_DOUBLE), DIMENSION(:), ALLOCATABLE, SAVE ::                    &
+       y_in   !<
+    REAL(KIND=C_DOUBLE), DIMENSION(:), ALLOCATABLE, SAVE ::  x_in   !<
+    REAL(KIND=C_DOUBLE), DIMENSION(:), ALLOCATABLE, SAVE ::  y_in   !<
     !$OMP THREADPRIVATE( x_out, y_out, x_in, y_in )
     TYPE(C_PTR), SAVE ::  plan_xf, plan_xi, plan_yf, plan_yi
 #endif
 …
 !------------------------------------------------------------------------------!
+!--------------------------------------------------------------------------------------------------!
 ! Description:
 ! ------------
 !> @todo Missing subroutine description.
 !------------------------------------------------------------------------------!
+!--------------------------------------------------------------------------------------------------!
     SUBROUTINE fft_init
 …
 !--    in OpenMP sense
 #if defined( __ibm )
+       REAL(wp), DIMENSION(nau2)   ::  aux2   !<
+       REAL(wp), DIMENSION(nau2)   ::  auy2   !<
+       REAL(wp), DIMENSION(nau2)   ::  aux4   !<
+       REAL(wp), DIMENSION(nau2)   ::  auy4   !<
        REAL(wp), DIMENSION(0:nx+2) ::  workx  !<
        REAL(wp), DIMENSION(0:ny+2) ::  worky  !<
-       REAL(wp), DIMENSION(nau2)   ::  aux2   !<
-       REAL(wp), DIMENSION(nau2)   ::  auy2   !<
-       REAL(wp), DIMENSION(nau2)   ::  aux4   !<
-       REAL(wp), DIMENSION(nau2)   ::  auy4   !<
 #elif defined( __nec_fft )
        REAL(wp), DIMENSION(0:nx+3,nz+1)   ::  work_x  !<
 …
        REAL(wp), DIMENSION(6*(nx+3),nz+1) ::  workx   !<
        REAL(wp), DIMENSION(6*(ny+3),nz+1) ::  worky   !<
 #endif
+#endif
+!
 …
+!
 !--       Initialize tables for fft along x
+          CALL DRCFT( 1, workx, 1, workx, 1, nx+1, 1,  1, sqr_dnx, aux1, nau1, &
+                      aux2, nau2 )
+          CALL DCRFT( 1, workx, 1, workx, 1, nx+1, 1, -1, sqr_dnx, aux3, nau1, &
+                      aux4, nau2 )
+          CALL DRCFT( 1, workx, 1, workx, 1, nx+1, 1,  1, sqr_dnx, aux1, nau1, aux2, nau2 )
+          CALL DCRFT( 1, workx, 1, workx, 1, nx+1, 1, -1, sqr_dnx, aux3, nau1, aux4, nau2 )
+!
 !--       Initialize tables for fft along y
+          CALL DRCFT( 1, worky, 1, worky, 1, ny+1, 1,  1, sqr_dny, auy1, nau1, &
+                      auy2, nau2 )
+          CALL DCRFT( 1, worky, 1, worky, 1, ny+1, 1, -1, sqr_dny, auy3, nau1, &
+                      auy4, nau2 )
+          CALL DRCFT( 1, worky, 1, worky, 1, ny+1, 1,  1, sqr_dny, auy1, nau1, auy2, nau2 )
+          CALL DCRFT( 1, worky, 1, worky, 1, ny+1, 1, -1, sqr_dny, auy3, nau1, auy4, nau2 )
 #elif defined( __nec_fft )
+          message_string = 'fft method "' // TRIM( fft_method) // &
+                           '" currently does not work on NEC'
+          message_string = 'fft method "' // TRIM( fft_method) // '" currently does not work on NEC'
           CALL message( 'fft_init', 'PA0187', 1, 2, 0, 6, 0 )
+          ALLOCATE( trig_xb(2*(nx+1)), trig_xf(2*(nx+1)),                      &
+                    trig_yb(2*(ny+1)), trig_yf(2*(ny+1)) )
+          ALLOCATE( trig_xb(2*(nx+1)), trig_xf(2*(nx+1)), trig_yb(2*(ny+1)), trig_yf(2*(ny+1)) )
           work_x = 0.0_wp
 …
           CALL DZFFT( 0, nx+1, sqr_dnx, work_x, work_x, trig_xf, workx, 0 )
           CALL ZDFFT( 0, nx+1, sqr_dnx, work_x, work_x, trig_xb, workx, 0 )
+          CALL DZFFTM( 0, nx+1, nz1, sqr_dnx, work_x, nx+4, work_x, nx+4,      &
+                       trig_xf, workx, 0 )
+          CALL ZDFFTM( 0, nx+1, nz1, sqr_dnx, work_x, nx+4, work_x, nx+4,      &
+                       trig_xb, workx, 0 )
+          CALL DZFFTM( 0, nx+1, nz1, sqr_dnx, work_x, nx+4, work_x, nx+4, trig_xf, workx, 0 )
+          CALL ZDFFTM( 0, nx+1, nz1, sqr_dnx, work_x, nx+4, work_x, nx+4, trig_xb, workx, 0 )
+!
 !--       Initialize tables for fft along y (non-vector and vector case (M))
           CALL DZFFT( 0, ny+1, sqr_dny, work_y, work_y, trig_yf, worky, 0 )
           CALL ZDFFT( 0, ny+1, sqr_dny, work_y, work_y, trig_yb, worky, 0 )
+          CALL DZFFTM( 0, ny+1, nz1, sqr_dny, work_y, ny+4, work_y, ny+4,      &
+                       trig_yf, worky, 0 )
+          CALL ZDFFTM( 0, ny+1, nz1, sqr_dny, work_y, ny+4, work_y, ny+4,      &
+                       trig_yb, worky, 0 )
+          CALL DZFFTM( 0, ny+1, nz1, sqr_dny, work_y, ny+4, work_y, ny+4, trig_yf, worky, 0 )
+          CALL ZDFFTM( 0, ny+1, nz1, sqr_dny, work_y, ny+4, work_y, ny+4, trig_yb, worky, 0 )
 #elif defined( __cuda_fft )
           CALL CUFFTPLAN1D( plan_xf, nx+1, CUFFT_D2Z, (nyn_x-nys_x+1) * (nzt_x-nzb_x+1) )
 …
           ny_c = ny+1
           !$OMP PARALLEL
+          ALLOCATE( x_in(0:nx+2), y_in(0:ny+2), x_out(0:(nx+1)/2),             &
+                    y_out(0:(ny+1)/2) )
+          ALLOCATE( x_in(0:nx+2), y_in(0:ny+2), x_out(0:(nx+1)/2), y_out(0:(ny+1)/2) )
           !$OMP END PARALLEL
           plan_xf = FFTW_PLAN_DFT_R2C_1D( nx_c, x_in, x_out, FFTW_ESTIMATE )
 …
        ELSE
+          message_string = 'fft method "' // TRIM( fft_method) // &
+                           '" not available'
+          message_string = 'fft method "' // TRIM( fft_method) // '" not available'
           CALL message( 'fft_init', 'PA0189', 1, 2, 0, 6, 0 )
        ENDIF
 …
 !------------------------------------------------------------------------------!
+!--------------------------------------------------------------------------------------------------!
 ! Description:
 ! ------------
 !> Fourier-transformation along x-direction.
+!> Fourier-transformation along x-direction.
 !> Version for 2D-decomposition.
 !> It uses internal algorithms (Singleton or Temperton) or
 !> system-specific routines, if they are available
 !------------------------------------------------------------------------------!
+!> It uses internal algorithms (Singleton or Temperton) or system-specific routines, if they are
+!> available.
+!--------------------------------------------------------------------------------------------------!
     SUBROUTINE fft_x( ar, direction, ar_2d, ar_inv )
 …
        CHARACTER (LEN=*) ::  direction  !<
        COMPLEX(wp), DIMENSION(:), ALLOCATABLE ::  cwork  !<
        INTEGER(iwp) ::  i          !<
+       INTEGER(iwp) ::  i          !<
        INTEGER(iwp) ::  ishape(1)  !<
        INTEGER(iwp) ::  j          !<
 …
        LOGICAL ::  forward_fft !<
        REAL(wp), DIMENSION(0:nx+2) ::  work   !<
        REAL(wp), DIMENSION(nx+2)   ::  work1  !<
        REAL(wp), DIMENSION(:,:), ALLOCATABLE           ::  work_vec  !<
        REAL(wp), DIMENSION(0:nx,nys_x:nyn_x), OPTIONAL ::  ar_2d     !<
+       REAL(wp), DIMENSION(0:nx,nys_x:nyn_x,nzb_x:nzt_x)           ::  ar       !<
        REAL(wp), DIMENSION(nys_x:nyn_x,nzb_x:nzt_x,0:nx), OPTIONAL ::  ar_inv   !<
-       REAL(wp), DIMENSION(0:nx,nys_x:nyn_x,nzb_x:nzt_x)           ::  ar       !<
 #if defined( __ibm )
        REAL(wp), DIMENSION(nau2) ::  aux2  !<
+       REAL(wp), DIMENSION(nau2) ::  aux2  !<
        REAL(wp), DIMENSION(nau2) ::  aux4  !<
 #elif defined( __nec_fft )
 …
+!
 !--       Performing the fft with singleton's software works on every system,
 !--       since it is part of the model
+!--       Performing the fft with singleton's software works on every system, since it is part of
+!--       the model.
           ALLOCATE( cwork(0:nx) )
           IF ( forward_fft )   then
+          IF ( forward_fft )  THEN
              !$OMP PARALLEL PRIVATE ( cwork, i, ishape, j, k )
 …
                    cwork(0) = CMPLX( ar(0,j,k), 0.0_wp, KIND=wp )
                    DO  i = 1, (nx+1)/2 - 1
+                      cwork(i)      = CMPLX( ar(i,j,k), -ar(nx+1-i,j,k),       &
+                                             KIND=wp )
+                      cwork(nx+1-i) = CMPLX( ar(i,j,k),  ar(nx+1-i,j,k),       &
+                                             KIND=wp )
+                      cwork(i)      = CMPLX( ar(i,j,k), -ar(nx+1-i,j,k), KIND=wp )
+                      cwork(nx+1-i) = CMPLX( ar(i,j,k),  ar(nx+1-i,j,k), KIND=wp )
                    ENDDO
                    cwork((nx+1)/2) = CMPLX( ar((nx+1)/2,j,k), 0.0_wp, KIND=wp )
 …
+!
 !--       Performing the fft with Temperton's software works on every system,
 !--       since it is part of the model
+!--       Performing the fft with Temperton's software works on every system, since it is part of
+!--       the model.
           IF ( forward_fft )  THEN
 …
                       x_out(0) = CMPLX( ar_2d(0,j), 0.0_wp, KIND=wp )
                       DO  i = 1, (nx+1)/2 - 1
+                         x_out(i) = CMPLX( ar_2d(i,j), ar_2d(nx+1-i,j),        &
+                                           KIND=wp )
+                      ENDDO
+                      x_out((nx+1)/2) = CMPLX( ar_2d((nx+1)/2,j), 0.0_wp,      &
+                                               KIND=wp )
+                         x_out(i) = CMPLX( ar_2d(i,j), ar_2d(nx+1-i,j), KIND=wp )
+                      ENDDO
+                      x_out((nx+1)/2) = CMPLX( ar_2d((nx+1)/2,j), 0.0_wp, KIND=wp )
                    ELSE
 …
                          x_out(i) = CMPLX( ar(i,j,k), ar(nx+1-i,j,k), KIND=wp )
                       ENDDO
+                      x_out((nx+1)/2) = CMPLX( ar((nx+1)/2,j,k), 0.0_wp,       &
+                                               KIND=wp )
+                      x_out((nx+1)/2) = CMPLX( ar((nx+1)/2,j,k), 0.0_wp, KIND=wp )
                    ENDIF
 …
                 DO  j = nys_x, nyn_x
+                   CALL DRCFT( 0, ar, 1, work, 1, nx+1, 1, 1, sqr_dnx, aux1,   &
+                               nau1, aux2, nau2 )
+                   CALL DRCFT( 0, ar, 1, work, 1, nx+1, 1, 1, sqr_dnx, aux1, nau1, aux2, nau2 )
                    DO  i = 0, (nx+1)/2
 …
                    work(nx+2) = 0.0_wp
+                   CALL DCRFT( 0, work, 1, work, 1, nx+1, 1, -1, sqr_dnx,      &
+                               aux3, nau1, aux4, nau2 )
+                   CALL DCRFT( 0, work, 1, work, 1, nx+1, 1, -1, sqr_dnx, aux3, nau1, aux4, nau2 )
                    DO  i = 0, nx
 …
                    CALL DZFFT( 1, nx+1, sqr_dnx, work, work, trig_xf, work2, 0 )
                    DO  i = 0, (nx+1)/2
                       ar(i,j,k) = work(2*i)
 …
                    DO  i = 1, (nx+1)/2 - 1
+                      ar_tmp(i,j,k) = CMPLX( ar(i,j,k), ar(nx+1-i,j,k),        &
+                                             KIND=wp )
+                   ENDDO
+                   ar_tmp((nx+1)/2,j,k) = CMPLX( ar((nx+1)/2,j,k), 0.0_wp,     &
+                                                 KIND=wp )
+                      ar_tmp(i,j,k) = CMPLX( ar(i,j,k), ar(nx+1-i,j,k), KIND=wp )
+                   ENDDO
+                   ar_tmp((nx+1)/2,j,k) = CMPLX( ar((nx+1)/2,j,k), 0.0_wp, KIND=wp )
                 ENDDO
 …
     END SUBROUTINE fft_x
 !------------------------------------------------------------------------------!
+!--------------------------------------------------------------------------------------------------!
 ! Description:
 ! ------------
 !> Fourier-transformation along x-direction.
 !> Version for 1D-decomposition.
 !> It uses internal algorithms (Singleton or Temperton) or
 !> system-specific routines, if they are available
 !------------------------------------------------------------------------------!
+!> It uses internal algorithms (Singleton or Temperton) or system-specific routines, if they are
+!> available.
+!--------------------------------------------------------------------------------------------------!
     SUBROUTINE fft_x_1d( ar, direction )
 …
        CHARACTER (LEN=*) ::  direction  !<
        INTEGER(iwp) ::  i               !<
        INTEGER(iwp) ::  ishape(1)       !<
 …
        REAL(wp), DIMENSION(0:nx+2) ::  work   !<
        REAL(wp), DIMENSION(nx+2)   ::  work1  !<
        COMPLEX(wp), DIMENSION(:), ALLOCATABLE ::  cwork  !<
 #if defined( __ibm )
        REAL(wp), DIMENSION(nau2) ::  aux2       !<
 …
+!
 !--       Performing the fft with singleton's software works on every system,
 !--       since it is part of the model
+!--       Performing the fft with singleton's software works on every system, since it is part of
+!--       the model.
           ALLOCATE( cwork(0:nx) )
           IF ( forward_fft )   then
+          IF ( forward_fft )  THEN
              DO  i = 0, nx
 …
+!
 !--       Performing the fft with Temperton's software works on every system,
 !--       since it is part of the model
+!--       Performing the fft with Temperton's software works on every system, since it is part of
+!--       the model.
           IF ( forward_fft )  THEN
 …
           IF ( forward_fft )  THEN
+             CALL DRCFT( 0, ar, 1, work, 1, nx+1, 1, 1, sqr_dnx, aux1, nau1,   &
+                         aux2, nau2 )
+             CALL DRCFT( 0, ar, 1, work, 1, nx+1, 1, 1, sqr_dnx, aux1, nau1, aux2, nau2 )
              DO  i = 0, (nx+1)/2
 …
              work(nx+2) = 0.0_wp
+             CALL DCRFT( 0, work, 1, work, 1, nx+1, 1, -1, sqr_dnx, aux3, nau1, &
+                         aux4, nau2 )
+             CALL DCRFT( 0, work, 1, work, 1, nx+1, 1, -1, sqr_dnx, aux3, nau1, aux4, nau2 )
              DO  i = 0, nx
 …
              CALL DZFFT( 1, nx+1, sqr_dnx, work, work, trig_xf, work2, 0 )
              DO  i = 0, (nx+1)/2
                 ar(i) = work(2*i)
 …
     END SUBROUTINE fft_x_1d
 !------------------------------------------------------------------------------!
+!--------------------------------------------------------------------------------------------------!
 ! Description:
 ! ------------
 !> Fourier-transformation along y-direction.
 !> Version for 2D-decomposition.
 !> It uses internal algorithms (Singleton or Temperton) or
 !> system-specific routines, if they are available.
 !>
+!> It uses internal algorithms (Singleton or Temperton) or system-specific routines, if they are
+!> available.
+!>
 !> direction:  'forward' or 'backward'
 !> ar, ar_tr:  3D data arrays
+!> ar, ar_tr:  3D data arrays
 !>             forward:   ar: before  ar_tr: after transformation
 !>             backward:  ar_tr: before  ar: after transformation
 !>
+!>
 !> In case of non-overlapping transposition/transformation:
 !> nxl_y_bound = nxl_y_l = nxl_y
 !> nxr_y_bound = nxr_y_l = nxr_y
 !>
+!> nxl_y_bound = nxl_y_l = nxl_y
+!> nxr_y_bound = nxr_y_l = nxr_y
+!>
 !> In case of overlapping transposition/transformation:
+!> - nxl_y_bound  and  nxr_y_bound have the original values of
+!>   nxl_y, nxr_y.  ar_tr is dimensioned using these values.
+!> - nxl_y_l = nxr_y_l.  ar is dimensioned with these values, so that
+!>   transformation is carried out for a 2D-plane only.
+!------------------------------------------------------------------------------!
+    SUBROUTINE fft_y( ar, direction, ar_tr, nxl_y_bound, nxr_y_bound, nxl_y_l, &
+                      nxr_y_l, ar_inv )
+!> - nxl_y_bound  and  nxr_y_bound have the original values of nxl_y, nxr_y.  ar_tr is dimensioned
+!>   using these values.
+!> - nxl_y_l = nxr_y_l.  ar is dimensioned with these values, so that transformation is carried out
+!>   for a 2D-plane only.
+!--------------------------------------------------------------------------------------------------!
+    SUBROUTINE fft_y( ar, direction, ar_tr, nxl_y_bound, nxr_y_bound, nxl_y_l, nxr_y_l, ar_inv )
 …
        CHARACTER (LEN=*) ::  direction  !<
        INTEGER(iwp) ::  i            !<
        INTEGER(iwp) ::  j            !<
+       INTEGER(iwp) ::  j            !<
        INTEGER(iwp) ::  jshape(1)    !<
        INTEGER(iwp) ::  k            !<
 …
        REAL(wp), DIMENSION(0:ny+2) ::  work   !<
        REAL(wp), DIMENSION(ny+2)   ::  work1  !<
        REAL(wp), DIMENSION(:,:), ALLOCATABLE ::  f_vec_y
        REAL(wp), DIMENSION(:,:), ALLOCATABLE ::  work_vec
 …
        COMPLEX(wp), DIMENSION(:), ALLOCATABLE ::  cwork  !<
 #if defined( __ibm )
        REAL(wp), DIMENSION(nau2) ::  auy2  !<
 …
        REAL(wp), DIMENSION(6*(ny+1)) ::  work2  !<
 #elif defined( __cuda_fft )
+       COMPLEX(dp), DIMENSION(0:(ny+1)/2,nxl_y:nxr_y,nzb_y:nzt_y) ::           &
+          ar_tmp  !<
+       COMPLEX(dp), DIMENSION(0:(ny+1)/2,nxl_y:nxr_y,nzb_y:nzt_y) ::  ar_tmp  !<
        !$ACC DECLARE CREATE(ar_tmp)
 #endif
 …
+!
 !--       Performing the fft with singleton's software works on every system,
 !--       since it is part of the model
+!--       Performing the fft with singleton's software works on every system, since it is part of
+!--       the model.
           ALLOCATE( cwork(0:ny) )
           IF ( forward_fft )   then
+          IF ( forward_fft )  THEN
              !$OMP PARALLEL PRIVATE ( cwork, i, jshape, j, k )
 …
                    cwork(0) = CMPLX( ar_tr(0,i,k), 0.0_wp, KIND=wp )
                    DO  j = 1, (ny+1)/2 - 1
+                      cwork(j)      = CMPLX( ar_tr(j,i,k), -ar_tr(ny+1-j,i,k), &
+                                             KIND=wp )
+                      cwork(ny+1-j) = CMPLX( ar_tr(j,i,k),  ar_tr(ny+1-j,i,k), &
+                                             KIND=wp )
+                   ENDDO
+                   cwork((ny+1)/2) = CMPLX( ar_tr((ny+1)/2,i,k), 0.0_wp,       &
+                                            KIND=wp )
+                      cwork(j)      = CMPLX( ar_tr(j,i,k), -ar_tr(ny+1-j,i,k), KIND=wp )
+                      cwork(ny+1-j) = CMPLX( ar_tr(j,i,k),  ar_tr(ny+1-j,i,k), KIND=wp )
+                   ENDDO
+                   cwork((ny+1)/2) = CMPLX( ar_tr((ny+1)/2,i,k), 0.0_wp, KIND=wp )
                    jshape = SHAPE( cwork )
 …
+!
 !--       Performing the fft with Temperton's software works on every system,
 !--       since it is part of the model
+!--       Performing the fft with Temperton's software works on every system, since it is part of
+!--       the model.
           IF ( forward_fft )  THEN
 …
                    y_out(0) = CMPLX( ar_tr(0,i,k), 0.0_wp, KIND=wp )
                    DO  j = 1, (ny+1)/2 - 1
+                      y_out(j) = CMPLX( ar_tr(j,i,k), ar_tr(ny+1-j,i,k),       &
+                                        KIND=wp )
+                   ENDDO
+                   y_out((ny+1)/2) = CMPLX( ar_tr((ny+1)/2,i,k), 0.0_wp,       &
+                                            KIND=wp )
+                      y_out(j) = CMPLX( ar_tr(j,i,k), ar_tr(ny+1-j,i,k), KIND=wp )
+                   ENDDO
+                   y_out((ny+1)/2) = CMPLX( ar_tr((ny+1)/2,i,k), 0.0_wp, KIND=wp )
                    CALL FFTW_EXECUTE_DFT_C2R( plan_yi, y_out, y_in )
 …
                 DO  i = nxl_y_l, nxr_y_l
+                   CALL DRCFT( 0, ar, 1, work, 1, ny+1, 1, 1, sqr_dny, auy1,   &
+                               nau1, auy2, nau2 )
+                   CALL DRCFT( 0, ar, 1, work, 1, ny+1, 1, 1, sqr_dny, auy1, nau1, auy2, nau2 )
                    DO  j = 0, (ny+1)/2
 …
                    work(ny+2) = 0.0_wp
+                   CALL DCRFT( 0, work, 1, work, 1, ny+1, 1, -1, sqr_dny,      &
+                               auy3, nau1, auy4, nau2 )
+                   CALL DCRFT( 0, work, 1, work, 1, ny+1, 1, -1, sqr_dny, auy3, nau1, auy4, nau2 )
                    DO  j = 0, ny
 …
                    DO  j = 0, (ny+1)/2
                       ar(j,i,k)      = REAL( ar_tmp(j,i,k), KIND=wp )  * dny
+                      ar(j,i,k)      = REAL( ar_tmp(j,i,k), KIND=wp ) * dny
                    ENDDO
 …
                    DO  j = 1, (ny+1)/2 - 1
+                      ar_tmp(j,i,k) = CMPLX( ar(j,i,k), ar(ny+1-j,i,k),        &
+                                             KIND=wp )
+                   ENDDO
+                   ar_tmp((ny+1)/2,i,k) = CMPLX( ar((ny+1)/2,i,k), 0.0_wp,     &
+                                                 KIND=wp )
+                      ar_tmp(j,i,k) = CMPLX( ar(j,i,k), ar(ny+1-j,i,k), KIND=wp )
+                   ENDDO
+                   ar_tmp((ny+1)/2,i,k) = CMPLX( ar((ny+1)/2,i,k), 0.0_wp, KIND=wp )
                 ENDDO
 …
     END SUBROUTINE fft_y
 !------------------------------------------------------------------------------!
+!--------------------------------------------------------------------------------------------------!
 ! Description:
 ! ------------
 !> Fourier-transformation along y-direction.
 !> Version for 1D-decomposition.
 !> It uses internal algorithms (Singleton or Temperton) or
 !> system-specific routines, if they are available.
 !------------------------------------------------------------------------------!
+!> It uses internal algorithms (Singleton or Temperton) or system-specific routines, if they are
+!> available.
+!--------------------------------------------------------------------------------------------------!
     SUBROUTINE fft_y_1d( ar, direction )
 …
        CHARACTER (LEN=*) ::  direction
        INTEGER(iwp) ::  j          !<
        INTEGER(iwp) ::  jshape(1)  !<
 …
        REAL(wp), DIMENSION(0:ny+2)  ::  work   !<
        REAL(wp), DIMENSION(ny+2)    ::  work1  !<
        COMPLEX(wp), DIMENSION(:), ALLOCATABLE ::  cwork  !<
 #if defined( __ibm )
        REAL(wp), DIMENSION(nau2) ::  auy2  !<
 …
+!
 !--       Performing the fft with singleton's software works on every system,
 !--       since it is part of the model
+!--       Performing the fft with singleton's software works on every system, since it is part of
+!--       the model.
           ALLOCATE( cwork(0:ny) )
 …
+!
 !--       Performing the fft with Temperton's software works on every system,
 !--       since it is part of the model
+!--       Performing the fft with Temperton's software works on every system, since it is part of
+!--       the model.
           IF ( forward_fft )  THEN
 …
           IF ( forward_fft )  THEN
+             CALL DRCFT( 0, ar, 1, work, 1, ny+1, 1, 1, sqr_dny, auy1, nau1,   &
+                         auy2, nau2 )
+             CALL DRCFT( 0, ar, 1, work, 1, ny+1, 1, 1, sqr_dny, auy1, nau1, auy2, nau2 )
              DO  j = 0, (ny+1)/2
 …
              work(ny+2) = 0.0_wp
+             CALL DCRFT( 0, work, 1, work, 1, ny+1, 1, -1, sqr_dny, auy3,      &
+                         nau1, auy4, nau2 )
+             CALL DCRFT( 0, work, 1, work, 1, ny+1, 1, -1, sqr_dny, auy3, nau1, auy4, nau2 )
              DO  j = 0, ny
 …
     END SUBROUTINE fft_y_1d
 !------------------------------------------------------------------------------!
+!--------------------------------------------------------------------------------------------------!
 ! Description:
 ! ------------
 !> Fourier-transformation along x-direction.
 !> Version for 1d domain decomposition
+!> Version for 1d domain decomposition,
 !> using multiple 1D FFT from Math Keisan on NEC or Temperton-algorithm
 !> (no singleton-algorithm on NEC because it does not vectorize)
 !------------------------------------------------------------------------------!
+!> (no singleton-algorithm on NEC because it does not vectorize).
+!--------------------------------------------------------------------------------------------------!
     SUBROUTINE fft_x_m( ar, direction )
 …
        CHARACTER (LEN=*) ::  direction  !<
        INTEGER(iwp) ::  i     !<
        INTEGER(iwp) ::  k     !<
 …
        REAL(wp), DIMENSION(0:nx+3,nz+1)   ::  ai     !<
        REAL(wp), DIMENSION(6*(nx+4),nz+1) ::  work1  !<
 #if defined( __nec_fft )
        COMPLEX(wp), DIMENSION(:,:), ALLOCATABLE ::  work
 …
+!
+!--          Tables are initialized once more. This call should not be
+!--          necessary, but otherwise program aborts in asymmetric case
+             CALL DZFFTM( 0, nx+1, nz1, sqr_dnx, work, nx+4, work, nx+4,       &
+                          trig_xf, work1, 0 )
+!--          Tables are initialized once more. This call should not be necessary, but otherwise
+!--          program aborts in asymmetric case.
+             CALL DZFFTM( 0, nx+1, nz1, sqr_dnx, work, nx+4, work, nx+4, trig_xf, work1, 0 )
              ai(0:nx,1:nz) = ar(0:nx,1:nz)
 …
              ENDIF
+             CALL DZFFTM( 1, nx+1, nz1, sqr_dnx, ai, siza, work, sizw,         &
+                          trig_xf, work1, 0 )
+             CALL DZFFTM( 1, nx+1, nz1, sqr_dnx, ai, siza, work, sizw, trig_xf, work1, 0 )
              DO  k = 1, nz
 …
 !--          Tables are initialized once more. This call should not be
 !--          necessary, but otherwise program aborts in asymmetric case
+             CALL ZDFFTM( 0, nx+1, nz1, sqr_dnx, work, nx+4, work, nx+4,       &
+                          trig_xb, work1, 0 )
+             CALL ZDFFTM( 0, nx+1, nz1, sqr_dnx, work, nx+4, work, nx+4, trig_xb, work1, 0 )
              IF ( nz1 > nz )  THEN
 …
              ENDDO
+             CALL ZDFFTM( -1, nx+1, nz1, sqr_dnx, work, sizw, ai, siza, &
+                          trig_xb, work1, 0 )
+             CALL ZDFFTM( -1, nx+1, nz1, sqr_dnx, work, sizw, ai, siza, trig_xb, work1, 0 )
              ar(0:nx,1:nz) = ai(0:nx,1:nz)
 …
     END SUBROUTINE fft_x_m
 !------------------------------------------------------------------------------!
+!--------------------------------------------------------------------------------------------------!
 ! Description:
 ! ------------
 !> Fourier-transformation along y-direction.
 !> Version for 1d domain decomposition
+!> Version for 1d domain decomposition,
 !> using multiple 1D FFT from Math Keisan on NEC or Temperton-algorithm
 !> (no singleton-algorithm on NEC because it does not vectorize)
 !------------------------------------------------------------------------------!
+!> (no singleton-algorithm on NEC because it does not vectorize).
+!--------------------------------------------------------------------------------------------------!
     SUBROUTINE fft_y_m( ar, ny1, direction )
 …
        CHARACTER (LEN=*) ::  direction  !<
        INTEGER(iwp) ::  j     !<
+       INTEGER(iwp) ::  j     !<
        INTEGER(iwp) ::  k     !<
        INTEGER(iwp) ::  ny1   !<
 …
+!
+!--          Tables are initialized once more. This call should not be
+!--          necessary, but otherwise program aborts in asymmetric case
+             CALL DZFFTM( 0, ny+1, nz1, sqr_dny, work, ny+4, work, ny+4, &
+                          trig_yf, work1, 0 )
+!--          Tables are initialized once more. This call should not be necessary, but otherwise
+!--          program aborts in asymmetric case.
+             CALL DZFFTM( 0, ny+1, nz1, sqr_dny, work, ny+4, work, ny+4, trig_yf, work1, 0 )
              ai(0:ny,1:nz) = ar(0:ny,1:nz)
 …
              ENDIF
+             CALL DZFFTM( 1, ny+1, nz1, sqr_dny, ai, siza, work, sizw, &
+                          trig_yf, work1, 0 )
+             CALL DZFFTM( 1, ny+1, nz1, sqr_dny, ai, siza, work, sizw, trig_yf, work1, 0 )
              DO  k = 1, nz
 …
+!
+!--          Tables are initialized once more. This call should not be
+!--          necessary, but otherwise program aborts in asymmetric case
+             CALL ZDFFTM( 0, ny+1, nz1, sqr_dny, work, ny+4, work, ny+4, &
+                          trig_yb, work1, 0 )
+!--          Tables are initialized once more. This call should not be necessary, but otherwise
+!--          program aborts in asymmetric case.
+             CALL ZDFFTM( 0, ny+1, nz1, sqr_dny, work, ny+4, work, ny+4, trig_yb, work1, 0 )
              IF ( nz1 > nz )  THEN
 …
              ENDDO
+             CALL ZDFFTM( -1, ny+1, nz1, sqr_dny, work, sizw, ai, siza, &
+                          trig_yb, work1, 0 )
+             CALL ZDFFTM( -1, ny+1, nz1, sqr_dny, work, sizw, ai, siza, trig_yb, work1, 0 )
              ar(0:ny,1:nz) = ai(0:ny,1:nz)

Note: See TracChangeset for help on using the changeset viewer.

Context Navigation

Changeset 4646 for palm/trunk/SOURCE/fft_xy_mod.f90

Legend:

palm/trunk/SOURCE/fft_xy_mod.f90

Download in other formats: