Ignore:
Timestamp:
Aug 24, 2020 4:02:40 PM (4 years ago)
Author:
raasch
Message:

files re-formatted to follow the PALM coding standard

File:
1 edited

Legend:

Unmodified
Added
Removed
  • palm/trunk/SOURCE/fft_xy_mod.f90

    r4370 r4646  
    11!> @file fft_xy_mod.f90
    2 !------------------------------------------------------------------------------!
     2!--------------------------------------------------------------------------------------------------!
    33! This file is part of the PALM model system.
    44!
    5 ! PALM is free software: you can redistribute it and/or modify it under the
    6 ! terms of the GNU General Public License as published by the Free Software
    7 ! Foundation, either version 3 of the License, or (at your option) any later
    8 ! version.
    9 !
    10 ! PALM is distributed in the hope that it will be useful, but WITHOUT ANY
    11 ! WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
    12 ! A PARTICULAR PURPOSE.  See the GNU General Public License for more details.
    13 !
    14 ! You should have received a copy of the GNU General Public License along with
    15 ! PALM. If not, see <http://www.gnu.org/licenses/>.
     5! PALM is free software: you can redistribute it and/or modify it under the terms of the GNU General
     6! Public License as published by the Free Software Foundation, either version 3 of the License, or
     7! (at your option) any later version.
     8!
     9! PALM is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the
     10! implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
     11! Public License for more details.
     12!
     13! You should have received a copy of the GNU General Public License along with PALM. If not, see
     14! <http://www.gnu.org/licenses/>.
    1615!
    1716! Copyright 1997-2020 Leibniz Universitaet Hannover
    18 !------------------------------------------------------------------------------!
     17!--------------------------------------------------------------------------------------------------!
    1918!
    2019! Current revisions:
    2120! -----------------
    22 ! 
    23 ! 
     21!
     22!
    2423! Former revisions:
    2524! -----------------
    2625! $Id$
     26! file re-formatted to follow the PALM coding standard
     27!
     28! 4370 2020-01-10 14:00:44Z raasch
    2729! bugfix for Temperton-fft usage on GPU
    28 ! 
     30!
    2931! 4366 2020-01-09 08:12:43Z raasch
    3032! Vectorized Temperton-fft added
    31 ! 
     33!
    3234! 4360 2020-01-07 11:25:50Z suehring
    3335! Corrected "Former revisions" section
    34 ! 
     36!
    3537! 4069 2019-07-01 14:05:51Z Giersch
    3638! Code added to avoid compiler warnings
    37 ! 
     39!
    3840! 3655 2019-01-07 16:51:22Z knoop
    3941! OpenACC port for SPEC
     
    5052!------------------------------------------------------------------------------!
    5153 MODULE fft_xy
    52  
    53 
    54     USE control_parameters,                                                    &
     54
     55
     56    USE control_parameters,                                                                        &
    5557        ONLY:  fft_method, loop_optimization, message_string
    56        
     58
    5759    USE cuda_fft_interfaces
    58        
    59     USE indices,                                                               &
     60
     61    USE indices,                                                                                   &
    6062        ONLY:  nx, ny, nz
    6163
     
    6769
    6870    USE kinds
    69    
    70     USE singleton,                                                             &
     71
     72    USE singleton,                                                                                 &
    7173        ONLY: fftn
    72    
     74
    7375    USE temperton_fft
    74    
    75     USE transpose_indices,                                                     &
     76
     77    USE transpose_indices,                                                                         &
    7678        ONLY:  nxl_y, nxr_y, nyn_x, nys_x, nzb_x, nzb_y, nzt_x, nzt_y
    7779
     
    7981
    8082    PRIVATE
    81     PUBLIC fft_x, fft_x_1d, fft_y, fft_y_1d, fft_init, fft_x_m, fft_y_m, f_vec_x, temperton_fft_vec
     83    PUBLIC fft_init, f_vec_x, fft_x, fft_x_1d, fft_x_m, fft_y, fft_y_1d, fft_y_m, temperton_fft_vec
    8284
    8385    INTEGER(iwp), DIMENSION(:), ALLOCATABLE, SAVE ::  ifax_x  !<
     
    9193    REAL(wp), SAVE ::  sqr_dnx  !<
    9294    REAL(wp), SAVE ::  sqr_dny  !<
    93    
    94     REAL(wp), DIMENSION(:), ALLOCATABLE, SAVE ::  trigs_x  !< 
     95
     96    REAL(wp), DIMENSION(:), ALLOCATABLE, SAVE ::  trigs_x  !<
    9597    REAL(wp), DIMENSION(:), ALLOCATABLE, SAVE ::  trigs_y  !<
    9698
     
    98100
    99101#if defined( __ibm )
    100     INTEGER(iwp), PARAMETER ::  nau1 = 20000  !< 
     102    INTEGER(iwp), PARAMETER ::  nau1 = 20000  !<
    101103    INTEGER(iwp), PARAMETER ::  nau2 = 22000  !<
    102104!
    103 !-- The following working arrays contain tables and have to be "save" and
    104 !-- shared in OpenMP sense
    105     REAL(wp), DIMENSION(nau1), SAVE ::  aux1  !<
     105!-- The following working arrays contain tables and have to be "save" and shared in OpenMP sense
     106    REAL(wp), DIMENSION(nau1), SAVE ::  aux1  !<
    106107    REAL(wp), DIMENSION(nau1), SAVE ::  auy1  !<
    107     REAL(wp), DIMENSION(nau1), SAVE ::  aux3  !< 
     108    REAL(wp), DIMENSION(nau1), SAVE ::  aux3  !<
    108109    REAL(wp), DIMENSION(nau1), SAVE ::  auy3  !<
    109    
     110
    110111#elif defined( __nec_fft )
    111112    INTEGER(iwp), SAVE ::  nz1  !<
    112    
     113
    113114    REAL(wp), DIMENSION(:), ALLOCATABLE, SAVE ::  trig_xb  !<
    114     REAL(wp), DIMENSION(:), ALLOCATABLE, SAVE ::  trig_xf  !< 
     115    REAL(wp), DIMENSION(:), ALLOCATABLE, SAVE ::  trig_xf  !<
    115116    REAL(wp), DIMENSION(:), ALLOCATABLE, SAVE ::  trig_yb  !<
    116117    REAL(wp), DIMENSION(:), ALLOCATABLE, SAVE ::  trig_yf  !<
    117    
     118
    118119#elif defined( __cuda_fft )
    119120    INTEGER(C_INT), SAVE ::  plan_xf  !<
     
    126127#if defined( __fftw )
    127128    INCLUDE  'fftw3.f03'
     129    COMPLEX(KIND=C_DOUBLE_COMPLEX), DIMENSION(:), ALLOCATABLE, SAVE ::  x_out  !<
     130    COMPLEX(KIND=C_DOUBLE_COMPLEX), DIMENSION(:), ALLOCATABLE, SAVE ::  y_out  !<
     131
    128132    INTEGER(KIND=C_INT) ::  nx_c  !<
    129133    INTEGER(KIND=C_INT) ::  ny_c  !<
    130    
    131     COMPLEX(KIND=C_DOUBLE_COMPLEX), DIMENSION(:), ALLOCATABLE, SAVE ::  x_out  !<
    132     COMPLEX(KIND=C_DOUBLE_COMPLEX), DIMENSION(:), ALLOCATABLE, SAVE ::         &
    133        y_out  !<
    134    
    135     REAL(KIND=C_DOUBLE), DIMENSION(:), ALLOCATABLE, SAVE ::                    &
    136        x_in   !<
    137     REAL(KIND=C_DOUBLE), DIMENSION(:), ALLOCATABLE, SAVE ::                    &
    138        y_in   !<
     134
     135    REAL(KIND=C_DOUBLE), DIMENSION(:), ALLOCATABLE, SAVE ::  x_in   !<
     136    REAL(KIND=C_DOUBLE), DIMENSION(:), ALLOCATABLE, SAVE ::  y_in   !<
    139137    !$OMP THREADPRIVATE( x_out, y_out, x_in, y_in )
    140    
    141    
     138
     139
    142140    TYPE(C_PTR), SAVE ::  plan_xf, plan_xi, plan_yf, plan_yi
    143141#endif
     
    176174
    177175
    178 !------------------------------------------------------------------------------!
     176!--------------------------------------------------------------------------------------------------!
    179177! Description:
    180178! ------------
    181179!> @todo Missing subroutine description.
    182 !------------------------------------------------------------------------------!
     180!--------------------------------------------------------------------------------------------------!
    183181    SUBROUTINE fft_init
    184182
     
    192190!--    in OpenMP sense
    193191#if defined( __ibm )
     192       REAL(wp), DIMENSION(nau2)   ::  aux2   !<
     193       REAL(wp), DIMENSION(nau2)   ::  auy2   !<
     194       REAL(wp), DIMENSION(nau2)   ::  aux4   !<
     195       REAL(wp), DIMENSION(nau2)   ::  auy4   !<
    194196       REAL(wp), DIMENSION(0:nx+2) ::  workx  !<
    195197       REAL(wp), DIMENSION(0:ny+2) ::  worky  !<
    196        REAL(wp), DIMENSION(nau2)   ::  aux2   !<
    197        REAL(wp), DIMENSION(nau2)   ::  auy2   !<
    198        REAL(wp), DIMENSION(nau2)   ::  aux4   !<
    199        REAL(wp), DIMENSION(nau2)   ::  auy4   !<
    200198#elif defined( __nec_fft )
    201199       REAL(wp), DIMENSION(0:nx+3,nz+1)   ::  work_x  !<
     
    203201       REAL(wp), DIMENSION(6*(nx+3),nz+1) ::  workx   !<
    204202       REAL(wp), DIMENSION(6*(ny+3),nz+1) ::  worky   !<
    205 #endif 
     203#endif
    206204
    207205!
     
    233231!
    234232!--       Initialize tables for fft along x
    235           CALL DRCFT( 1, workx, 1, workx, 1, nx+1, 1,  1, sqr_dnx, aux1, nau1, &
    236                       aux2, nau2 )
    237           CALL DCRFT( 1, workx, 1, workx, 1, nx+1, 1, -1, sqr_dnx, aux3, nau1, &
    238                       aux4, nau2 )
     233          CALL DRCFT( 1, workx, 1, workx, 1, nx+1, 1,  1, sqr_dnx, aux1, nau1, aux2, nau2 )
     234          CALL DCRFT( 1, workx, 1, workx, 1, nx+1, 1, -1, sqr_dnx, aux3, nau1, aux4, nau2 )
    239235!
    240236!--       Initialize tables for fft along y
    241           CALL DRCFT( 1, worky, 1, worky, 1, ny+1, 1,  1, sqr_dny, auy1, nau1, &
    242                       auy2, nau2 )
    243           CALL DCRFT( 1, worky, 1, worky, 1, ny+1, 1, -1, sqr_dny, auy3, nau1, &
    244                       auy4, nau2 )
     237          CALL DRCFT( 1, worky, 1, worky, 1, ny+1, 1,  1, sqr_dny, auy1, nau1, auy2, nau2 )
     238          CALL DCRFT( 1, worky, 1, worky, 1, ny+1, 1, -1, sqr_dny, auy3, nau1, auy4, nau2 )
    245239#elif defined( __nec_fft )
    246           message_string = 'fft method "' // TRIM( fft_method) // &
    247                            '" currently does not work on NEC'
     240          message_string = 'fft method "' // TRIM( fft_method) // '" currently does not work on NEC'
    248241          CALL message( 'fft_init', 'PA0187', 1, 2, 0, 6, 0 )
    249242
    250           ALLOCATE( trig_xb(2*(nx+1)), trig_xf(2*(nx+1)),                      &
    251                     trig_yb(2*(ny+1)), trig_yf(2*(ny+1)) )
     243          ALLOCATE( trig_xb(2*(nx+1)), trig_xf(2*(nx+1)), trig_yb(2*(ny+1)), trig_yf(2*(ny+1)) )
    252244
    253245          work_x = 0.0_wp
     
    260252          CALL DZFFT( 0, nx+1, sqr_dnx, work_x, work_x, trig_xf, workx, 0 )
    261253          CALL ZDFFT( 0, nx+1, sqr_dnx, work_x, work_x, trig_xb, workx, 0 )
    262           CALL DZFFTM( 0, nx+1, nz1, sqr_dnx, work_x, nx+4, work_x, nx+4,      &
    263                        trig_xf, workx, 0 )
    264           CALL ZDFFTM( 0, nx+1, nz1, sqr_dnx, work_x, nx+4, work_x, nx+4,      &
    265                        trig_xb, workx, 0 )
     254          CALL DZFFTM( 0, nx+1, nz1, sqr_dnx, work_x, nx+4, work_x, nx+4, trig_xf, workx, 0 )
     255          CALL ZDFFTM( 0, nx+1, nz1, sqr_dnx, work_x, nx+4, work_x, nx+4, trig_xb, workx, 0 )
    266256!
    267257!--       Initialize tables for fft along y (non-vector and vector case (M))
    268258          CALL DZFFT( 0, ny+1, sqr_dny, work_y, work_y, trig_yf, worky, 0 )
    269259          CALL ZDFFT( 0, ny+1, sqr_dny, work_y, work_y, trig_yb, worky, 0 )
    270           CALL DZFFTM( 0, ny+1, nz1, sqr_dny, work_y, ny+4, work_y, ny+4,      &
    271                        trig_yf, worky, 0 )
    272           CALL ZDFFTM( 0, ny+1, nz1, sqr_dny, work_y, ny+4, work_y, ny+4,      &
    273                        trig_yb, worky, 0 )
     260          CALL DZFFTM( 0, ny+1, nz1, sqr_dny, work_y, ny+4, work_y, ny+4, trig_yf, worky, 0 )
     261          CALL ZDFFTM( 0, ny+1, nz1, sqr_dny, work_y, ny+4, work_y, ny+4, trig_yb, worky, 0 )
    274262#elif defined( __cuda_fft )
    275263          CALL CUFFTPLAN1D( plan_xf, nx+1, CUFFT_D2Z, (nyn_x-nys_x+1) * (nzt_x-nzb_x+1) )
     
    303291          ny_c = ny+1
    304292          !$OMP PARALLEL
    305           ALLOCATE( x_in(0:nx+2), y_in(0:ny+2), x_out(0:(nx+1)/2),             &
    306                     y_out(0:(ny+1)/2) )
     293          ALLOCATE( x_in(0:nx+2), y_in(0:ny+2), x_out(0:(nx+1)/2), y_out(0:(ny+1)/2) )
    307294          !$OMP END PARALLEL
    308295          plan_xf = FFTW_PLAN_DFT_R2C_1D( nx_c, x_in, x_out, FFTW_ESTIMATE )
     
    321308       ELSE
    322309
    323           message_string = 'fft method "' // TRIM( fft_method) // &
    324                            '" not available'
     310          message_string = 'fft method "' // TRIM( fft_method) // '" not available'
    325311          CALL message( 'fft_init', 'PA0189', 1, 2, 0, 6, 0 )
    326312       ENDIF
     
    329315
    330316
    331 !------------------------------------------------------------------------------!
     317!--------------------------------------------------------------------------------------------------!
    332318! Description:
    333319! ------------
    334 !> Fourier-transformation along x-direction.                 
     320!> Fourier-transformation along x-direction.
    335321!> Version for 2D-decomposition.
    336 !> It uses internal algorithms (Singleton or Temperton) or      
    337 !> system-specific routines, if they are available           
    338 !------------------------------------------------------------------------------!
    339  
     322!> It uses internal algorithms (Singleton or Temperton) or system-specific routines, if they are
     323!> available.
     324!--------------------------------------------------------------------------------------------------!
     325
    340326    SUBROUTINE fft_x( ar, direction, ar_2d, ar_inv )
    341327
     
    344330
    345331       CHARACTER (LEN=*) ::  direction  !<
    346        
     332
    347333       COMPLEX(wp), DIMENSION(:), ALLOCATABLE ::  cwork  !<
    348334
    349        INTEGER(iwp) ::  i          !< 
     335       INTEGER(iwp) ::  i          !<
    350336       INTEGER(iwp) ::  ishape(1)  !<
    351337       INTEGER(iwp) ::  j          !<
     
    354340
    355341       LOGICAL ::  forward_fft !<
    356        
     342
    357343       REAL(wp), DIMENSION(0:nx+2) ::  work   !<
    358344       REAL(wp), DIMENSION(nx+2)   ::  work1  !<
    359        
     345
    360346       REAL(wp), DIMENSION(:,:), ALLOCATABLE           ::  work_vec  !<
    361347       REAL(wp), DIMENSION(0:nx,nys_x:nyn_x), OPTIONAL ::  ar_2d     !<
    362348
     349       REAL(wp), DIMENSION(0:nx,nys_x:nyn_x,nzb_x:nzt_x)           ::  ar       !<
    363350       REAL(wp), DIMENSION(nys_x:nyn_x,nzb_x:nzt_x,0:nx), OPTIONAL ::  ar_inv   !<
    364        REAL(wp), DIMENSION(0:nx,nys_x:nyn_x,nzb_x:nzt_x)           ::  ar       !<
    365351
    366352#if defined( __ibm )
    367        REAL(wp), DIMENSION(nau2) ::  aux2  !< 
     353       REAL(wp), DIMENSION(nau2) ::  aux2  !<
    368354       REAL(wp), DIMENSION(nau2) ::  aux4  !<
    369355#elif defined( __nec_fft )
     
    387373
    388374!
    389 !--       Performing the fft with singleton's software works on every system,
    390 !--       since it is part of the model
     375!--       Performing the fft with singleton's software works on every system, since it is part of
     376!--       the model.
    391377          ALLOCATE( cwork(0:nx) )
    392      
    393           IF ( forward_fft )   then
     378
     379          IF ( forward_fft )  THEN
    394380
    395381             !$OMP PARALLEL PRIVATE ( cwork, i, ishape, j, k )
     
    425411                   cwork(0) = CMPLX( ar(0,j,k), 0.0_wp, KIND=wp )
    426412                   DO  i = 1, (nx+1)/2 - 1
    427                       cwork(i)      = CMPLX( ar(i,j,k), -ar(nx+1-i,j,k),       &
    428                                              KIND=wp )
    429                       cwork(nx+1-i) = CMPLX( ar(i,j,k),  ar(nx+1-i,j,k),       &
    430                                              KIND=wp )
     413                      cwork(i)      = CMPLX( ar(i,j,k), -ar(nx+1-i,j,k), KIND=wp )
     414                      cwork(nx+1-i) = CMPLX( ar(i,j,k),  ar(nx+1-i,j,k), KIND=wp )
    431415                   ENDDO
    432416                   cwork((nx+1)/2) = CMPLX( ar((nx+1)/2,j,k), 0.0_wp, KIND=wp )
     
    450434
    451435!
    452 !--       Performing the fft with Temperton's software works on every system,
    453 !--       since it is part of the model
     436!--       Performing the fft with Temperton's software works on every system, since it is part of
     437!--       the model.
    454438          IF ( forward_fft )  THEN
    455439
     
    633617                      x_out(0) = CMPLX( ar_2d(0,j), 0.0_wp, KIND=wp )
    634618                      DO  i = 1, (nx+1)/2 - 1
    635                          x_out(i) = CMPLX( ar_2d(i,j), ar_2d(nx+1-i,j),        &
    636                                            KIND=wp )
    637                       ENDDO
    638                       x_out((nx+1)/2) = CMPLX( ar_2d((nx+1)/2,j), 0.0_wp,      &
    639                                                KIND=wp )
     619                         x_out(i) = CMPLX( ar_2d(i,j), ar_2d(nx+1-i,j), KIND=wp )
     620                      ENDDO
     621                      x_out((nx+1)/2) = CMPLX( ar_2d((nx+1)/2,j), 0.0_wp, KIND=wp )
    640622
    641623                   ELSE
     
    645627                         x_out(i) = CMPLX( ar(i,j,k), ar(nx+1-i,j,k), KIND=wp )
    646628                      ENDDO
    647                       x_out((nx+1)/2) = CMPLX( ar((nx+1)/2,j,k), 0.0_wp,       &
    648                                                KIND=wp )
     629                      x_out((nx+1)/2) = CMPLX( ar((nx+1)/2,j,k), 0.0_wp, KIND=wp )
    649630
    650631                   ENDIF
     
    670651                DO  j = nys_x, nyn_x
    671652
    672                    CALL DRCFT( 0, ar, 1, work, 1, nx+1, 1, 1, sqr_dnx, aux1,   &
    673                                nau1, aux2, nau2 )
     653                   CALL DRCFT( 0, ar, 1, work, 1, nx+1, 1, 1, sqr_dnx, aux1, nau1, aux2, nau2 )
    674654
    675655                   DO  i = 0, (nx+1)/2
     
    700680                   work(nx+2) = 0.0_wp
    701681
    702                    CALL DCRFT( 0, work, 1, work, 1, nx+1, 1, -1, sqr_dnx,      &
    703                                aux3, nau1, aux4, nau2 )
     682                   CALL DCRFT( 0, work, 1, work, 1, nx+1, 1, -1, sqr_dnx, aux3, nau1, aux4, nau2 )
    704683
    705684                   DO  i = 0, nx
     
    725704
    726705                   CALL DZFFT( 1, nx+1, sqr_dnx, work, work, trig_xf, work2, 0 )
    727      
     706
    728707                   DO  i = 0, (nx+1)/2
    729708                      ar(i,j,k) = work(2*i)
     
    797776
    798777                   DO  i = 1, (nx+1)/2 - 1
    799                       ar_tmp(i,j,k) = CMPLX( ar(i,j,k), ar(nx+1-i,j,k),        &
    800                                              KIND=wp )
    801                    ENDDO
    802                    ar_tmp((nx+1)/2,j,k) = CMPLX( ar((nx+1)/2,j,k), 0.0_wp,     &
    803                                                  KIND=wp )
     778                      ar_tmp(i,j,k) = CMPLX( ar(i,j,k), ar(nx+1-i,j,k), KIND=wp )
     779                   ENDDO
     780                   ar_tmp((nx+1)/2,j,k) = CMPLX( ar((nx+1)/2,j,k), 0.0_wp, KIND=wp )
    804781
    805782                ENDDO
     
    818795    END SUBROUTINE fft_x
    819796
    820 !------------------------------------------------------------------------------!
     797!--------------------------------------------------------------------------------------------------!
    821798! Description:
    822799! ------------
    823800!> Fourier-transformation along x-direction.
    824801!> Version for 1D-decomposition.
    825 !> It uses internal algorithms (Singleton or Temperton) or
    826 !> system-specific routines, if they are available
    827 !------------------------------------------------------------------------------!
    828  
     802!> It uses internal algorithms (Singleton or Temperton) or system-specific routines, if they are
     803!> available.
     804!--------------------------------------------------------------------------------------------------!
     805
    829806    SUBROUTINE fft_x_1d( ar, direction )
    830807
     
    833810
    834811       CHARACTER (LEN=*) ::  direction  !<
    835        
     812
    836813       INTEGER(iwp) ::  i               !<
    837814       INTEGER(iwp) ::  ishape(1)       !<
     
    842819       REAL(wp), DIMENSION(0:nx+2) ::  work   !<
    843820       REAL(wp), DIMENSION(nx+2)   ::  work1  !<
    844        
     821
    845822       COMPLEX(wp), DIMENSION(:), ALLOCATABLE ::  cwork  !<
    846        
     823
    847824#if defined( __ibm )
    848825       REAL(wp), DIMENSION(nau2) ::  aux2       !<
     
    861838
    862839!
    863 !--       Performing the fft with singleton's software works on every system,
    864 !--       since it is part of the model
     840!--       Performing the fft with singleton's software works on every system, since it is part of
     841!--       the model.
    865842          ALLOCATE( cwork(0:nx) )
    866      
    867           IF ( forward_fft )   then
     843
     844          IF ( forward_fft )  THEN
    868845
    869846             DO  i = 0, nx
     
    902879
    903880!
    904 !--       Performing the fft with Temperton's software works on every system,
    905 !--       since it is part of the model
     881!--       Performing the fft with Temperton's software works on every system, since it is part of
     882!--       the model.
    906883          IF ( forward_fft )  THEN
    907884
     
    966943          IF ( forward_fft )  THEN
    967944
    968              CALL DRCFT( 0, ar, 1, work, 1, nx+1, 1, 1, sqr_dnx, aux1, nau1,   &
    969                          aux2, nau2 )
     945             CALL DRCFT( 0, ar, 1, work, 1, nx+1, 1, 1, sqr_dnx, aux1, nau1, aux2, nau2 )
    970946
    971947             DO  i = 0, (nx+1)/2
     
    987963             work(nx+2) = 0.0_wp
    988964
    989              CALL DCRFT( 0, work, 1, work, 1, nx+1, 1, -1, sqr_dnx, aux3, nau1, &
    990                          aux4, nau2 )
     965             CALL DCRFT( 0, work, 1, work, 1, nx+1, 1, -1, sqr_dnx, aux3, nau1, aux4, nau2 )
    991966
    992967             DO  i = 0, nx
     
    1001976
    1002977             CALL DZFFT( 1, nx+1, sqr_dnx, work, work, trig_xf, work2, 0 )
    1003      
     978
    1004979             DO  i = 0, (nx+1)/2
    1005980                ar(i) = work(2*i)
     
    10311006    END SUBROUTINE fft_x_1d
    10321007
    1033 !------------------------------------------------------------------------------!
     1008!--------------------------------------------------------------------------------------------------!
    10341009! Description:
    10351010! ------------
    10361011!> Fourier-transformation along y-direction.
    10371012!> Version for 2D-decomposition.
    1038 !> It uses internal algorithms (Singleton or Temperton) or
    1039 !> system-specific routines, if they are available.
    1040 !> 
     1013!> It uses internal algorithms (Singleton or Temperton) or system-specific routines, if they are
     1014!> available.
     1015!>
    10411016!> direction:  'forward' or 'backward'
    1042 !> ar, ar_tr:  3D data arrays 
     1017!> ar, ar_tr:  3D data arrays
    10431018!>             forward:   ar: before  ar_tr: after transformation
    10441019!>             backward:  ar_tr: before  ar: after transfosition
    1045 !> 
     1020!>
    10461021!> In case of non-overlapping transposition/transformation:
    1047 !> nxl_y_bound = nxl_y_l = nxl_y 
    1048 !> nxr_y_bound = nxr_y_l = nxr_y 
    1049 !> 
     1022!> nxl_y_bound = nxl_y_l = nxl_y
     1023!> nxr_y_bound = nxr_y_l = nxr_y
     1024!>
    10501025!> In case of overlapping transposition/transformation
    1051 !> - nxl_y_bound  and  nxr_y_bound have the original values of
    1052 !>   nxl_y, nxr_y.  ar_tr is dimensioned using these values.
    1053 !> - nxl_y_l = nxr_y_r.  ar is dimensioned with these values, so that
    1054 !>   transformation is carried out for a 2D-plane only.
    1055 !------------------------------------------------------------------------------!
    1056  
    1057     SUBROUTINE fft_y( ar, direction, ar_tr, nxl_y_bound, nxr_y_bound, nxl_y_l, &
    1058                       nxr_y_l, ar_inv )
     1026!> - nxl_y_bound  and  nxr_y_bound have the original values of nxl_y, nxr_y.  ar_tr is dimensioned
     1027!>   using these values.
     1028!> - nxl_y_l = nxr_y_r.  ar is dimensioned with these values, so that transformation is carried out
     1029!>   for a 2D-plane only.
     1030!--------------------------------------------------------------------------------------------------!
     1031
     1032    SUBROUTINE fft_y( ar, direction, ar_tr, nxl_y_bound, nxr_y_bound, nxl_y_l, nxr_y_l, ar_inv )
    10591033
    10601034
     
    10621036
    10631037       CHARACTER (LEN=*) ::  direction  !<
    1064        
     1038
    10651039       INTEGER(iwp) ::  i            !<
    1066        INTEGER(iwp) ::  j            !< 
     1040       INTEGER(iwp) ::  j            !<
    10671041       INTEGER(iwp) ::  jshape(1)    !<
    10681042       INTEGER(iwp) ::  k            !<
     
    10771051       REAL(wp), DIMENSION(0:ny+2) ::  work   !<
    10781052       REAL(wp), DIMENSION(ny+2)   ::  work1  !<
    1079        
     1053
    10801054       REAL(wp), DIMENSION(:,:), ALLOCATABLE ::  f_vec_y
    10811055       REAL(wp), DIMENSION(:,:), ALLOCATABLE ::  work_vec
     
    10861060
    10871061       COMPLEX(wp), DIMENSION(:), ALLOCATABLE ::  cwork  !<
    1088        
     1062
    10891063#if defined( __ibm )
    10901064       REAL(wp), DIMENSION(nau2) ::  auy2  !<
     
    10931067       REAL(wp), DIMENSION(6*(ny+1)) ::  work2  !<
    10941068#elif defined( __cuda_fft )
    1095        COMPLEX(dp), DIMENSION(0:(ny+1)/2,nxl_y:nxr_y,nzb_y:nzt_y) ::           &
    1096           ar_tmp  !<
     1069       COMPLEX(dp), DIMENSION(0:(ny+1)/2,nxl_y:nxr_y,nzb_y:nzt_y) ::  ar_tmp  !<
    10971070       !$ACC DECLARE CREATE(ar_tmp)
    10981071#endif
     
    11081081
    11091082!
    1110 !--       Performing the fft with singleton's software works on every system,
    1111 !--       since it is part of the model
     1083!--       Performing the fft with singleton's software works on every system, since it is part of
     1084!--       the model.
    11121085          ALLOCATE( cwork(0:ny) )
    11131086
    1114           IF ( forward_fft )   then
     1087          IF ( forward_fft )  THEN
    11151088
    11161089             !$OMP PARALLEL PRIVATE ( cwork, i, jshape, j, k )
     
    11461119                   cwork(0) = CMPLX( ar_tr(0,i,k), 0.0_wp, KIND=wp )
    11471120                   DO  j = 1, (ny+1)/2 - 1
    1148                       cwork(j)      = CMPLX( ar_tr(j,i,k), -ar_tr(ny+1-j,i,k), &
    1149                                              KIND=wp )
    1150                       cwork(ny+1-j) = CMPLX( ar_tr(j,i,k),  ar_tr(ny+1-j,i,k), &
    1151                                              KIND=wp )
    1152                    ENDDO
    1153                    cwork((ny+1)/2) = CMPLX( ar_tr((ny+1)/2,i,k), 0.0_wp,       &
    1154                                             KIND=wp )
     1121                      cwork(j)      = CMPLX( ar_tr(j,i,k), -ar_tr(ny+1-j,i,k), KIND=wp )
     1122                      cwork(ny+1-j) = CMPLX( ar_tr(j,i,k),  ar_tr(ny+1-j,i,k), KIND=wp )
     1123                   ENDDO
     1124                   cwork((ny+1)/2) = CMPLX( ar_tr((ny+1)/2,i,k), 0.0_wp, KIND=wp )
    11551125
    11561126                   jshape = SHAPE( cwork )
     
    11721142
    11731143!
    1174 !--       Performing the fft with Temperton's software works on every system,
    1175 !--       since it is part of the model
     1144!--       Performing the fft with Temperton's software works on every system, since it is part of
     1145!--       the model.
    11761146          IF ( forward_fft )  THEN
    11771147
     
    13611331                   y_out(0) = CMPLX( ar_tr(0,i,k), 0.0_wp, KIND=wp )
    13621332                   DO  j = 1, (ny+1)/2 - 1
    1363                       y_out(j) = CMPLX( ar_tr(j,i,k), ar_tr(ny+1-j,i,k),       &
    1364                                         KIND=wp )
    1365                    ENDDO
    1366                    y_out((ny+1)/2) = CMPLX( ar_tr((ny+1)/2,i,k), 0.0_wp,       &
    1367                                             KIND=wp )
     1333                      y_out(j) = CMPLX( ar_tr(j,i,k), ar_tr(ny+1-j,i,k), KIND=wp )
     1334                   ENDDO
     1335                   y_out((ny+1)/2) = CMPLX( ar_tr((ny+1)/2,i,k), 0.0_wp, KIND=wp )
    13681336
    13691337                   CALL FFTW_EXECUTE_DFT_C2R( plan_yi, y_out, y_in )
     
    13871355                DO  i = nxl_y_l, nxr_y_l
    13881356
    1389                    CALL DRCFT( 0, ar, 1, work, 1, ny+1, 1, 1, sqr_dny, auy1,   &
    1390                                nau1, auy2, nau2 )
     1357                   CALL DRCFT( 0, ar, 1, work, 1, ny+1, 1, 1, sqr_dny, auy1, nau1, auy2, nau2 )
    13911358
    13921359                   DO  j = 0, (ny+1)/2
     
    14171384                   work(ny+2) = 0.0_wp
    14181385
    1419                    CALL DCRFT( 0, work, 1, work, 1, ny+1, 1, -1, sqr_dny,      &
    1420                                auy3, nau1, auy4, nau2 )
     1386                   CALL DCRFT( 0, work, 1, work, 1, ny+1, 1, -1, sqr_dny, auy3, nau1, auy4, nau2 )
    14211387
    14221388                   DO  j = 0, ny
     
    14911457
    14921458                   DO  j = 0, (ny+1)/2
    1493                       ar(j,i,k)      = REAL( ar_tmp(j,i,k), KIND=wp )  * dny
     1459                      ar(j,i,k)      = REAL( ar_tmp(j,i,k), KIND=wp ) * dny
    14941460                   ENDDO
    14951461
     
    15111477
    15121478                   DO  j = 1, (ny+1)/2 - 1
    1513                       ar_tmp(j,i,k) = CMPLX( ar(j,i,k), ar(ny+1-j,i,k),        &
    1514                                              KIND=wp )
    1515                    ENDDO
    1516                    ar_tmp((ny+1)/2,i,k) = CMPLX( ar((ny+1)/2,i,k), 0.0_wp,     &
    1517                                                  KIND=wp )
     1479                      ar_tmp(j,i,k) = CMPLX( ar(j,i,k), ar(ny+1-j,i,k), KIND=wp )
     1480                   ENDDO
     1481                   ar_tmp((ny+1)/2,i,k) = CMPLX( ar((ny+1)/2,i,k), 0.0_wp, KIND=wp )
    15181482
    15191483                ENDDO
     
    15321496    END SUBROUTINE fft_y
    15331497
    1534 !------------------------------------------------------------------------------!
     1498!--------------------------------------------------------------------------------------------------!
    15351499! Description:
    15361500! ------------
    15371501!> Fourier-transformation along y-direction.
    15381502!> Version for 1D-decomposition.
    1539 !> It uses internal algorithms (Singleton or Temperton) or
    1540 !> system-specific routines, if they are available.
    1541 !------------------------------------------------------------------------------!
    1542  
     1503!> It uses internal algorithms (Singleton or Temperton) or system-specific routines, if they are
     1504!> available.
     1505!--------------------------------------------------------------------------------------------------!
     1506
    15431507    SUBROUTINE fft_y_1d( ar, direction )
    15441508
     
    15471511
    15481512       CHARACTER (LEN=*) ::  direction
    1549        
     1513
    15501514       INTEGER(iwp) ::  j          !<
    15511515       INTEGER(iwp) ::  jshape(1)  !<
     
    15561520       REAL(wp), DIMENSION(0:ny+2)  ::  work   !<
    15571521       REAL(wp), DIMENSION(ny+2)    ::  work1  !<
    1558        
     1522
    15591523       COMPLEX(wp), DIMENSION(:), ALLOCATABLE ::  cwork  !<
    1560        
     1524
    15611525#if defined( __ibm )
    15621526       REAL(wp), DIMENSION(nau2) ::  auy2  !<
     
    15751539
    15761540!
    1577 !--       Performing the fft with singleton's software works on every system,
    1578 !--       since it is part of the model
     1541!--       Performing the fft with singleton's software works on every system, since it is part of
     1542!--       the model.
    15791543          ALLOCATE( cwork(0:ny) )
    15801544
     
    16181582
    16191583!
    1620 !--       Performing the fft with Temperton's software works on every system,
    1621 !--       since it is part of the model
     1584!--       Performing the fft with Temperton's software works on every system, since it is part of
     1585!--       the model.
    16221586          IF ( forward_fft )  THEN
    16231587
     
    16821646          IF ( forward_fft )  THEN
    16831647
    1684              CALL DRCFT( 0, ar, 1, work, 1, ny+1, 1, 1, sqr_dny, auy1, nau1,   &
    1685                          auy2, nau2 )
     1648             CALL DRCFT( 0, ar, 1, work, 1, ny+1, 1, 1, sqr_dny, auy1, nau1, auy2, nau2 )
    16861649
    16871650             DO  j = 0, (ny+1)/2
     
    17031666             work(ny+2) = 0.0_wp
    17041667
    1705              CALL DCRFT( 0, work, 1, work, 1, ny+1, 1, -1, sqr_dny, auy3,      &
    1706                          nau1, auy4, nau2 )
     1668             CALL DCRFT( 0, work, 1, work, 1, ny+1, 1, -1, sqr_dny, auy3, nau1, auy4, nau2 )
    17071669
    17081670             DO  j = 0, ny
     
    17471709    END SUBROUTINE fft_y_1d
    17481710
    1749 !------------------------------------------------------------------------------!
     1711!--------------------------------------------------------------------------------------------------!
    17501712! Description:
    17511713! ------------
    17521714!> Fourier-transformation along x-direction.
    1753 !> Version for 1d domain decomposition
     1715!> Version for 1d domain decomposition,
    17541716!> using multiple 1D FFT from Math Keisan on NEC or Temperton-algorithm
    1755 !> (no singleton-algorithm on NEC because it does not vectorize)
    1756 !------------------------------------------------------------------------------!
    1757  
     1717!> (no singleton-algorithm on NEC because it does not vectorize).
     1718!--------------------------------------------------------------------------------------------------!
     1719
    17581720    SUBROUTINE fft_x_m( ar, direction )
    17591721
     
    17621724
    17631725       CHARACTER (LEN=*) ::  direction  !<
    1764        
     1726
    17651727       INTEGER(iwp) ::  i     !<
    17661728       INTEGER(iwp) ::  k     !<
     
    17731735       REAL(wp), DIMENSION(0:nx+3,nz+1)   ::  ai     !<
    17741736       REAL(wp), DIMENSION(6*(nx+4),nz+1) ::  work1  !<
    1775        
     1737
    17761738#if defined( __nec_fft )
    17771739       COMPLEX(wp), DIMENSION(:,:), ALLOCATABLE ::  work
     
    18271789
    18281790!
    1829 !--          Tables are initialized once more. This call should not be
    1830 !--          necessary, but otherwise program aborts in asymmetric case
    1831              CALL DZFFTM( 0, nx+1, nz1, sqr_dnx, work, nx+4, work, nx+4,       &
    1832                           trig_xf, work1, 0 )
     1791!--          Tables are initialized once more. This call should not be necessary, but otherwise
     1792!--          program aborts in asymmetric case.
     1793             CALL DZFFTM( 0, nx+1, nz1, sqr_dnx, work, nx+4, work, nx+4, trig_xf, work1, 0 )
    18331794
    18341795             ai(0:nx,1:nz) = ar(0:nx,1:nz)
     
    18371798             ENDIF
    18381799
    1839              CALL DZFFTM( 1, nx+1, nz1, sqr_dnx, ai, siza, work, sizw,         &
    1840                           trig_xf, work1, 0 )
     1800             CALL DZFFTM( 1, nx+1, nz1, sqr_dnx, ai, siza, work, sizw, trig_xf, work1, 0 )
    18411801
    18421802             DO  k = 1, nz
     
    18541814!--          Tables are initialized once more. This call should not be
    18551815!--          necessary, but otherwise program aborts in asymmetric case
    1856              CALL ZDFFTM( 0, nx+1, nz1, sqr_dnx, work, nx+4, work, nx+4,       &
    1857                           trig_xb, work1, 0 )
     1816             CALL ZDFFTM( 0, nx+1, nz1, sqr_dnx, work, nx+4, work, nx+4, trig_xb, work1, 0 )
    18581817
    18591818             IF ( nz1 > nz )  THEN
     
    18681827             ENDDO
    18691828
    1870              CALL ZDFFTM( -1, nx+1, nz1, sqr_dnx, work, sizw, ai, siza, &
    1871                           trig_xb, work1, 0 )
     1829             CALL ZDFFTM( -1, nx+1, nz1, sqr_dnx, work, sizw, ai, siza, trig_xb, work1, 0 )
    18721830
    18731831             ar(0:nx,1:nz) = ai(0:nx,1:nz)
     
    18821840    END SUBROUTINE fft_x_m
    18831841
    1884 !------------------------------------------------------------------------------!
     1842!--------------------------------------------------------------------------------------------------!
    18851843! Description:
    18861844! ------------
    18871845!> Fourier-transformation along y-direction.
    1888 !> Version for 1d domain decomposition
     1846!> Version for 1d domain decomposition,
    18891847!> using multiple 1D FFT from Math Keisan on NEC or Temperton-algorithm
    1890 !> (no singleton-algorithm on NEC because it does not vectorize)
    1891 !------------------------------------------------------------------------------!
    1892  
     1848!> (no singleton-algorithm on NEC because it does not vectorize).
     1849!--------------------------------------------------------------------------------------------------!
     1850
    18931851    SUBROUTINE fft_y_m( ar, ny1, direction )
    18941852
     
    18971855
    18981856       CHARACTER (LEN=*) ::  direction  !<
    1899        
    1900        INTEGER(iwp) ::  j     !< 
     1857
     1858       INTEGER(iwp) ::  j     !<
    19011859       INTEGER(iwp) ::  k     !<
    19021860       INTEGER(iwp) ::  ny1   !<
     
    19641922
    19651923!
    1966 !--          Tables are initialized once more. This call should not be
    1967 !--          necessary, but otherwise program aborts in asymmetric case
    1968              CALL DZFFTM( 0, ny+1, nz1, sqr_dny, work, ny+4, work, ny+4, &
    1969                           trig_yf, work1, 0 )
     1924!--          Tables are initialized once more. This call should not be necessary, but otherwise
     1925!--          program aborts in asymmetric case.
     1926             CALL DZFFTM( 0, ny+1, nz1, sqr_dny, work, ny+4, work, ny+4, trig_yf, work1, 0 )
    19701927
    19711928             ai(0:ny,1:nz) = ar(0:ny,1:nz)
     
    19741931             ENDIF
    19751932
    1976              CALL DZFFTM( 1, ny+1, nz1, sqr_dny, ai, siza, work, sizw, &
    1977                           trig_yf, work1, 0 )
     1933             CALL DZFFTM( 1, ny+1, nz1, sqr_dny, ai, siza, work, sizw, trig_yf, work1, 0 )
    19781934
    19791935             DO  k = 1, nz
     
    19891945
    19901946!
    1991 !--          Tables are initialized once more. This call should not be
    1992 !--          necessary, but otherwise program aborts in asymmetric case
    1993              CALL ZDFFTM( 0, ny+1, nz1, sqr_dny, work, ny+4, work, ny+4, &
    1994                           trig_yb, work1, 0 )
     1947!--          Tables are initialized once more. This call should not be necessary, but otherwise
     1948!--          program aborts in asymmetric case.
     1949             CALL ZDFFTM( 0, ny+1, nz1, sqr_dny, work, ny+4, work, ny+4, trig_yb, work1, 0 )
    19951950
    19961951             IF ( nz1 > nz )  THEN
     
    20051960             ENDDO
    20061961
    2007              CALL ZDFFTM( -1, ny+1, nz1, sqr_dny, work, sizw, ai, siza, &
    2008                           trig_yb, work1, 0 )
     1962             CALL ZDFFTM( -1, ny+1, nz1, sqr_dny, work, sizw, ai, siza, trig_yb, work1, 0 )
    20091963
    20101964             ar(0:ny,1:nz) = ai(0:ny,1:nz)
Note: See TracChangeset for help on using the changeset viewer.