Changeset 4646 for palm/trunk/SOURCE/fft_xy_mod.f90
- Timestamp:
- Aug 24, 2020 4:02:40 PM (4 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
palm/trunk/SOURCE/fft_xy_mod.f90
r4370 r4646 1 1 !> @file fft_xy_mod.f90 2 !------------------------------------------------------------------------------ !2 !--------------------------------------------------------------------------------------------------! 3 3 ! This file is part of the PALM model system. 4 4 ! 5 ! PALM is free software: you can redistribute it and/or modify it under the 6 ! terms of the GNU General Public License as published by the Free Software 7 ! Foundation, either version 3 of the License, or (at your option) any later 8 ! version. 9 ! 10 ! PALM is distributed in the hope that it will be useful, but WITHOUT ANY 11 ! WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR 12 ! A PARTICULAR PURPOSE. See the GNU General Public License for more details. 13 ! 14 ! You should have received a copy of the GNU General Public License along with 15 ! PALM. If not, see <http://www.gnu.org/licenses/>. 5 ! PALM is free software: you can redistribute it and/or modify it under the terms of the GNU General 6 ! Public License as published by the Free Software Foundation, either version 3 of the License, or 7 ! (at your option) any later version. 8 ! 9 ! PALM is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the 10 ! implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General 11 ! Public License for more details. 12 ! 13 ! You should have received a copy of the GNU General Public License along with PALM. If not, see 14 ! <http://www.gnu.org/licenses/>. 16 15 ! 17 16 ! Copyright 1997-2020 Leibniz Universitaet Hannover 18 !------------------------------------------------------------------------------ !17 !--------------------------------------------------------------------------------------------------! 19 18 ! 20 19 ! Current revisions: 21 20 ! ----------------- 22 ! 23 ! 21 ! 22 ! 24 23 ! Former revisions: 25 24 ! ----------------- 26 25 ! $Id$ 26 ! file re-formatted to follow the PALM coding standard 27 ! 28 ! 4370 2020-01-10 14:00:44Z raasch 27 29 ! bugfix for Temperton-fft usage on GPU 28 ! 30 ! 29 31 ! 4366 2020-01-09 08:12:43Z raasch 30 32 ! Vectorized Temperton-fft added 31 ! 33 ! 32 34 ! 4360 2020-01-07 11:25:50Z suehring 33 35 ! Corrected "Former revisions" section 34 ! 36 ! 35 37 ! 4069 2019-07-01 14:05:51Z Giersch 36 38 ! Code added to avoid compiler warnings 37 ! 39 ! 38 40 ! 3655 2019-01-07 16:51:22Z knoop 39 41 ! OpenACC port for SPEC … … 50 52 !------------------------------------------------------------------------------! 51 53 MODULE fft_xy 52 53 54 USE control_parameters, &54 55 56 USE control_parameters, & 55 57 ONLY: fft_method, loop_optimization, message_string 56 58 57 59 USE cuda_fft_interfaces 58 59 USE indices, &60 61 USE indices, & 60 62 ONLY: nx, ny, nz 61 63 … … 67 69 68 70 USE kinds 69 70 USE singleton, &71 72 USE singleton, & 71 73 ONLY: fftn 72 74 73 75 USE temperton_fft 74 75 USE transpose_indices, &76 77 USE transpose_indices, & 76 78 ONLY: nxl_y, nxr_y, nyn_x, nys_x, nzb_x, nzb_y, nzt_x, nzt_y 77 79 … … 79 81 80 82 PRIVATE 81 PUBLIC fft_ x, fft_x_1d, fft_y, fft_y_1d, fft_init, fft_x_m, fft_y_m, f_vec_x, temperton_fft_vec83 PUBLIC fft_init, f_vec_x, fft_x, fft_x_1d, fft_x_m, fft_y, fft_y_1d, fft_y_m, temperton_fft_vec 82 84 83 85 INTEGER(iwp), DIMENSION(:), ALLOCATABLE, SAVE :: ifax_x !< … … 91 93 REAL(wp), SAVE :: sqr_dnx !< 92 94 REAL(wp), SAVE :: sqr_dny !< 93 94 REAL(wp), DIMENSION(:), ALLOCATABLE, SAVE :: trigs_x !< 95 96 REAL(wp), DIMENSION(:), ALLOCATABLE, SAVE :: trigs_x !< 95 97 REAL(wp), DIMENSION(:), ALLOCATABLE, SAVE :: trigs_y !< 96 98 … … 98 100 99 101 #if defined( __ibm ) 100 INTEGER(iwp), PARAMETER :: nau1 = 20000 !< 102 INTEGER(iwp), PARAMETER :: nau1 = 20000 !< 101 103 INTEGER(iwp), PARAMETER :: nau2 = 22000 !< 102 104 ! 103 !-- The following working arrays contain tables and have to be "save" and 104 !-- shared in OpenMP sense 105 REAL(wp), DIMENSION(nau1), SAVE :: aux1 !< 105 !-- The following working arrays contain tables and have to be "save" and shared in OpenMP sense 106 REAL(wp), DIMENSION(nau1), SAVE :: aux1 !< 106 107 REAL(wp), DIMENSION(nau1), SAVE :: auy1 !< 107 REAL(wp), DIMENSION(nau1), SAVE :: aux3 !< 108 REAL(wp), DIMENSION(nau1), SAVE :: aux3 !< 108 109 REAL(wp), DIMENSION(nau1), SAVE :: auy3 !< 109 110 110 111 #elif defined( __nec_fft ) 111 112 INTEGER(iwp), SAVE :: nz1 !< 112 113 113 114 REAL(wp), DIMENSION(:), ALLOCATABLE, SAVE :: trig_xb !< 114 REAL(wp), DIMENSION(:), ALLOCATABLE, SAVE :: trig_xf !< 115 REAL(wp), DIMENSION(:), ALLOCATABLE, SAVE :: trig_xf !< 115 116 REAL(wp), DIMENSION(:), ALLOCATABLE, SAVE :: trig_yb !< 116 117 REAL(wp), DIMENSION(:), ALLOCATABLE, SAVE :: trig_yf !< 117 118 118 119 #elif defined( __cuda_fft ) 119 120 INTEGER(C_INT), SAVE :: plan_xf !< … … 126 127 #if defined( __fftw ) 127 128 INCLUDE 'fftw3.f03' 129 COMPLEX(KIND=C_DOUBLE_COMPLEX), DIMENSION(:), ALLOCATABLE, SAVE :: x_out !< 130 COMPLEX(KIND=C_DOUBLE_COMPLEX), DIMENSION(:), ALLOCATABLE, SAVE :: y_out !< 131 128 132 INTEGER(KIND=C_INT) :: nx_c !< 129 133 INTEGER(KIND=C_INT) :: ny_c !< 130 131 COMPLEX(KIND=C_DOUBLE_COMPLEX), DIMENSION(:), ALLOCATABLE, SAVE :: x_out !< 132 COMPLEX(KIND=C_DOUBLE_COMPLEX), DIMENSION(:), ALLOCATABLE, SAVE :: & 133 y_out !< 134 135 REAL(KIND=C_DOUBLE), DIMENSION(:), ALLOCATABLE, SAVE :: & 136 x_in !< 137 REAL(KIND=C_DOUBLE), DIMENSION(:), ALLOCATABLE, SAVE :: & 138 y_in !< 134 135 REAL(KIND=C_DOUBLE), DIMENSION(:), ALLOCATABLE, SAVE :: x_in !< 136 REAL(KIND=C_DOUBLE), DIMENSION(:), ALLOCATABLE, SAVE :: y_in !< 139 137 !$OMP THREADPRIVATE( x_out, y_out, x_in, y_in ) 140 141 138 139 142 140 TYPE(C_PTR), SAVE :: plan_xf, plan_xi, plan_yf, plan_yi 143 141 #endif … … 176 174 177 175 178 !------------------------------------------------------------------------------ !176 !--------------------------------------------------------------------------------------------------! 179 177 ! Description: 180 178 ! ------------ 181 179 !> @todo Missing subroutine description. 182 !------------------------------------------------------------------------------ !180 !--------------------------------------------------------------------------------------------------! 183 181 SUBROUTINE fft_init 184 182 … … 192 190 !-- in OpenMP sense 193 191 #if defined( __ibm ) 192 REAL(wp), DIMENSION(nau2) :: aux2 !< 193 REAL(wp), DIMENSION(nau2) :: auy2 !< 194 REAL(wp), DIMENSION(nau2) :: aux4 !< 195 REAL(wp), DIMENSION(nau2) :: auy4 !< 194 196 REAL(wp), DIMENSION(0:nx+2) :: workx !< 195 197 REAL(wp), DIMENSION(0:ny+2) :: worky !< 196 REAL(wp), DIMENSION(nau2) :: aux2 !<197 REAL(wp), DIMENSION(nau2) :: auy2 !<198 REAL(wp), DIMENSION(nau2) :: aux4 !<199 REAL(wp), DIMENSION(nau2) :: auy4 !<200 198 #elif defined( __nec_fft ) 201 199 REAL(wp), DIMENSION(0:nx+3,nz+1) :: work_x !< … … 203 201 REAL(wp), DIMENSION(6*(nx+3),nz+1) :: workx !< 204 202 REAL(wp), DIMENSION(6*(ny+3),nz+1) :: worky !< 205 #endif 203 #endif 206 204 207 205 ! … … 233 231 ! 234 232 !-- Initialize tables for fft along x 235 CALL DRCFT( 1, workx, 1, workx, 1, nx+1, 1, 1, sqr_dnx, aux1, nau1, & 236 aux2, nau2 ) 237 CALL DCRFT( 1, workx, 1, workx, 1, nx+1, 1, -1, sqr_dnx, aux3, nau1, & 238 aux4, nau2 ) 233 CALL DRCFT( 1, workx, 1, workx, 1, nx+1, 1, 1, sqr_dnx, aux1, nau1, aux2, nau2 ) 234 CALL DCRFT( 1, workx, 1, workx, 1, nx+1, 1, -1, sqr_dnx, aux3, nau1, aux4, nau2 ) 239 235 ! 240 236 !-- Initialize tables for fft along y 241 CALL DRCFT( 1, worky, 1, worky, 1, ny+1, 1, 1, sqr_dny, auy1, nau1, & 242 auy2, nau2 ) 243 CALL DCRFT( 1, worky, 1, worky, 1, ny+1, 1, -1, sqr_dny, auy3, nau1, & 244 auy4, nau2 ) 237 CALL DRCFT( 1, worky, 1, worky, 1, ny+1, 1, 1, sqr_dny, auy1, nau1, auy2, nau2 ) 238 CALL DCRFT( 1, worky, 1, worky, 1, ny+1, 1, -1, sqr_dny, auy3, nau1, auy4, nau2 ) 245 239 #elif defined( __nec_fft ) 246 message_string = 'fft method "' // TRIM( fft_method) // & 247 '" currently does not work on NEC' 240 message_string = 'fft method "' // TRIM( fft_method) // '" currently does not work on NEC' 248 241 CALL message( 'fft_init', 'PA0187', 1, 2, 0, 6, 0 ) 249 242 250 ALLOCATE( trig_xb(2*(nx+1)), trig_xf(2*(nx+1)), & 251 trig_yb(2*(ny+1)), trig_yf(2*(ny+1)) ) 243 ALLOCATE( trig_xb(2*(nx+1)), trig_xf(2*(nx+1)), trig_yb(2*(ny+1)), trig_yf(2*(ny+1)) ) 252 244 253 245 work_x = 0.0_wp … … 260 252 CALL DZFFT( 0, nx+1, sqr_dnx, work_x, work_x, trig_xf, workx, 0 ) 261 253 CALL ZDFFT( 0, nx+1, sqr_dnx, work_x, work_x, trig_xb, workx, 0 ) 262 CALL DZFFTM( 0, nx+1, nz1, sqr_dnx, work_x, nx+4, work_x, nx+4, & 263 trig_xf, workx, 0 ) 264 CALL ZDFFTM( 0, nx+1, nz1, sqr_dnx, work_x, nx+4, work_x, nx+4, & 265 trig_xb, workx, 0 ) 254 CALL DZFFTM( 0, nx+1, nz1, sqr_dnx, work_x, nx+4, work_x, nx+4, trig_xf, workx, 0 ) 255 CALL ZDFFTM( 0, nx+1, nz1, sqr_dnx, work_x, nx+4, work_x, nx+4, trig_xb, workx, 0 ) 266 256 ! 267 257 !-- Initialize tables for fft along y (non-vector and vector case (M)) 268 258 CALL DZFFT( 0, ny+1, sqr_dny, work_y, work_y, trig_yf, worky, 0 ) 269 259 CALL ZDFFT( 0, ny+1, sqr_dny, work_y, work_y, trig_yb, worky, 0 ) 270 CALL DZFFTM( 0, ny+1, nz1, sqr_dny, work_y, ny+4, work_y, ny+4, & 271 trig_yf, worky, 0 ) 272 CALL ZDFFTM( 0, ny+1, nz1, sqr_dny, work_y, ny+4, work_y, ny+4, & 273 trig_yb, worky, 0 ) 260 CALL DZFFTM( 0, ny+1, nz1, sqr_dny, work_y, ny+4, work_y, ny+4, trig_yf, worky, 0 ) 261 CALL ZDFFTM( 0, ny+1, nz1, sqr_dny, work_y, ny+4, work_y, ny+4, trig_yb, worky, 0 ) 274 262 #elif defined( __cuda_fft ) 275 263 CALL CUFFTPLAN1D( plan_xf, nx+1, CUFFT_D2Z, (nyn_x-nys_x+1) * (nzt_x-nzb_x+1) ) … … 303 291 ny_c = ny+1 304 292 !$OMP PARALLEL 305 ALLOCATE( x_in(0:nx+2), y_in(0:ny+2), x_out(0:(nx+1)/2), & 306 y_out(0:(ny+1)/2) ) 293 ALLOCATE( x_in(0:nx+2), y_in(0:ny+2), x_out(0:(nx+1)/2), y_out(0:(ny+1)/2) ) 307 294 !$OMP END PARALLEL 308 295 plan_xf = FFTW_PLAN_DFT_R2C_1D( nx_c, x_in, x_out, FFTW_ESTIMATE ) … … 321 308 ELSE 322 309 323 message_string = 'fft method "' // TRIM( fft_method) // & 324 '" not available' 310 message_string = 'fft method "' // TRIM( fft_method) // '" not available' 325 311 CALL message( 'fft_init', 'PA0189', 1, 2, 0, 6, 0 ) 326 312 ENDIF … … 329 315 330 316 331 !------------------------------------------------------------------------------ !317 !--------------------------------------------------------------------------------------------------! 332 318 ! Description: 333 319 ! ------------ 334 !> Fourier-transformation along x-direction. 320 !> Fourier-transformation along x-direction. 335 321 !> Version for 2D-decomposition. 336 !> It uses internal algorithms (Singleton or Temperton) or 337 !> system-specific routines, if they are available338 !------------------------------------------------------------------------------ !339 322 !> It uses internal algorithms (Singleton or Temperton) or system-specific routines, if they are 323 !> available. 324 !--------------------------------------------------------------------------------------------------! 325 340 326 SUBROUTINE fft_x( ar, direction, ar_2d, ar_inv ) 341 327 … … 344 330 345 331 CHARACTER (LEN=*) :: direction !< 346 332 347 333 COMPLEX(wp), DIMENSION(:), ALLOCATABLE :: cwork !< 348 334 349 INTEGER(iwp) :: i !< 335 INTEGER(iwp) :: i !< 350 336 INTEGER(iwp) :: ishape(1) !< 351 337 INTEGER(iwp) :: j !< … … 354 340 355 341 LOGICAL :: forward_fft !< 356 342 357 343 REAL(wp), DIMENSION(0:nx+2) :: work !< 358 344 REAL(wp), DIMENSION(nx+2) :: work1 !< 359 345 360 346 REAL(wp), DIMENSION(:,:), ALLOCATABLE :: work_vec !< 361 347 REAL(wp), DIMENSION(0:nx,nys_x:nyn_x), OPTIONAL :: ar_2d !< 362 348 349 REAL(wp), DIMENSION(0:nx,nys_x:nyn_x,nzb_x:nzt_x) :: ar !< 363 350 REAL(wp), DIMENSION(nys_x:nyn_x,nzb_x:nzt_x,0:nx), OPTIONAL :: ar_inv !< 364 REAL(wp), DIMENSION(0:nx,nys_x:nyn_x,nzb_x:nzt_x) :: ar !<365 351 366 352 #if defined( __ibm ) 367 REAL(wp), DIMENSION(nau2) :: aux2 !< 353 REAL(wp), DIMENSION(nau2) :: aux2 !< 368 354 REAL(wp), DIMENSION(nau2) :: aux4 !< 369 355 #elif defined( __nec_fft ) … … 387 373 388 374 ! 389 !-- Performing the fft with singleton's software works on every system, 390 !-- since it is part of the model375 !-- Performing the fft with singleton's software works on every system, since it is part of 376 !-- the model. 391 377 ALLOCATE( cwork(0:nx) ) 392 393 IF ( forward_fft ) then378 379 IF ( forward_fft ) THEN 394 380 395 381 !$OMP PARALLEL PRIVATE ( cwork, i, ishape, j, k ) … … 425 411 cwork(0) = CMPLX( ar(0,j,k), 0.0_wp, KIND=wp ) 426 412 DO i = 1, (nx+1)/2 - 1 427 cwork(i) = CMPLX( ar(i,j,k), -ar(nx+1-i,j,k), & 428 KIND=wp ) 429 cwork(nx+1-i) = CMPLX( ar(i,j,k), ar(nx+1-i,j,k), & 430 KIND=wp ) 413 cwork(i) = CMPLX( ar(i,j,k), -ar(nx+1-i,j,k), KIND=wp ) 414 cwork(nx+1-i) = CMPLX( ar(i,j,k), ar(nx+1-i,j,k), KIND=wp ) 431 415 ENDDO 432 416 cwork((nx+1)/2) = CMPLX( ar((nx+1)/2,j,k), 0.0_wp, KIND=wp ) … … 450 434 451 435 ! 452 !-- Performing the fft with Temperton's software works on every system, 453 !-- since it is part of the model436 !-- Performing the fft with Temperton's software works on every system, since it is part of 437 !-- the model. 454 438 IF ( forward_fft ) THEN 455 439 … … 633 617 x_out(0) = CMPLX( ar_2d(0,j), 0.0_wp, KIND=wp ) 634 618 DO i = 1, (nx+1)/2 - 1 635 x_out(i) = CMPLX( ar_2d(i,j), ar_2d(nx+1-i,j), & 636 KIND=wp ) 637 ENDDO 638 x_out((nx+1)/2) = CMPLX( ar_2d((nx+1)/2,j), 0.0_wp, & 639 KIND=wp ) 619 x_out(i) = CMPLX( ar_2d(i,j), ar_2d(nx+1-i,j), KIND=wp ) 620 ENDDO 621 x_out((nx+1)/2) = CMPLX( ar_2d((nx+1)/2,j), 0.0_wp, KIND=wp ) 640 622 641 623 ELSE … … 645 627 x_out(i) = CMPLX( ar(i,j,k), ar(nx+1-i,j,k), KIND=wp ) 646 628 ENDDO 647 x_out((nx+1)/2) = CMPLX( ar((nx+1)/2,j,k), 0.0_wp, & 648 KIND=wp ) 629 x_out((nx+1)/2) = CMPLX( ar((nx+1)/2,j,k), 0.0_wp, KIND=wp ) 649 630 650 631 ENDIF … … 670 651 DO j = nys_x, nyn_x 671 652 672 CALL DRCFT( 0, ar, 1, work, 1, nx+1, 1, 1, sqr_dnx, aux1, & 673 nau1, aux2, nau2 ) 653 CALL DRCFT( 0, ar, 1, work, 1, nx+1, 1, 1, sqr_dnx, aux1, nau1, aux2, nau2 ) 674 654 675 655 DO i = 0, (nx+1)/2 … … 700 680 work(nx+2) = 0.0_wp 701 681 702 CALL DCRFT( 0, work, 1, work, 1, nx+1, 1, -1, sqr_dnx, & 703 aux3, nau1, aux4, nau2 ) 682 CALL DCRFT( 0, work, 1, work, 1, nx+1, 1, -1, sqr_dnx, aux3, nau1, aux4, nau2 ) 704 683 705 684 DO i = 0, nx … … 725 704 726 705 CALL DZFFT( 1, nx+1, sqr_dnx, work, work, trig_xf, work2, 0 ) 727 706 728 707 DO i = 0, (nx+1)/2 729 708 ar(i,j,k) = work(2*i) … … 797 776 798 777 DO i = 1, (nx+1)/2 - 1 799 ar_tmp(i,j,k) = CMPLX( ar(i,j,k), ar(nx+1-i,j,k), & 800 KIND=wp ) 801 ENDDO 802 ar_tmp((nx+1)/2,j,k) = CMPLX( ar((nx+1)/2,j,k), 0.0_wp, & 803 KIND=wp ) 778 ar_tmp(i,j,k) = CMPLX( ar(i,j,k), ar(nx+1-i,j,k), KIND=wp ) 779 ENDDO 780 ar_tmp((nx+1)/2,j,k) = CMPLX( ar((nx+1)/2,j,k), 0.0_wp, KIND=wp ) 804 781 805 782 ENDDO … … 818 795 END SUBROUTINE fft_x 819 796 820 !------------------------------------------------------------------------------ !797 !--------------------------------------------------------------------------------------------------! 821 798 ! Description: 822 799 ! ------------ 823 800 !> Fourier-transformation along x-direction. 824 801 !> Version for 1D-decomposition. 825 !> It uses internal algorithms (Singleton or Temperton) or 826 !> system-specific routines, if they are available827 !------------------------------------------------------------------------------ !828 802 !> It uses internal algorithms (Singleton or Temperton) or system-specific routines, if they are 803 !> available. 804 !--------------------------------------------------------------------------------------------------! 805 829 806 SUBROUTINE fft_x_1d( ar, direction ) 830 807 … … 833 810 834 811 CHARACTER (LEN=*) :: direction !< 835 812 836 813 INTEGER(iwp) :: i !< 837 814 INTEGER(iwp) :: ishape(1) !< … … 842 819 REAL(wp), DIMENSION(0:nx+2) :: work !< 843 820 REAL(wp), DIMENSION(nx+2) :: work1 !< 844 821 845 822 COMPLEX(wp), DIMENSION(:), ALLOCATABLE :: cwork !< 846 823 847 824 #if defined( __ibm ) 848 825 REAL(wp), DIMENSION(nau2) :: aux2 !< … … 861 838 862 839 ! 863 !-- Performing the fft with singleton's software works on every system, 864 !-- since it is part of the model840 !-- Performing the fft with singleton's software works on every system, since it is part of 841 !-- the model. 865 842 ALLOCATE( cwork(0:nx) ) 866 867 IF ( forward_fft ) then843 844 IF ( forward_fft ) THEN 868 845 869 846 DO i = 0, nx … … 902 879 903 880 ! 904 !-- Performing the fft with Temperton's software works on every system, 905 !-- since it is part of the model881 !-- Performing the fft with Temperton's software works on every system, since it is part of 882 !-- the model. 906 883 IF ( forward_fft ) THEN 907 884 … … 966 943 IF ( forward_fft ) THEN 967 944 968 CALL DRCFT( 0, ar, 1, work, 1, nx+1, 1, 1, sqr_dnx, aux1, nau1, & 969 aux2, nau2 ) 945 CALL DRCFT( 0, ar, 1, work, 1, nx+1, 1, 1, sqr_dnx, aux1, nau1, aux2, nau2 ) 970 946 971 947 DO i = 0, (nx+1)/2 … … 987 963 work(nx+2) = 0.0_wp 988 964 989 CALL DCRFT( 0, work, 1, work, 1, nx+1, 1, -1, sqr_dnx, aux3, nau1, & 990 aux4, nau2 ) 965 CALL DCRFT( 0, work, 1, work, 1, nx+1, 1, -1, sqr_dnx, aux3, nau1, aux4, nau2 ) 991 966 992 967 DO i = 0, nx … … 1001 976 1002 977 CALL DZFFT( 1, nx+1, sqr_dnx, work, work, trig_xf, work2, 0 ) 1003 978 1004 979 DO i = 0, (nx+1)/2 1005 980 ar(i) = work(2*i) … … 1031 1006 END SUBROUTINE fft_x_1d 1032 1007 1033 !------------------------------------------------------------------------------ !1008 !--------------------------------------------------------------------------------------------------! 1034 1009 ! Description: 1035 1010 ! ------------ 1036 1011 !> Fourier-transformation along y-direction. 1037 1012 !> Version for 2D-decomposition. 1038 !> It uses internal algorithms (Singleton or Temperton) or 1039 !> system-specific routines, if they areavailable.1040 !> 1013 !> It uses internal algorithms (Singleton or Temperton) or system-specific routines, if they are 1014 !> available. 1015 !> 1041 1016 !> direction: 'forward' or 'backward' 1042 !> ar, ar_tr: 3D data arrays 1017 !> ar, ar_tr: 3D data arrays 1043 1018 !> forward: ar: before ar_tr: after transformation 1044 1019 !> backward: ar_tr: before ar: after transfosition 1045 !> 1020 !> 1046 1021 !> In case of non-overlapping transposition/transformation: 1047 !> nxl_y_bound = nxl_y_l = nxl_y 1048 !> nxr_y_bound = nxr_y_l = nxr_y 1049 !> 1022 !> nxl_y_bound = nxl_y_l = nxl_y 1023 !> nxr_y_bound = nxr_y_l = nxr_y 1024 !> 1050 1025 !> In case of overlapping transposition/transformation 1051 !> - nxl_y_bound and nxr_y_bound have the original values of 1052 !> nxl_y, nxr_y. ar_tr is dimensioned using these values. 1053 !> - nxl_y_l = nxr_y_r. ar is dimensioned with these values, so that 1054 !> transformation is carried out for a 2D-plane only. 1055 !------------------------------------------------------------------------------! 1056 1057 SUBROUTINE fft_y( ar, direction, ar_tr, nxl_y_bound, nxr_y_bound, nxl_y_l, & 1058 nxr_y_l, ar_inv ) 1026 !> - nxl_y_bound and nxr_y_bound have the original values of nxl_y, nxr_y. ar_tr is dimensioned 1027 !> using these values. 1028 !> - nxl_y_l = nxr_y_r. ar is dimensioned with these values, so that transformation is carried out 1029 !> for a 2D-plane only. 1030 !--------------------------------------------------------------------------------------------------! 1031 1032 SUBROUTINE fft_y( ar, direction, ar_tr, nxl_y_bound, nxr_y_bound, nxl_y_l, nxr_y_l, ar_inv ) 1059 1033 1060 1034 … … 1062 1036 1063 1037 CHARACTER (LEN=*) :: direction !< 1064 1038 1065 1039 INTEGER(iwp) :: i !< 1066 INTEGER(iwp) :: j !< 1040 INTEGER(iwp) :: j !< 1067 1041 INTEGER(iwp) :: jshape(1) !< 1068 1042 INTEGER(iwp) :: k !< … … 1077 1051 REAL(wp), DIMENSION(0:ny+2) :: work !< 1078 1052 REAL(wp), DIMENSION(ny+2) :: work1 !< 1079 1053 1080 1054 REAL(wp), DIMENSION(:,:), ALLOCATABLE :: f_vec_y 1081 1055 REAL(wp), DIMENSION(:,:), ALLOCATABLE :: work_vec … … 1086 1060 1087 1061 COMPLEX(wp), DIMENSION(:), ALLOCATABLE :: cwork !< 1088 1062 1089 1063 #if defined( __ibm ) 1090 1064 REAL(wp), DIMENSION(nau2) :: auy2 !< … … 1093 1067 REAL(wp), DIMENSION(6*(ny+1)) :: work2 !< 1094 1068 #elif defined( __cuda_fft ) 1095 COMPLEX(dp), DIMENSION(0:(ny+1)/2,nxl_y:nxr_y,nzb_y:nzt_y) :: & 1096 ar_tmp !< 1069 COMPLEX(dp), DIMENSION(0:(ny+1)/2,nxl_y:nxr_y,nzb_y:nzt_y) :: ar_tmp !< 1097 1070 !$ACC DECLARE CREATE(ar_tmp) 1098 1071 #endif … … 1108 1081 1109 1082 ! 1110 !-- Performing the fft with singleton's software works on every system, 1111 !-- since it is part of the model1083 !-- Performing the fft with singleton's software works on every system, since it is part of 1084 !-- the model. 1112 1085 ALLOCATE( cwork(0:ny) ) 1113 1086 1114 IF ( forward_fft ) then1087 IF ( forward_fft ) THEN 1115 1088 1116 1089 !$OMP PARALLEL PRIVATE ( cwork, i, jshape, j, k ) … … 1146 1119 cwork(0) = CMPLX( ar_tr(0,i,k), 0.0_wp, KIND=wp ) 1147 1120 DO j = 1, (ny+1)/2 - 1 1148 cwork(j) = CMPLX( ar_tr(j,i,k), -ar_tr(ny+1-j,i,k), & 1149 KIND=wp ) 1150 cwork(ny+1-j) = CMPLX( ar_tr(j,i,k), ar_tr(ny+1-j,i,k), & 1151 KIND=wp ) 1152 ENDDO 1153 cwork((ny+1)/2) = CMPLX( ar_tr((ny+1)/2,i,k), 0.0_wp, & 1154 KIND=wp ) 1121 cwork(j) = CMPLX( ar_tr(j,i,k), -ar_tr(ny+1-j,i,k), KIND=wp ) 1122 cwork(ny+1-j) = CMPLX( ar_tr(j,i,k), ar_tr(ny+1-j,i,k), KIND=wp ) 1123 ENDDO 1124 cwork((ny+1)/2) = CMPLX( ar_tr((ny+1)/2,i,k), 0.0_wp, KIND=wp ) 1155 1125 1156 1126 jshape = SHAPE( cwork ) … … 1172 1142 1173 1143 ! 1174 !-- Performing the fft with Temperton's software works on every system, 1175 !-- since it is part of the model1144 !-- Performing the fft with Temperton's software works on every system, since it is part of 1145 !-- the model. 1176 1146 IF ( forward_fft ) THEN 1177 1147 … … 1361 1331 y_out(0) = CMPLX( ar_tr(0,i,k), 0.0_wp, KIND=wp ) 1362 1332 DO j = 1, (ny+1)/2 - 1 1363 y_out(j) = CMPLX( ar_tr(j,i,k), ar_tr(ny+1-j,i,k), & 1364 KIND=wp ) 1365 ENDDO 1366 y_out((ny+1)/2) = CMPLX( ar_tr((ny+1)/2,i,k), 0.0_wp, & 1367 KIND=wp ) 1333 y_out(j) = CMPLX( ar_tr(j,i,k), ar_tr(ny+1-j,i,k), KIND=wp ) 1334 ENDDO 1335 y_out((ny+1)/2) = CMPLX( ar_tr((ny+1)/2,i,k), 0.0_wp, KIND=wp ) 1368 1336 1369 1337 CALL FFTW_EXECUTE_DFT_C2R( plan_yi, y_out, y_in ) … … 1387 1355 DO i = nxl_y_l, nxr_y_l 1388 1356 1389 CALL DRCFT( 0, ar, 1, work, 1, ny+1, 1, 1, sqr_dny, auy1, & 1390 nau1, auy2, nau2 ) 1357 CALL DRCFT( 0, ar, 1, work, 1, ny+1, 1, 1, sqr_dny, auy1, nau1, auy2, nau2 ) 1391 1358 1392 1359 DO j = 0, (ny+1)/2 … … 1417 1384 work(ny+2) = 0.0_wp 1418 1385 1419 CALL DCRFT( 0, work, 1, work, 1, ny+1, 1, -1, sqr_dny, & 1420 auy3, nau1, auy4, nau2 ) 1386 CALL DCRFT( 0, work, 1, work, 1, ny+1, 1, -1, sqr_dny, auy3, nau1, auy4, nau2 ) 1421 1387 1422 1388 DO j = 0, ny … … 1491 1457 1492 1458 DO j = 0, (ny+1)/2 1493 ar(j,i,k) = REAL( ar_tmp(j,i,k), KIND=wp ) 1459 ar(j,i,k) = REAL( ar_tmp(j,i,k), KIND=wp ) * dny 1494 1460 ENDDO 1495 1461 … … 1511 1477 1512 1478 DO j = 1, (ny+1)/2 - 1 1513 ar_tmp(j,i,k) = CMPLX( ar(j,i,k), ar(ny+1-j,i,k), & 1514 KIND=wp ) 1515 ENDDO 1516 ar_tmp((ny+1)/2,i,k) = CMPLX( ar((ny+1)/2,i,k), 0.0_wp, & 1517 KIND=wp ) 1479 ar_tmp(j,i,k) = CMPLX( ar(j,i,k), ar(ny+1-j,i,k), KIND=wp ) 1480 ENDDO 1481 ar_tmp((ny+1)/2,i,k) = CMPLX( ar((ny+1)/2,i,k), 0.0_wp, KIND=wp ) 1518 1482 1519 1483 ENDDO … … 1532 1496 END SUBROUTINE fft_y 1533 1497 1534 !------------------------------------------------------------------------------ !1498 !--------------------------------------------------------------------------------------------------! 1535 1499 ! Description: 1536 1500 ! ------------ 1537 1501 !> Fourier-transformation along y-direction. 1538 1502 !> Version for 1D-decomposition. 1539 !> It uses internal algorithms (Singleton or Temperton) or 1540 !> system-specific routines, if they areavailable.1541 !------------------------------------------------------------------------------ !1542 1503 !> It uses internal algorithms (Singleton or Temperton) or system-specific routines, if they are 1504 !> available. 1505 !--------------------------------------------------------------------------------------------------! 1506 1543 1507 SUBROUTINE fft_y_1d( ar, direction ) 1544 1508 … … 1547 1511 1548 1512 CHARACTER (LEN=*) :: direction 1549 1513 1550 1514 INTEGER(iwp) :: j !< 1551 1515 INTEGER(iwp) :: jshape(1) !< … … 1556 1520 REAL(wp), DIMENSION(0:ny+2) :: work !< 1557 1521 REAL(wp), DIMENSION(ny+2) :: work1 !< 1558 1522 1559 1523 COMPLEX(wp), DIMENSION(:), ALLOCATABLE :: cwork !< 1560 1524 1561 1525 #if defined( __ibm ) 1562 1526 REAL(wp), DIMENSION(nau2) :: auy2 !< … … 1575 1539 1576 1540 ! 1577 !-- Performing the fft with singleton's software works on every system, 1578 !-- since it is part of the model1541 !-- Performing the fft with singleton's software works on every system, since it is part of 1542 !-- the model. 1579 1543 ALLOCATE( cwork(0:ny) ) 1580 1544 … … 1618 1582 1619 1583 ! 1620 !-- Performing the fft with Temperton's software works on every system, 1621 !-- since it is part of the model1584 !-- Performing the fft with Temperton's software works on every system, since it is part of 1585 !-- the model. 1622 1586 IF ( forward_fft ) THEN 1623 1587 … … 1682 1646 IF ( forward_fft ) THEN 1683 1647 1684 CALL DRCFT( 0, ar, 1, work, 1, ny+1, 1, 1, sqr_dny, auy1, nau1, & 1685 auy2, nau2 ) 1648 CALL DRCFT( 0, ar, 1, work, 1, ny+1, 1, 1, sqr_dny, auy1, nau1, auy2, nau2 ) 1686 1649 1687 1650 DO j = 0, (ny+1)/2 … … 1703 1666 work(ny+2) = 0.0_wp 1704 1667 1705 CALL DCRFT( 0, work, 1, work, 1, ny+1, 1, -1, sqr_dny, auy3, & 1706 nau1, auy4, nau2 ) 1668 CALL DCRFT( 0, work, 1, work, 1, ny+1, 1, -1, sqr_dny, auy3, nau1, auy4, nau2 ) 1707 1669 1708 1670 DO j = 0, ny … … 1747 1709 END SUBROUTINE fft_y_1d 1748 1710 1749 !------------------------------------------------------------------------------ !1711 !--------------------------------------------------------------------------------------------------! 1750 1712 ! Description: 1751 1713 ! ------------ 1752 1714 !> Fourier-transformation along x-direction. 1753 !> Version for 1d domain decomposition 1715 !> Version for 1d domain decomposition, 1754 1716 !> using multiple 1D FFT from Math Keisan on NEC or Temperton-algorithm 1755 !> (no singleton-algorithm on NEC because it does not vectorize) 1756 !------------------------------------------------------------------------------ !1757 1717 !> (no singleton-algorithm on NEC because it does not vectorize). 1718 !--------------------------------------------------------------------------------------------------! 1719 1758 1720 SUBROUTINE fft_x_m( ar, direction ) 1759 1721 … … 1762 1724 1763 1725 CHARACTER (LEN=*) :: direction !< 1764 1726 1765 1727 INTEGER(iwp) :: i !< 1766 1728 INTEGER(iwp) :: k !< … … 1773 1735 REAL(wp), DIMENSION(0:nx+3,nz+1) :: ai !< 1774 1736 REAL(wp), DIMENSION(6*(nx+4),nz+1) :: work1 !< 1775 1737 1776 1738 #if defined( __nec_fft ) 1777 1739 COMPLEX(wp), DIMENSION(:,:), ALLOCATABLE :: work … … 1827 1789 1828 1790 ! 1829 !-- Tables are initialized once more. This call should not be 1830 !-- necessary, but otherwise program aborts in asymmetric case 1831 CALL DZFFTM( 0, nx+1, nz1, sqr_dnx, work, nx+4, work, nx+4, & 1832 trig_xf, work1, 0 ) 1791 !-- Tables are initialized once more. This call should not be necessary, but otherwise 1792 !-- program aborts in asymmetric case. 1793 CALL DZFFTM( 0, nx+1, nz1, sqr_dnx, work, nx+4, work, nx+4, trig_xf, work1, 0 ) 1833 1794 1834 1795 ai(0:nx,1:nz) = ar(0:nx,1:nz) … … 1837 1798 ENDIF 1838 1799 1839 CALL DZFFTM( 1, nx+1, nz1, sqr_dnx, ai, siza, work, sizw, & 1840 trig_xf, work1, 0 ) 1800 CALL DZFFTM( 1, nx+1, nz1, sqr_dnx, ai, siza, work, sizw, trig_xf, work1, 0 ) 1841 1801 1842 1802 DO k = 1, nz … … 1854 1814 !-- Tables are initialized once more. This call should not be 1855 1815 !-- necessary, but otherwise program aborts in asymmetric case 1856 CALL ZDFFTM( 0, nx+1, nz1, sqr_dnx, work, nx+4, work, nx+4, & 1857 trig_xb, work1, 0 ) 1816 CALL ZDFFTM( 0, nx+1, nz1, sqr_dnx, work, nx+4, work, nx+4, trig_xb, work1, 0 ) 1858 1817 1859 1818 IF ( nz1 > nz ) THEN … … 1868 1827 ENDDO 1869 1828 1870 CALL ZDFFTM( -1, nx+1, nz1, sqr_dnx, work, sizw, ai, siza, & 1871 trig_xb, work1, 0 ) 1829 CALL ZDFFTM( -1, nx+1, nz1, sqr_dnx, work, sizw, ai, siza, trig_xb, work1, 0 ) 1872 1830 1873 1831 ar(0:nx,1:nz) = ai(0:nx,1:nz) … … 1882 1840 END SUBROUTINE fft_x_m 1883 1841 1884 !------------------------------------------------------------------------------ !1842 !--------------------------------------------------------------------------------------------------! 1885 1843 ! Description: 1886 1844 ! ------------ 1887 1845 !> Fourier-transformation along y-direction. 1888 !> Version for 1d domain decomposition 1846 !> Version for 1d domain decomposition, 1889 1847 !> using multiple 1D FFT from Math Keisan on NEC or Temperton-algorithm 1890 !> (no singleton-algorithm on NEC because it does not vectorize) 1891 !------------------------------------------------------------------------------ !1892 1848 !> (no singleton-algorithm on NEC because it does not vectorize). 1849 !--------------------------------------------------------------------------------------------------! 1850 1893 1851 SUBROUTINE fft_y_m( ar, ny1, direction ) 1894 1852 … … 1897 1855 1898 1856 CHARACTER (LEN=*) :: direction !< 1899 1900 INTEGER(iwp) :: j !< 1857 1858 INTEGER(iwp) :: j !< 1901 1859 INTEGER(iwp) :: k !< 1902 1860 INTEGER(iwp) :: ny1 !< … … 1964 1922 1965 1923 ! 1966 !-- Tables are initialized once more. This call should not be 1967 !-- necessary, but otherwise program aborts in asymmetric case 1968 CALL DZFFTM( 0, ny+1, nz1, sqr_dny, work, ny+4, work, ny+4, & 1969 trig_yf, work1, 0 ) 1924 !-- Tables are initialized once more. This call should not be necessary, but otherwise 1925 !-- program aborts in asymmetric case. 1926 CALL DZFFTM( 0, ny+1, nz1, sqr_dny, work, ny+4, work, ny+4, trig_yf, work1, 0 ) 1970 1927 1971 1928 ai(0:ny,1:nz) = ar(0:ny,1:nz) … … 1974 1931 ENDIF 1975 1932 1976 CALL DZFFTM( 1, ny+1, nz1, sqr_dny, ai, siza, work, sizw, & 1977 trig_yf, work1, 0 ) 1933 CALL DZFFTM( 1, ny+1, nz1, sqr_dny, ai, siza, work, sizw, trig_yf, work1, 0 ) 1978 1934 1979 1935 DO k = 1, nz … … 1989 1945 1990 1946 ! 1991 !-- Tables are initialized once more. This call should not be 1992 !-- necessary, but otherwise program aborts in asymmetric case 1993 CALL ZDFFTM( 0, ny+1, nz1, sqr_dny, work, ny+4, work, ny+4, & 1994 trig_yb, work1, 0 ) 1947 !-- Tables are initialized once more. This call should not be necessary, but otherwise 1948 !-- program aborts in asymmetric case. 1949 CALL ZDFFTM( 0, ny+1, nz1, sqr_dny, work, ny+4, work, ny+4, trig_yb, work1, 0 ) 1995 1950 1996 1951 IF ( nz1 > nz ) THEN … … 2005 1960 ENDDO 2006 1961 2007 CALL ZDFFTM( -1, ny+1, nz1, sqr_dny, work, sizw, ai, siza, & 2008 trig_yb, work1, 0 ) 1962 CALL ZDFFTM( -1, ny+1, nz1, sqr_dny, work, sizw, ai, siza, trig_yb, work1, 0 ) 2009 1963 2010 1964 ar(0:ny,1:nz) = ai(0:ny,1:nz)
Note: See TracChangeset
for help on using the changeset viewer.