Changeset 2118 for palm/trunk/SOURCE/fft_xy_mod.f90
- Timestamp: Jan 17, 2017 4:38:49 PM (8 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
palm/trunk/SOURCE/fft_xy_mod.f90
r2101 r2118 20 20 ! Current revisions: 21 21 ! ----------------- 22 ! 22 ! OpenACC directives and CUDA-fft related code removed 23 23 ! 24 24 ! Former revisions: … … 31 31 ! 1850 2016-04-08 13:29:27Z maronga 32 32 ! Module renamed 33 !34 33 ! 35 34 ! 1815 2016-04-06 13:49:59Z raasch … … 139 138 ONLY: nx, ny, nz 140 139 141 #if defined( __cuda_fft ) 142 USE ISO_C_BINDING 143 #elif defined( __fftw ) 140 #if defined( __fftw ) 144 141 USE, INTRINSIC :: ISO_C_BINDING 145 142 #endif … … 192 189 REAL(wp), DIMENSION(:), ALLOCATABLE, SAVE :: trig_yf !< 193 190 194 #elif defined( __cuda_fft )195 INTEGER(C_INT), SAVE :: plan_xf !<196 INTEGER(C_INT), SAVE :: plan_xi !<197 INTEGER(C_INT), SAVE :: plan_yf !<198 INTEGER(C_INT), SAVE :: plan_yi !<199 200 INTEGER(iwp), SAVE :: total_points_x_transpo !<201 INTEGER(iwp), SAVE :: total_points_y_transpo !<202 191 #endif 203 192 … … 261 250 SUBROUTINE fft_init 262 251 263 USE cuda_fft_interfaces264 265 252 IMPLICIT NONE 266 253 … … 338 325 CALL ZDFFTM( 0, ny+1, nz1, sqr_dny, work_y, ny+4, work_y, ny+4, & 339 326 trig_yb, worky, 0 ) 340 #elif defined( __cuda_fft )341 total_points_x_transpo = (nx+1) * (nyn_x-nys_x+1) * (nzt_x-nzb_x+1)342 total_points_y_transpo = (ny+1) * (nxr_y-nxl_y+1) * (nzt_y-nzb_y+1)343 CALL CUFFTPLAN1D( plan_xf, nx+1, CUFFT_D2Z, (nyn_x-nys_x+1) * (nzt_x-nzb_x+1) )344 CALL CUFFTPLAN1D( plan_xi, nx+1, CUFFT_Z2D, (nyn_x-nys_x+1) * (nzt_x-nzb_x+1) )345 CALL CUFFTPLAN1D( plan_yf, ny+1, CUFFT_D2Z, (nxr_y-nxl_y+1) * (nzt_y-nzb_y+1) )346 CALL CUFFTPLAN1D( plan_yi, ny+1, CUFFT_Z2D, (nxr_y-nxl_y+1) * (nzt_y-nzb_y+1) )347 327 #else 348 328 message_string = 'no system-specific fft-call available' … … 403 383 404 384 405 USE cuda_fft_interfaces406 #if defined( __cuda_fft )407 USE ISO_C_BINDING408 #endif409 410 385 IMPLICIT NONE 411 386 … … 429 404 #elif defined( __nec ) 430 405 REAL(wp), DIMENSION(6*(nx+1)) :: work2 !< 431 #elif defined( __cuda_fft )432 COMPLEX(dp), DIMENSION(0:(nx+1)/2,nys_x:nyn_x,nzb_x:nzt_x) :: &433 ar_tmp 
!<434 ! following does not work for PGI 14.1 -> to be removed later435 ! !$acc declare create( ar_tmp )436 406 #endif 437 407 … … 737 707 738 708 ENDIF 739 740 #elif defined( __cuda_fft )741 742 !$acc data create( ar_tmp )743 IF ( forward_fft ) THEN744 745 !$acc data present( ar )746 CALL CUFFTEXECD2Z( plan_xf, ar, ar_tmp )747 748 !$acc kernels749 DO k = nzb_x, nzt_x750 DO j = nys_x, nyn_x751 752 DO i = 0, (nx+1)/2753 ar(i,j,k) = REAL( ar_tmp(i,j,k), KIND=wp ) * dnx754 ENDDO755 756 DO i = 1, (nx+1)/2 - 1757 ar(nx+1-i,j,k) = AIMAG( ar_tmp(i,j,k) ) * dnx758 ENDDO759 760 ENDDO761 ENDDO762 !$acc end kernels763 !$acc end data764 765 ELSE766 767 !$acc data present( ar )768 !$acc kernels769 DO k = nzb_x, nzt_x770 DO j = nys_x, nyn_x771 772 ar_tmp(0,j,k) = CMPLX( ar(0,j,k), 0.0_wp, KIND=wp )773 774 DO i = 1, (nx+1)/2 - 1775 ar_tmp(i,j,k) = CMPLX( ar(i,j,k), ar(nx+1-i,j,k), &776 KIND=wp )777 ENDDO778 ar_tmp((nx+1)/2,j,k) = CMPLX( ar((nx+1)/2,j,k), 0.0_wp, &779 KIND=wp )780 781 ENDDO782 ENDDO783 !$acc end kernels784 785 CALL CUFFTEXECZ2D( plan_xi, ar_tmp, ar )786 !$acc end data787 788 ENDIF789 !$acc end data790 709 791 710 #else … … 1052 971 1053 972 1054 USE cuda_fft_interfaces1055 #if defined( __cuda_fft )1056 USE ISO_C_BINDING1057 #endif1058 1059 973 IMPLICIT NONE 1060 974 … … 1082 996 #elif defined( __nec ) 1083 997 REAL(wp), DIMENSION(6*(ny+1)) :: work2 !< 1084 #elif defined( __cuda_fft )1085 COMPLEX(dp), DIMENSION(0:(ny+1)/2,nxl_y:nxr_y,nzb_y:nzt_y) :: &1086 ar_tmp !<1087 ! following does not work for PGI 14.1 -> to be removed later1088 ! 
!$acc declare create( ar_tmp )1089 998 #endif 1090 999 … … 1364 1273 1365 1274 ENDIF 1366 #elif defined( __cuda_fft )1367 1368 !$acc data create( ar_tmp )1369 IF ( forward_fft ) THEN1370 1371 !$acc data present( ar )1372 CALL CUFFTEXECD2Z( plan_yf, ar, ar_tmp )1373 1374 !$acc kernels1375 DO k = nzb_y, nzt_y1376 DO i = nxl_y, nxr_y1377 1378 DO j = 0, (ny+1)/21379 ar(j,i,k) = REAL( ar_tmp(j,i,k), KIND=wp ) * dny1380 ENDDO1381 1382 DO j = 1, (ny+1)/2 - 11383 ar(ny+1-j,i,k) = AIMAG( ar_tmp(j,i,k) ) * dny1384 ENDDO1385 1386 ENDDO1387 ENDDO1388 !$acc end kernels1389 !$acc end data1390 1391 ELSE1392 1393 !$acc data present( ar )1394 !$acc kernels1395 DO k = nzb_y, nzt_y1396 DO i = nxl_y, nxr_y1397 1398 ar_tmp(0,i,k) = CMPLX( ar(0,i,k), 0.0_wp, KIND=wp )1399 1400 DO j = 1, (ny+1)/2 - 11401 ar_tmp(j,i,k) = CMPLX( ar(j,i,k), ar(ny+1-j,i,k), &1402 KIND=wp )1403 ENDDO1404 ar_tmp((ny+1)/2,i,k) = CMPLX( ar((ny+1)/2,i,k), 0.0_wp, &1405 KIND=wp )1406 1407 ENDDO1408 ENDDO1409 !$acc end kernels1410 1411 CALL CUFFTEXECZ2D( plan_yi, ar_tmp, ar )1412 !$acc end data1413 1414 ENDIF1415 !$acc end data1416 1417 1275 #else 1418 1276 message_string = 'no system-specific fft-call available'
Note: See TracChangeset for help on using the changeset viewer.