Changeset 4366 for palm/trunk/SOURCE/fft_xy_mod.f90
- Timestamp:
- Jan 9, 2020 8:12:43 AM (4 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
palm/trunk/SOURCE/fft_xy_mod.f90
r4360 r4366 25 25 ! ----------------- 26 26 ! $Id$ 27 ! Vectorized Temperton-fft added 28 ! 29 ! 4360 2020-01-07 11:25:50Z suehring 27 30 ! Corrected "Former revisions" section 28 31 ! … … 41 44 !> Fast Fourier transformation along x and y for 1d domain decomposition along x. 42 45 !> Original version: Klaus Ketelsen (May 2002) 46 !> @todo openmp support for vectorized Temperton fft 43 47 !------------------------------------------------------------------------------! 44 48 MODULE fft_xy … … 46 50 47 51 USE control_parameters, & 48 ONLY: fft_method, message_string52 ONLY: fft_method, loop_optimization, message_string 49 53 50 54 USE cuda_fft_interfaces … … 52 56 USE indices, & 53 57 ONLY: nx, ny, nz 54 58 55 59 #if defined( __cuda_fft ) 56 60 USE ISO_C_BINDING … … 72 76 73 77 PRIVATE 74 PUBLIC fft_x, fft_x_1d, fft_y, fft_y_1d, fft_init, fft_x_m, fft_y_m 78 PUBLIC fft_x, fft_x_1d, fft_y, fft_y_1d, fft_init, fft_x_m, fft_y_m, f_vec, temperton_fft_vec 75 79 76 80 INTEGER(iwp), DIMENSION(:), ALLOCATABLE, SAVE :: ifax_x !< 77 81 INTEGER(iwp), DIMENSION(:), ALLOCATABLE, SAVE :: ifax_y !< 78 82 79 LOGICAL, SAVE :: init_fft = .FALSE. !< 83 LOGICAL, SAVE :: init_fft = .FALSE. !< 84 LOGICAL, SAVE :: temperton_fft_vec = .FALSE. !< 80 85 81 86 REAL(wp), SAVE :: dnx !< … … 86 91 REAL(wp), DIMENSION(:), ALLOCATABLE, SAVE :: trigs_x !< 87 92 REAL(wp), DIMENSION(:), ALLOCATABLE, SAVE :: trigs_y !< 93 94 REAL(wp), DIMENSION(:,:), ALLOCATABLE, SAVE :: f_vec 88 95 89 96 #if defined( __ibm ) … … 200 207 ENDIF 201 208 209 ! 210 !-- Switch to tell the Poisson-solver that the vectorized version of Temperton-fft is to be used. 211 IF ( fft_method == 'temperton-algorithm' .AND. loop_optimization == 'vector' ) THEN 212 temperton_fft_vec = .TRUE. 213 ENDIF 214 215 202 216 #if defined( _OPENACC ) && defined( __cuda_fft ) 203 217 fft_method = 'system-specific' … … 270 284 CALL set99( trigs_y, ifax_y, ny+1 ) 271 285 286 IF ( temperton_fft_vec ) THEN 287 ALLOCATE( f_vec((nyn_x-nys_x+1)*(nzt_x-nzb_x+1),0:nx+2) ) 288 ENDIF 289 290 291 272 292 ELSEIF ( fft_method == 'fftw' ) THEN 273 293 ! … … 312 332 !------------------------------------------------------------------------------! 313 333 314 SUBROUTINE fft_x( ar, direction, ar_2d )334 SUBROUTINE fft_x( ar, direction, ar_2d, ar_inv ) 315 335 316 336 … … 325 345 INTEGER(iwp) :: j !< 326 346 INTEGER(iwp) :: k !< 347 INTEGER(iwp) :: mm !< 327 348 328 349 LOGICAL :: forward_fft !< … … 331 352 REAL(wp), DIMENSION(nx+2) :: work1 !< 332 353 354 REAL(wp), DIMENSION(:,:), ALLOCATABLE :: work_vec !< 355 REAL(wp), DIMENSION(0:nx,nys_x:nyn_x), OPTIONAL :: ar_2d !< 356 357 REAL(wp), DIMENSION(nys_x:nyn_x,nzb_x:nzt_x,0:nx), OPTIONAL :: ar_inv !< 358 REAL(wp), DIMENSION(0:nx,nys_x:nyn_x,nzb_x:nzt_x) :: ar !< 359 333 360 #if defined( __ibm ) 334 361 REAL(wp), DIMENSION(nau2) :: aux2 !< … … 337 364 REAL(wp), DIMENSION(6*(nx+1)) :: work2 !< 338 365 #elif defined( __cuda_fft ) 339 COMPLEX(dp), DIMENSION(0:(nx+1)/2,nys_x:nyn_x,nzb_x:nzt_x) :: & 340 ar_tmp !< 366 COMPLEX(dp), DIMENSION(0:(nx+1)/2,nys_x:nyn_x,nzb_x:nzt_x) :: ar_tmp !< 341 367 !$ACC DECLARE CREATE(ar_tmp) 342 368 #endif 343 344 REAL(wp), DIMENSION(0:nx,nys_x:nyn_x), OPTIONAL :: &345 ar_2d !<346 REAL(wp), DIMENSION(0:nx,nys_x:nyn_x,nzb_x:nzt_x) :: &347 ar !<348 369 349 370 ! … … 427 448 IF ( forward_fft ) THEN 428 449 429 !$OMP PARALLEL PRIVATE ( work, work1, i, j, k ) 430 !$OMP DO 431 DO k = nzb_x, nzt_x 432 DO j = nys_x, nyn_x 433 434 work(0:nx) = ar(0:nx,j,k) 435 CALL fft991cy( work, work1, trigs_x, ifax_x, 1, nx+1, nx+1, 1, -1 ) 436 437 DO i = 0, (nx+1)/2 438 ar(i,j,k) = work(2*i) 439 ENDDO 440 DO i = 1, (nx+1)/2 - 1 441 ar(nx+1-i,j,k) = work(2*i+1) 442 ENDDO 443 444 ENDDO 445 ENDDO 446 !$OMP END PARALLEL 447 448 ELSE 449 450 !$OMP PARALLEL PRIVATE ( work, work1, i, j, k ) 451 !$OMP DO 452 DO k = nzb_x, nzt_x 453 DO j = nys_x, nyn_x 454 455 DO i = 0, (nx+1)/2 456 work(2*i) = ar(i,j,k) 457 ENDDO 458 DO i = 1, (nx+1)/2 - 1 459 work(2*i+1) = ar(nx+1-i,j,k) 460 ENDDO 461 work(1) = 0.0_wp 462 work(nx+2) = 0.0_wp 463 464 CALL fft991cy( work, work1, trigs_x, ifax_x, 1, nx+1, nx+1, 1, 1 ) 465 ar(0:nx,j,k) = work(0:nx) 466 467 ENDDO 468 ENDDO 469 !$OMP END PARALLEL 450 IF ( .NOT. temperton_fft_vec ) THEN 451 452 !$OMP PARALLEL PRIVATE ( work, work1, i, j, k ) 453 !$OMP DO 454 DO k = nzb_x, nzt_x 455 DO j = nys_x, nyn_x 456 457 work(0:nx) = ar(0:nx,j,k) 458 CALL fft991cy( work, work1, trigs_x, ifax_x, 1, nx+1, nx+1, 1, -1 ) 459 460 DO i = 0, (nx+1)/2 461 ar(i,j,k) = work(2*i) 462 ENDDO 463 DO i = 1, (nx+1)/2 - 1 464 ar(nx+1-i,j,k) = work(2*i+1) 465 ENDDO 466 467 ENDDO 468 ENDDO 469 !$OMP END PARALLEL 470 471 ELSE 472 473 ! 474 !-- Vector version of the Temperton-algorithm. Computes multiple 1-D FFT's. 475 ALLOCATE( work_vec( (nyn_x-nys_x+1)*(nzt_x-nzb_x+1),nx+2) ) 476 ! 477 !-- f_vec is already set in transpose_zx 478 CALL fft991cy_vec( f_vec, work_vec, trigs_x, ifax_x, nx+1, -1 ) 479 DEALLOCATE( work_vec ) 480 481 IF ( PRESENT( ar_inv ) ) THEN 482 483 DO k = nzb_x, nzt_x 484 DO j = nys_x, nyn_x 485 mm = j-nys_x+1+(k-nzb_x)*(nyn_x-nys_x+1) 486 DO i = 0, (nx+1)/2 487 ar_inv(j,k,i) = f_vec(mm,2*i) 488 ENDDO 489 DO i = 1, (nx+1)/2-1 490 ar_inv(j,k,nx+1-i) = f_vec(mm,2*i+1) 491 ENDDO 492 ENDDO 493 ENDDO 494 495 ELSE 496 497 DO k = nzb_x, nzt_x 498 DO j = nys_x, nyn_x 499 mm = j-nys_x+1+(k-nzb_x)*(nyn_x-nys_x+1) 500 DO i = 0, (nx+1)/2 501 ar(i,j,k) = f_vec(mm,2*i) 502 ENDDO 503 DO i = 1, (nx+1)/2-1 504 ar(nx+1-i,j,k) = f_vec(mm,2*i+1) 505 ENDDO 506 ENDDO 507 ENDDO 508 509 ENDIF 510 511 ENDIF 512 513 ELSE 514 515 ! 516 !-- Backward fft 517 IF ( .NOT. temperton_fft_vec ) THEN 518 519 !$OMP PARALLEL PRIVATE ( work, work1, i, j, k ) 520 !$OMP DO 521 DO k = nzb_x, nzt_x 522 DO j = nys_x, nyn_x 523 524 DO i = 0, (nx+1)/2 525 work(2*i) = ar(i,j,k) 526 ENDDO 527 DO i = 1, (nx+1)/2 - 1 528 work(2*i+1) = ar(nx+1-i,j,k) 529 ENDDO 530 work(1) = 0.0_wp 531 work(nx+2) = 0.0_wp 532 533 CALL fft991cy( work, work1, trigs_x, ifax_x, 1, nx+1, nx+1, 1, 1 ) 534 ar(0:nx,j,k) = work(0:nx) 535 536 ENDDO 537 ENDDO 538 !$OMP END PARALLEL 539 540 ELSE 541 542 IF ( PRESENT( ar_inv ) ) THEN 543 544 DO k = nzb_x, nzt_x 545 DO j = nys_x, nyn_x 546 mm = j-nys_x+1+(k-nzb_x)*(nyn_x-nys_x+1) 547 DO i = 0, (nx+1)/2 548 f_vec(mm,2*i) = ar_inv(j,k,i) 549 ENDDO 550 DO i = 1, (nx+1)/2-1 551 f_vec(mm,2*i+1) = ar_inv(j,k,nx+1-i) 552 ENDDO 553 ENDDO 554 ENDDO 555 556 ELSE 557 558 DO k = nzb_x, nzt_x 559 DO j = nys_x, nyn_x 560 mm = j-nys_x+1+(k-nzb_x)*(nyn_x-nys_x+1) 561 DO i = 0, (nx+1)/2 562 f_vec(mm,2*i) = ar(i,j,k) 563 ENDDO 564 DO i = 1, (nx+1)/2-1 565 f_vec(mm,2*i+1) = ar(nx+1-i,j,k) 566 ENDDO 567 ENDDO 568 ENDDO 569 570 ENDIF 571 f_vec(:,1) = 0.0_wp 572 f_vec(:,nx+2) = 0.0_wp 573 574 ALLOCATE( work_vec((nyn_x-nys_x+1)*(nzt_x-nzb_x+1),nx+2) ) 575 CALL fft991cy_vec( f_vec, work_vec, trigs_x, ifax_x, nx+1, 1 ) 576 DEALLOCATE( work_vec ) 577 578 ENDIF 470 579 471 580 ENDIF … … 941 1050 942 1051 SUBROUTINE fft_y( ar, direction, ar_tr, nxl_y_bound, nxr_y_bound, nxl_y_l, & 943 nxr_y_l )1052 nxr_y_l, ar_inv ) 944 1053 945 1054 … … 952 1061 INTEGER(iwp) :: jshape(1) !< 953 1062 INTEGER(iwp) :: k !< 1063 INTEGER(iwp) :: mm !< 954 1064 INTEGER(iwp) :: nxl_y_bound !< 955 1065 INTEGER(iwp) :: nxl_y_l !< … … 962 1072 REAL(wp), DIMENSION(ny+2) :: work1 !< 963 1073 1074 REAL(wp), DIMENSION(:,:), ALLOCATABLE :: f_vec 1075 REAL(wp), DIMENSION(:,:), ALLOCATABLE :: work_vec 1076 1077 REAL(wp), DIMENSION(0:ny,nxl_y_l:nxr_y_l,nzb_y:nzt_y) :: ar !< 1078 REAL(wp), DIMENSION(nxl_y:nxr_y,nzb_y:nzt_y,0:ny), OPTIONAL :: ar_inv !< 1079 REAL(wp), DIMENSION(0:ny,nxl_y_bound:nxr_y_bound,nzb_y:nzt_y), OPTIONAL :: ar_tr !< 1080 964 1081 COMPLEX(wp), DIMENSION(:), ALLOCATABLE :: cwork !< 965 1082 … … 975 1092 #endif 976 1093 977 REAL(wp), DIMENSION(0:ny,nxl_y_l:nxr_y_l,nzb_y:nzt_y) :: &978 ar !<979 REAL(wp), DIMENSION(0:ny,nxl_y_bound:nxr_y_bound,nzb_y:nzt_y) :: &980 ar_tr !<981 1094 982 1095 IF ( direction == 'forward' ) THEN … … 1057 1170 IF ( forward_fft ) THEN 1058 1171 1059 !$OMP PARALLEL PRIVATE ( work, work1, i, j, k ) 1060 !$OMP DO 1061 DO k = nzb_y, nzt_y 1062 DO i = nxl_y_l, nxr_y_l 1063 1064 work(0:ny) = ar(0:ny,i,k) 1065 CALL fft991cy( work, work1, trigs_y, ifax_y, 1, ny+1, ny+1, 1, -1 ) 1066 1067 DO j = 0, (ny+1)/2 1068 ar_tr(j,i,k) = work(2*j) 1069 ENDDO 1070 DO j = 1, (ny+1)/2 - 1 1071 ar_tr(ny+1-j,i,k) = work(2*j+1) 1072 ENDDO 1073 1074 ENDDO 1075 ENDDO 1076 !$OMP END PARALLEL 1077 1078 ELSE 1079 1080 !$OMP PARALLEL PRIVATE ( work, work1, i, j, k ) 1081 !$OMP DO 1082 DO k = nzb_y, nzt_y 1083 DO i = nxl_y_l, nxr_y_l 1084 1085 DO j = 0, (ny+1)/2 1086 work(2*j) = ar_tr(j,i,k) 1087 ENDDO 1088 DO j = 1, (ny+1)/2 - 1 1089 work(2*j+1) = ar_tr(ny+1-j,i,k) 1090 ENDDO 1091 work(1) = 0.0_wp 1092 work(ny+2) = 0.0_wp 1093 1094 CALL fft991cy( work, work1, trigs_y, ifax_y, 1, ny+1, ny+1, 1, 1 ) 1095 ar(0:ny,i,k) = work(0:ny) 1096 1097 ENDDO 1098 ENDDO 1099 !$OMP END PARALLEL 1172 IF ( .NOT. temperton_fft_vec ) THEN 1173 1174 !$OMP PARALLEL PRIVATE ( work, work1, i, j, k ) 1175 !$OMP DO 1176 DO k = nzb_y, nzt_y 1177 DO i = nxl_y_l, nxr_y_l 1178 1179 work(0:ny) = ar(0:ny,i,k) 1180 CALL fft991cy( work, work1, trigs_y, ifax_y, 1, ny+1, ny+1, 1, -1 ) 1181 1182 DO j = 0, (ny+1)/2 1183 ar_tr(j,i,k) = work(2*j) 1184 ENDDO 1185 DO j = 1, (ny+1)/2 - 1 1186 ar_tr(ny+1-j,i,k) = work(2*j+1) 1187 ENDDO 1188 1189 ENDDO 1190 ENDDO 1191 !$OMP END PARALLEL 1192 1193 ELSE 1194 ! 1195 !-- Vector version of Temperton-fft. Computes multiple 1-D FFT's. 1196 ALLOCATE( f_vec((nxr_y_l-nxl_y_l+1)*(nzt_y-nzb_y+1),0:ny+2) ) 1197 1198 mm = 1 1199 DO k = nzb_y, nzt_y 1200 DO i = nxl_y_l, nxr_y_l 1201 f_vec(mm,0:nx) = ar(0:nx,i,k) 1202 mm = mm+1 1203 ENDDO 1204 ENDDO 1205 1206 ALLOCATE( work_vec( (nxr_y_l-nxl_y_l+1)*(nzt_y-nzb_y+1),ny+2) ) 1207 CALL fft991cy_vec( f_vec, work_vec, trigs_y, ifax_y, ny+1, -1 ) 1208 DEALLOCATE( work_vec ) 1209 1210 IF( PRESENT( ar_inv ) ) THEN 1211 1212 DO k = nzb_y, nzt_y 1213 DO i = nxl_y_l, nxr_y_l 1214 mm = i-nxl_y_l+1+(k-nzb_y)*(nxr_y_l-nxl_y_l+1) 1215 DO j = 0, (ny+1)/2 1216 ar_inv(i,k,j) = f_vec(mm,2*j) 1217 ENDDO 1218 DO j = 1, (ny+1)/2 - 1 1219 ar_inv(i,k,ny+1-j) = f_vec(mm,2*j+1) 1220 ENDDO 1221 ENDDO 1222 ENDDO 1223 1224 ELSE 1225 1226 DO k = nzb_y, nzt_y 1227 DO i = nxl_y_l, nxr_y_l 1228 mm = i-nxl_y_l+1+(k-nzb_y)*(nxr_y_l-nxl_y_l+1) 1229 DO j = 0, (ny+1)/2 1230 ar(j,i,k) = f_vec(mm,2*j) 1231 ENDDO 1232 DO j = 1, (ny+1)/2 - 1 1233 ar(ny+1-j,i,k) = f_vec(mm,2*j+1) 1234 ENDDO 1235 ENDDO 1236 ENDDO 1237 1238 ENDIF 1239 1240 DEALLOCATE( f_vec ) 1241 1242 ENDIF 1243 1244 ELSE 1245 1246 IF ( .NOT. temperton_fft_vec ) THEN 1247 1248 !$OMP PARALLEL PRIVATE ( work, work1, i, j, k ) 1249 !$OMP DO 1250 DO k = nzb_y, nzt_y 1251 DO i = nxl_y_l, nxr_y_l 1252 1253 DO j = 0, (ny+1)/2 1254 work(2*j) = ar_tr(j,i,k) 1255 ENDDO 1256 DO j = 1, (ny+1)/2 - 1 1257 work(2*j+1) = ar_tr(ny+1-j,i,k) 1258 ENDDO 1259 work(1) = 0.0_wp 1260 work(ny+2) = 0.0_wp 1261 1262 CALL fft991cy( work, work1, trigs_y, ifax_y, 1, ny+1, ny+1, 1, 1 ) 1263 ar(0:ny,i,k) = work(0:ny) 1264 1265 ENDDO 1266 ENDDO 1267 !$OMP END PARALLEL 1268 1269 ELSE 1270 1271 ALLOCATE( f_vec((nxr_y_l-nxl_y_l+1)*(nzt_y-nzb_y+1),0:ny+2) ) 1272 1273 IF ( PRESENT( ar_inv ) ) THEN 1274 1275 DO k = nzb_y, nzt_y 1276 DO i = nxl_y_l, nxr_y_l 1277 mm = i-nxl_y_l+1+(k-nzb_y)*(nxr_y_l-nxl_y_l+1) 1278 DO j = 0, (ny+1)/2 1279 f_vec(mm,2*j) = ar_inv(i,k,j) 1280 ENDDO 1281 DO j = 1, (ny+1)/2 - 1 1282 f_vec(mm,2*j+1) = ar_inv(i,k,ny+1-j) 1283 ENDDO 1284 ENDDO 1285 ENDDO 1286 1287 ELSE 1288 1289 DO k = nzb_y, nzt_y 1290 DO i = nxl_y_l, nxr_y_l 1291 mm = i-nxl_y_l+1+(k-nzb_y)*(nxr_y_l-nxl_y_l+1) 1292 DO j = 0, (ny+1)/2 1293 f_vec(mm,2*j) = ar(j,i,k) 1294 ENDDO 1295 DO j = 1, (ny+1)/2 - 1 1296 f_vec(mm,2*j+1) = ar(ny+1-j,i,k) 1297 ENDDO 1298 ENDDO 1299 ENDDO 1300 1301 ENDIF 1302 1303 f_vec(:,1) = 0.0_wp 1304 f_vec(:,ny+2) = 0.0_wp 1305 1306 ALLOCATE( work_vec((nxr_y_l-nxl_y_l+1)*(nzt_y-nzb_y+1),ny+2) ) 1307 CALL fft991cy_vec( f_vec, work_vec, trigs_y, ifax_y, ny+1, 1 ) 1308 DEALLOCATE( work_vec ) 1309 1310 mm = 1 1311 DO k = nzb_y, nzt_y 1312 DO i = nxl_y_l, nxr_y_l 1313 ar(0:ny,i,k) = f_vec(mm,0:ny) 1314 mm = mm+1 1315 ENDDO 1316 ENDDO 1317 1318 DEALLOCATE( f_vec ) 1319 1320 ENDIF 1100 1321 1101 1322 ENDIF
Note: See TracChangeset
for help on using the changeset viewer.