Changeset 1111 for palm/trunk/SOURCE/poisfft.f90
- Timestamp:
- Mar 8, 2013 11:54:10 PM (11 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
palm/trunk/SOURCE/poisfft.f90
r1107 r1111 20 20 ! Current revisions: 21 21 ! ----------------- 22 ! 22 ! further openACC porting of non-parallel (MPI) branch: 23 ! tridiagonal routines split into extermal subroutines (instead using CONTAINS), 24 ! no distinction between parallel/non-parallel in poisfft and tridia any more, 25 ! tridia routines moved to end of file because of probable bug in PGI compiler 26 ! (otherwise "invalid device function" is indicated during runtime), 27 ! optimization of tridia routines: constant elements and coefficients of tri are 28 ! stored in seperate arrays ddzuw and tric, last dimension of tri reduced from 5 29 ! to 2, 30 ! poisfft_init is now called internally from poisfft, maketri is called from 31 ! poisfft_init, 32 ! ibc_p_b = 2 removed 23 33 ! 24 34 ! Former revisions: … … 157 167 IMPLICIT NONE 158 168 169 LOGICAL, SAVE :: poisfft_initialized = .FALSE. 170 171 REAL, DIMENSION(:,:), ALLOCATABLE :: ddzuw 172 159 173 PRIVATE 160 174 … … 181 195 SUBROUTINE poisfft_init 182 196 197 USE arrays_3d, ONLY: ddzu_pres, ddzw 198 199 IMPLICIT NONE 200 201 INTEGER :: k 202 203 183 204 CALL fft_init 184 205 206 ALLOCATE( ddzuw(0:nz-1,3) ) 207 208 DO k = 0, nz-1 209 ddzuw(k,1) = ddzu_pres(k+1) * ddzw(k+1) 210 ddzuw(k,2) = ddzu_pres(k+2) * ddzw(k+1) 211 ddzuw(k,3) = -1.0 * & 212 ( ddzu_pres(k+2) * ddzw(k+1) + ddzu_pres(k+1) * ddzw(k+1) ) 213 ENDDO 214 ! 215 !-- Calculate constant coefficients of the tridiagonal matrix 216 #if ! defined ( __check ) 217 CALL maketri 218 #endif 219 220 poisfft_initialized = .TRUE. 221 185 222 END SUBROUTINE poisfft_init 223 186 224 187 225 #if ! defined ( __check ) … … 199 237 CALL cpu_log( log_point_s(3), 'poisfft', 'start' ) 200 238 239 IF ( .NOT. poisfft_initialized ) CALL poisfft_init 240 201 241 ! 202 242 !-- Two-dimensional Fourier Transformation in x- and y-direction. 203 #if defined( __parallel ) 204 IF ( pdims(2) == 1 ) THEN 243 IF ( pdims(2) == 1 .AND. pdims(1) > 1 ) THEN 205 244 206 245 ! … … 217 256 CALL tr_xy_ffty( ar, work, ar ) 218 257 219 ELSEIF ( pdims(1) == 1 ) THEN258 ELSEIF ( pdims(1) == 1 .AND. pdims(2) > 1 ) THEN 220 259 221 260 ! … … 235 274 236 275 ! 237 !-- 2d-domain-decomposition 276 !-- 2d-domain-decomposition or no decomposition (1 PE run) 238 277 !-- Transposition z --> x 239 278 CALL cpu_log( log_point_s(5), 'transpo forward', 'start' ) … … 296 335 ENDIF 297 336 298 #else299 300 !301 !-- Two-dimensional Fourier Transformation along x- and y-direction.302 CALL cpu_log( log_point_s(5), 'transpo forward', 'start' )303 !$acc data copyin( ar, work )304 CALL transpose_zx( ar, work, ar )305 !$acc update host( ar )306 CALL cpu_log( log_point_s(5), 'transpo forward', 'pause' )307 308 309 CALL cpu_log( log_point_s(4), 'fft_x', 'start' )310 CALL fft_x( ar, 'forward' )311 CALL cpu_log( log_point_s(4), 'fft_x', 'pause' )312 313 CALL cpu_log( log_point_s(5), 'transpo forward', 'continue' )314 CALL transpose_xy( ar, work, ar )315 CALL cpu_log( log_point_s(5), 'transpo forward', 'pause' )316 317 CALL cpu_log( log_point_s(7), 'fft_y', 'start' )318 CALL fft_y( ar, 'forward' )319 CALL cpu_log( log_point_s(7), 'fft_y', 'pause' )320 321 !322 !-- Solve the tridiagonal equation system along z323 CALL cpu_log( log_point_s(5), 'transpo forward', 'continue' )324 CALL transpose_yz( ar, work, ar )325 CALL cpu_log( log_point_s(5), 'transpo forward', 'stop' )326 327 CALL cpu_log( log_point_s(6), 'tridia', 'start' )328 CALL tridia( ar )329 CALL cpu_log( log_point_s(6), 'tridia', 'stop' )330 331 CALL cpu_log( log_point_s(8), 'transpo invers', 'start' )332 CALL transpose_zy( ar, work, ar )333 CALL cpu_log( log_point_s(8), 'transpo invers', 'pause' )334 335 !336 !-- Inverse Fourier Transformation.337 CALL cpu_log( log_point_s(7), 'fft_y', 'continue' )338 CALL fft_y( ar, 'backward' )339 CALL cpu_log( log_point_s(7), 'fft_y', 'stop' )340 341 CALL cpu_log( log_point_s(8), 'transpo invers', 'continue' )342 CALL transpose_yx( ar, work, ar )343 CALL cpu_log( log_point_s(8), 'transpo invers', 'pause' )344 345 CALL cpu_log( log_point_s(4), 'fft_x', 'continue' )346 CALL fft_x( ar, 'backward' )347 CALL cpu_log( log_point_s(4), 'fft_x', 'stop' )348 349 CALL cpu_log( log_point_s(8), 'transpo invers', 'continue' )350 CALL transpose_xz( ar, work, ar )351 CALL cpu_log( log_point_s(8), 'transpo invers', 'stop' )352 353 !$acc end data354 355 #endif356 357 337 CALL cpu_log( log_point_s(3), 'poisfft', 'stop' ) 358 338 … … 361 341 362 342 363 SUBROUTINE tridia( ar )364 365 !------------------------------------------------------------------------------!366 ! solves the linear system of equations:367 !368 ! -(4 pi^2(i^2/(dx^2*nnx^2)+j^2/(dy^2*nny^2))+369 ! 1/(dzu(k)*dzw(k))+1/(dzu(k-1)*dzw(k)))*p(i,j,k)+370 ! 1/(dzu(k)*dzw(k))*p(i,j,k+1)+1/(dzu(k-1)*dzw(k))*p(i,j,k-1)=d(i,j,k)371 !372 ! by using the Thomas algorithm373 !------------------------------------------------------------------------------!374 375 USE arrays_3d376 377 IMPLICIT NONE378 379 INTEGER :: i, j, k, nnyh380 381 REAL, DIMENSION(nxl_z:nxr_z,0:nz-1) :: ar1382 REAL, DIMENSION(5,nxl_z:nxr_z,0:nz-1) :: tri383 384 REAL :: ar(nxl_z:nxr_z,nys_z:nyn_z,1:nz)385 386 387 nnyh = (ny+1) / 2388 389 !390 !-- Define constant elements of the tridiagonal matrix.391 !$OMP PARALLEL PRIVATE ( k, i )392 !$OMP DO393 DO k = 0, nz-1394 DO i = nxl_z, nxr_z395 tri(2,i,k) = ddzu_pres(k+1) * ddzw(k+1)396 tri(3,i,k) = ddzu_pres(k+2) * ddzw(k+1)397 ENDDO398 ENDDO399 !$OMP END PARALLEL400 401 #if defined( __parallel )402 !403 !-- Repeat for all y-levels.404 !$OMP PARALLEL FIRSTPRIVATE( tri ) PRIVATE ( ar1, j )405 !$OMP DO406 DO j = nys_z, nyn_z407 IF ( j <= nnyh ) THEN408 CALL maketri( j )409 ELSE410 CALL maketri( ny+1-j )411 ENDIF412 CALL split413 CALL substi( j )414 ENDDO415 !$OMP END PARALLEL416 #else417 !418 !-- First y-level.419 CALL maketri( nys_z )420 CALL split421 CALL substi( 0 )422 423 !424 !-- Further y-levels.425 DO j = 1, nnyh - 1426 CALL maketri( j )427 CALL split428 CALL substi( j )429 CALL substi( ny+1-j )430 ENDDO431 CALL maketri( nnyh )432 CALL split433 CALL substi( nnyh+nys )434 #endif435 436 CONTAINS437 438 SUBROUTINE maketri( j )439 440 !------------------------------------------------------------------------------!441 ! Computes the i- and j-dependent component of the matrix442 !------------------------------------------------------------------------------!443 444 USE arrays_3d445 USE constants446 USE control_parameters447 USE grid_variables448 449 IMPLICIT NONE450 451 INTEGER :: i, j, k, nnxh452 REAL :: a, c453 REAL :: ll(nxl_z:nxr_z)454 455 456 nnxh = ( nx + 1 ) / 2457 458 !459 !-- Provide the tridiagonal matrix for solution of the Poisson equation in460 !-- Fourier space. The coefficients are computed following the method of461 !-- Schmidt et al. (DFVLR-Mitteilung 84-15), which departs from Stephan462 !-- Siano's original version by discretizing the Poisson equation,463 !-- before it is Fourier-transformed464 #if defined( __parallel )465 DO i = nxl_z, nxr_z466 IF ( i >= 0 .AND. i <= nnxh ) THEN467 ll(i) = 2.0 * ( 1.0 - COS( ( 2.0 * pi * i ) / &468 REAL( nx+1 ) ) ) / ( dx * dx ) + &469 2.0 * ( 1.0 - COS( ( 2.0 * pi * j ) / &470 REAL( ny+1 ) ) ) / ( dy * dy )471 ELSE472 ll(i) = 2.0 * ( 1.0 - COS( ( 2.0 * pi * ( nx+1-i ) ) / &473 REAL( nx+1 ) ) ) / ( dx * dx ) + &474 2.0 * ( 1.0 - COS( ( 2.0 * pi * j ) / &475 REAL( ny+1 ) ) ) / ( dy * dy )476 ENDIF477 DO k = 0,nz-1478 a = -1.0 * ddzu_pres(k+2) * ddzw(k+1)479 c = -1.0 * ddzu_pres(k+1) * ddzw(k+1)480 tri(1,i,k) = a + c - ll(i)481 ENDDO482 ENDDO483 #else484 DO i = 0, nnxh485 ll(i) = 2.0 * ( 1.0 - COS( ( 2.0 * pi * i ) / REAL( nx+1 ) ) ) / &486 ( dx * dx ) + &487 2.0 * ( 1.0 - COS( ( 2.0 * pi * j ) / REAL( ny+1 ) ) ) / &488 ( dy * dy )489 DO k = 0, nz-1490 a = -1.0 * ddzu_pres(k+2) * ddzw(k+1)491 c = -1.0 * ddzu_pres(k+1) * ddzw(k+1)492 tri(1,i,k) = a + c - ll(i)493 IF ( i >= 1 .and. i < nnxh ) THEN494 tri(1,nx+1-i,k) = tri(1,i,k)495 ENDIF496 ENDDO497 ENDDO498 #endif499 IF ( ibc_p_b == 1 .OR. ibc_p_b == 2 ) THEN500 DO i = nxl_z, nxr_z501 tri(1,i,0) = tri(1,i,0) + tri(2,i,0)502 ENDDO503 ENDIF504 IF ( ibc_p_t == 1 ) THEN505 DO i = nxl_z, nxr_z506 tri(1,i,nz-1) = tri(1,i,nz-1) + tri(3,i,nz-1)507 ENDDO508 ENDIF509 510 END SUBROUTINE maketri511 512 513 SUBROUTINE substi( j )514 515 !------------------------------------------------------------------------------!516 ! Substitution (Forward and Backward) (Thomas algorithm)517 !------------------------------------------------------------------------------!518 519 USE control_parameters520 521 IMPLICIT NONE522 523 INTEGER :: i, j, k524 525 !526 !-- Forward substitution.527 DO i = nxl_z, nxr_z528 ar1(i,0) = ar(i,j,1)529 ENDDO530 DO k = 1, nz - 1531 DO i = nxl_z, nxr_z532 ar1(i,k) = ar(i,j,k+1) - tri(5,i,k) * ar1(i,k-1)533 ENDDO534 ENDDO535 536 !537 !-- Backward substitution538 !-- Note, the 1.0E-20 in the denominator is due to avoid divisions539 !-- by zero appearing if the pressure bc is set to neumann at the top of540 !-- the model domain.541 DO i = nxl_z, nxr_z542 ar(i,j,nz) = ar1(i,nz-1) / ( tri(4,i,nz-1) + 1.0E-20 )543 ENDDO544 DO k = nz-2, 0, -1545 DO i = nxl_z, nxr_z546 ar(i,j,k+1) = ( ar1(i,k) - tri(3,i,k) * ar(i,j,k+2) ) &547 / tri(4,i,k)548 ENDDO549 ENDDO550 551 !552 !-- Indices i=0, j=0 correspond to horizontally averaged pressure.553 !-- The respective values of ar should be zero at all k-levels if554 !-- acceleration of horizontally averaged vertical velocity is zero.555 IF ( ibc_p_b == 1 .AND. ibc_p_t == 1 ) THEN556 IF ( j == 0 .AND. nxl_z == 0 ) THEN557 DO k = 1, nz558 ar(nxl_z,j,k) = 0.0559 ENDDO560 ENDIF561 ENDIF562 563 END SUBROUTINE substi564 565 566 SUBROUTINE split567 568 !------------------------------------------------------------------------------!569 ! Splitting of the tridiagonal matrix (Thomas algorithm)570 !------------------------------------------------------------------------------!571 572 IMPLICIT NONE573 574 INTEGER :: i, k575 576 !577 !-- Splitting.578 DO i = nxl_z, nxr_z579 tri(4,i,0) = tri(1,i,0)580 ENDDO581 DO k = 1, nz-1582 DO i = nxl_z, nxr_z583 tri(5,i,k) = tri(2,i,k) / tri(4,i,k-1)584 tri(4,i,k) = tri(1,i,k) - tri(3,i,k-1) * tri(5,i,k)585 ENDDO586 ENDDO587 588 END SUBROUTINE split589 590 END SUBROUTINE tridia591 592 593 #if defined( __parallel )594 343 SUBROUTINE ffty_tr_yx( f_in, work, f_out ) 595 344 … … 706 455 ! 707 456 !-- Transpose array 457 #if defined( __parallel ) 708 458 CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' ) 709 459 IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr ) … … 712 462 comm1dx, ierr ) 713 463 CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' ) 464 #endif 714 465 715 466 END SUBROUTINE ffty_tr_yx … … 745 496 ! 746 497 !-- Transpose array 498 #if defined( __parallel ) 747 499 CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' ) 748 500 IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr ) … … 751 503 comm1dx, ierr ) 752 504 CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' ) 505 #endif 753 506 754 507 ! … … 1061 814 ! 1062 815 !-- Transpose array 816 #if defined( __parallel ) 1063 817 CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' ) 1064 818 IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr ) … … 1067 821 comm1dy, ierr ) 1068 822 CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' ) 823 #endif 1069 824 1070 825 END SUBROUTINE fftx_tr_xy … … 1096 851 ! 1097 852 !-- Transpose array 853 #if defined( __parallel ) 1098 854 CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' ) 1099 855 IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr ) … … 1102 858 comm1dy, ierr ) 1103 859 CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' ) 860 #endif 1104 861 1105 862 ! … … 1426 1183 ENDDO 1427 1184 ENDDO 1428 IF ( ibc_p_b == 1 .OR. ibc_p_b == 2) THEN1185 IF ( ibc_p_b == 1 ) THEN 1429 1186 DO i = 0, nx 1430 1187 tri(1,i,0) = tri(1,i,0) + tri(2,i,0) … … 1530 1287 END SUBROUTINE tridia_1dd 1531 1288 1532 #endif 1533 #endif 1289 1290 SUBROUTINE tridia( ar ) 1291 1292 !------------------------------------------------------------------------------! 1293 ! solves the linear system of equations: 1294 ! 1295 ! -(4 pi^2(i^2/(dx^2*nnx^2)+j^2/(dy^2*nny^2))+ 1296 ! 1/(dzu(k)*dzw(k))+1/(dzu(k-1)*dzw(k)))*p(i,j,k)+ 1297 ! 1/(dzu(k)*dzw(k))*p(i,j,k+1)+1/(dzu(k-1)*dzw(k))*p(i,j,k-1)=d(i,j,k) 1298 ! 1299 ! by using the Thomas algorithm 1300 !------------------------------------------------------------------------------! 1301 1302 USE arrays_3d 1303 1304 IMPLICIT NONE 1305 1306 INTEGER :: i, j, k 1307 1308 !$acc declare create( tri ) 1309 REAL, DIMENSION(nxl_z:nxr_z,nys_z:nyn_z,0:nz-1,2) :: tri 1310 1311 REAL :: ar(nxl_z:nxr_z,nys_z:nyn_z,1:nz) 1312 1313 1314 CALL split( tri ) 1315 CALL substi( ar, tri ) 1316 1317 END SUBROUTINE tridia 1318 1319 1320 SUBROUTINE maketri 1321 1322 !------------------------------------------------------------------------------! 1323 ! Computes the i- and j-dependent component of the matrix 1324 !------------------------------------------------------------------------------! 1325 1326 USE arrays_3d, ONLY: tric 1327 USE constants 1328 USE control_parameters 1329 USE grid_variables 1330 1331 IMPLICIT NONE 1332 1333 INTEGER :: i, j, k, nnxh, nnyh 1334 1335 !$acc declare create( ll ) 1336 REAL :: ll(nxl_z:nxr_z,nys_z:nyn_z) 1337 1338 1339 nnxh = ( nx + 1 ) / 2 1340 nnyh = ( ny + 1 ) / 2 1341 1342 ! 1343 !-- Provide the constant coefficients of the tridiagonal matrix for solution 1344 !-- of the Poisson equation in Fourier space. 1345 !-- The coefficients are computed following the method of 1346 !-- Schmidt et al. (DFVLR-Mitteilung 84-15), which departs from Stephan 1347 !-- Siano's original version by discretizing the Poisson equation, 1348 !-- before it is Fourier-transformed. 1349 1350 !$acc kernels present( tric ) 1351 !$acc loop vector( 32 ) 1352 DO j = nys_z, nyn_z 1353 DO i = nxl_z, nxr_z 1354 IF ( j >= 0 .AND. j <= nnyh ) THEN 1355 IF ( i >= 0 .AND. i <= nnxh ) THEN 1356 ll(i,j) = 2.0 * ( 1.0 - COS( ( 2.0 * pi * i ) / & 1357 REAL( nx+1 ) ) ) / ( dx * dx ) + & 1358 2.0 * ( 1.0 - COS( ( 2.0 * pi * j ) / & 1359 REAL( ny+1 ) ) ) / ( dy * dy ) 1360 ELSE 1361 ll(i,j) = 2.0 * ( 1.0 - COS( ( 2.0 * pi * ( nx+1-i ) ) / & 1362 REAL( nx+1 ) ) ) / ( dx * dx ) + & 1363 2.0 * ( 1.0 - COS( ( 2.0 * pi * j ) / & 1364 REAL( ny+1 ) ) ) / ( dy * dy ) 1365 ENDIF 1366 ELSE 1367 IF ( i >= 0 .AND. i <= nnxh ) THEN 1368 ll(i,j) = 2.0 * ( 1.0 - COS( ( 2.0 * pi * i ) / & 1369 REAL( nx+1 ) ) ) / ( dx * dx ) + & 1370 2.0 * ( 1.0 - COS( ( 2.0 * pi * ( ny+1-j ) ) / & 1371 REAL( ny+1 ) ) ) / ( dy * dy ) 1372 ELSE 1373 ll(i,j) = 2.0 * ( 1.0 - COS( ( 2.0 * pi * ( nx+1-i ) ) / & 1374 REAL( nx+1 ) ) ) / ( dx * dx ) + & 1375 2.0 * ( 1.0 - COS( ( 2.0 * pi * ( ny+1-j ) ) / & 1376 REAL( ny+1 ) ) ) / ( dy * dy ) 1377 ENDIF 1378 ENDIF 1379 ENDDO 1380 ENDDO 1381 1382 !$acc loop 1383 DO k = 0, nz-1 1384 DO j = nys_z, nyn_z 1385 !$acc loop vector( 32 ) 1386 DO i = nxl_z, nxr_z 1387 tric(i,j,k) = ddzuw(k,3) - ll(i,j) 1388 ENDDO 1389 ENDDO 1390 ENDDO 1391 !$acc end kernels 1392 1393 IF ( ibc_p_b == 1 ) THEN 1394 !$acc kernels present( tric ) 1395 !$acc loop 1396 DO j = nys_z, nyn_z 1397 DO i = nxl_z, nxr_z 1398 tric(i,j,0) = tric(i,j,0) + ddzuw(0,1) 1399 ENDDO 1400 ENDDO 1401 !$acc end kernels 1402 ENDIF 1403 IF ( ibc_p_t == 1 ) THEN 1404 !$acc kernels present( tric ) 1405 !$acc loop 1406 DO j = nys_z, nyn_z 1407 DO i = nxl_z, nxr_z 1408 tric(i,j,nz-1) = tric(i,j,nz-1) + ddzuw(nz-1,2) 1409 ENDDO 1410 ENDDO 1411 !$acc end kernels 1412 ENDIF 1413 1414 END SUBROUTINE maketri 1415 1416 1417 SUBROUTINE substi( ar, tri ) 1418 1419 !------------------------------------------------------------------------------! 1420 ! Substitution (Forward and Backward) (Thomas algorithm) 1421 !------------------------------------------------------------------------------! 1422 1423 USE control_parameters 1424 1425 IMPLICIT NONE 1426 1427 INTEGER :: i, j, k 1428 1429 REAL :: ar(nxl_z:nxr_z,nys_z:nyn_z,1:nz) 1430 REAL, DIMENSION(nxl_z:nxr_z,nys_z:nyn_z,0:nz-1,2) :: tri 1431 1432 !$acc declare create( ar1 ) 1433 REAL, DIMENSION(nxl_z:nxr_z,nys_z:nyn_z,0:nz-1) :: ar1 1434 1435 ! 1436 !-- Forward substitution 1437 DO k = 0, nz - 1 1438 !$acc kernels present( ar, tri ) 1439 !$acc loop 1440 DO j = nys_z, nyn_z 1441 DO i = nxl_z, nxr_z 1442 1443 IF ( k == 0 ) THEN 1444 ar1(i,j,k) = ar(i,j,k+1) 1445 ELSE 1446 ar1(i,j,k) = ar(i,j,k+1) - tri(i,j,k,2) * ar1(i,j,k-1) 1447 ENDIF 1448 1449 ENDDO 1450 ENDDO 1451 !$acc end kernels 1452 ENDDO 1453 1454 ! 1455 !-- Backward substitution 1456 !-- Note, the 1.0E-20 in the denominator is due to avoid divisions 1457 !-- by zero appearing if the pressure bc is set to neumann at the top of 1458 !-- the model domain. 1459 DO k = nz-1, 0, -1 1460 !$acc kernels present( ar, tri ) 1461 !$acc loop 1462 DO j = nys_z, nyn_z 1463 DO i = nxl_z, nxr_z 1464 1465 IF ( k == nz-1 ) THEN 1466 ar(i,j,k+1) = ar1(i,j,k) / ( tri(i,j,k,1) + 1.0E-20 ) 1467 ELSE 1468 ar(i,j,k+1) = ( ar1(i,j,k) - ddzuw(k,2) * ar(i,j,k+2) ) & 1469 / tri(i,j,k,1) 1470 ENDIF 1471 ENDDO 1472 ENDDO 1473 !$acc end kernels 1474 ENDDO 1475 1476 ! 1477 !-- Indices i=0, j=0 correspond to horizontally averaged pressure. 1478 !-- The respective values of ar should be zero at all k-levels if 1479 !-- acceleration of horizontally averaged vertical velocity is zero. 1480 IF ( ibc_p_b == 1 .AND. ibc_p_t == 1 ) THEN 1481 IF ( nys_z == 0 .AND. nxl_z == 0 ) THEN 1482 !$acc kernels loop present( ar ) 1483 DO k = 1, nz 1484 ar(nxl_z,nys_z,k) = 0.0 1485 ENDDO 1486 ENDIF 1487 ENDIF 1488 1489 END SUBROUTINE substi 1490 1491 1492 SUBROUTINE split( tri ) 1493 1494 !------------------------------------------------------------------------------! 1495 ! Splitting of the tridiagonal matrix (Thomas algorithm) 1496 !------------------------------------------------------------------------------! 1497 1498 USE arrays_3d, ONLY: tric 1499 1500 IMPLICIT NONE 1501 1502 INTEGER :: i, j, k 1503 1504 REAL, DIMENSION(nxl_z:nxr_z,nys_z:nyn_z,0:nz-1,2) :: tri 1505 1506 ! 1507 !-- Splitting 1508 !$acc kernels present( tri, tric ) 1509 !$acc loop 1510 DO j = nys_z, nyn_z 1511 !$acc loop vector( 32 ) 1512 DO i = nxl_z, nxr_z 1513 tri(i,j,0,1) = tric(i,j,0) 1514 ENDDO 1515 ENDDO 1516 !$acc end kernels 1517 1518 DO k = 1, nz-1 1519 !$acc kernels present( tri, tric ) 1520 !$acc loop 1521 DO j = nys_z, nyn_z 1522 !$acc loop vector( 32 ) 1523 DO i = nxl_z, nxr_z 1524 tri(i,j,k,2) = ddzuw(k,1) / tri(i,j,k-1,1) 1525 tri(i,j,k,1) = tric(i,j,k) - ddzuw(k-1,2) * tri(i,j,k,2) 1526 ENDDO 1527 ENDDO 1528 !$acc end kernels 1529 ENDDO 1530 1531 END SUBROUTINE split 1532 1533 #endif 1534 1534 1535 END MODULE poisfft_mod
Note: See TracChangeset
for help on using the changeset viewer.