Changeset 1212 for palm/trunk/SOURCE/poisfft.f90
- Timestamp:
- Aug 15, 2013 8:46:27 AM (12 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
palm/trunk/SOURCE/poisfft.f90
r1209 r1212 20 20 ! Current revisions: 21 21 ! ----------------- 22 ! 22 ! tridia routines moved to seperate module tridia_solver 23 23 ! 24 24 ! Former revisions: … … 171 171 USE indices 172 172 USE transpose_indices 173 USE tridia_solver 173 174 174 175 IMPLICIT NONE 175 176 176 177 LOGICAL, SAVE :: poisfft_initialized = .FALSE. 177 178 REAL, DIMENSION(:,:), ALLOCATABLE :: ddzuw179 178 180 179 PRIVATE … … 211 210 CALL fft_init 212 211 213 ALLOCATE( ddzuw(0:nz-1,3) ) 214 215 DO k = 0, nz-1 216 ddzuw(k,1) = ddzu_pres(k+1) * ddzw(k+1) 217 ddzuw(k,2) = ddzu_pres(k+2) * ddzw(k+1) 218 ddzuw(k,3) = -1.0 * & 219 ( ddzu_pres(k+2) * ddzw(k+1) + ddzu_pres(k+1) * ddzw(k+1) ) 220 ENDDO 221 ! 222 !-- Calculate constant coefficients of the tridiagonal matrix 223 #if ! defined ( __check ) 224 CALL maketri 225 #endif 212 CALL tridia_init 226 213 227 214 poisfft_initialized = .TRUE. … … 323 310 !-- Solve the tridiagonal equation system along z 324 311 CALL cpu_log( log_point_s(6), 'tridia', 'start' ) 325 CALL tridia ( ar )312 CALL tridia_substi( ar ) 326 313 CALL cpu_log( log_point_s(6), 'tridia', 'stop' ) 327 314 … … 1091 1078 END SUBROUTINE ffty_tri_ffty 1092 1079 1093 1094 SUBROUTINE tridia_1dd( ddx2, ddy2, nx, ny, j, ar, tri )1095 1096 !------------------------------------------------------------------------------!1097 ! Solves the linear system of equations for a 1d-decomposition along x (see1098 ! tridia)1099 !1100 ! Attention: when using the intel compilers older than 12.0, array tri must1101 ! be passed as an argument to the contained subroutines. Otherwise1102 ! addres faults will occur. This feature can be activated with1103 ! cpp-switch __intel111104 ! On NEC, tri should not be passed (except for routine substi_1dd)1105 ! because this causes very bad performance.1106 !------------------------------------------------------------------------------!1107 1108 USE arrays_3d1109 USE control_parameters1110 1111 USE pegrid1112 1113 IMPLICIT NONE1114 1115 INTEGER :: i, j, k, nnyh, nx, ny, omp_get_thread_num, tn1116 1117 REAL :: ddx2, ddy21118 1119 REAL, DIMENSION(0:nx,1:nz) :: ar1120 REAL, DIMENSION(5,0:nx,0:nz-1) :: tri1121 1122 1123 nnyh = ( ny + 1 ) / 21124 1125 !1126 !-- Define constant elements of the tridiagonal matrix.1127 !-- The compiler on SX6 does loop exchange. If 0:nx is a high power of 2,1128 !-- the exchanged loops create bank conflicts. The following directive1129 !-- prohibits loop exchange and the loops perform much better.1130 ! tn = omp_get_thread_num()1131 ! WRITE( 120+tn, * ) '+++ id=',myid,' nx=',nx,' thread=', omp_get_thread_num()1132 ! CALL local_flush( 120+tn )1133 !CDIR NOLOOPCHG1134 DO k = 0, nz-11135 DO i = 0,nx1136 tri(2,i,k) = ddzu_pres(k+1) * ddzw(k+1)1137 tri(3,i,k) = ddzu_pres(k+2) * ddzw(k+1)1138 ENDDO1139 ENDDO1140 ! WRITE( 120+tn, * ) '+++ id=',myid,' end of first tridia loop thread=', omp_get_thread_num()1141 ! CALL local_flush( 120+tn )1142 1143 IF ( j <= nnyh ) THEN1144 #if defined( __intel11 )1145 CALL maketri_1dd( j, tri )1146 #else1147 CALL maketri_1dd( j )1148 1080 #endif 1149 ELSE1150 #if defined( __intel11 )1151 CALL maketri_1dd( ny+1-j, tri )1152 #else1153 CALL maketri_1dd( ny+1-j )1154 #endif1155 ENDIF1156 #if defined( __intel11 )1157 CALL split_1dd( tri )1158 #else1159 CALL split_1dd1160 #endif1161 CALL substi_1dd( ar, tri )1162 1163 CONTAINS1164 1165 #if defined( __intel11 )1166 SUBROUTINE maketri_1dd( j, tri )1167 #else1168 SUBROUTINE maketri_1dd( j )1169 #endif1170 1171 !------------------------------------------------------------------------------!1172 ! computes the i- and j-dependent component of the matrix1173 !------------------------------------------------------------------------------!1174 1175 USE constants1176 1177 IMPLICIT NONE1178 1179 INTEGER :: i, j, k, nnxh1180 REAL :: a, c1181 1182 REAL, DIMENSION(0:nx) :: l1183 1184 #if defined( __intel11 )1185 REAL, DIMENSION(5,0:nx,0:nz-1) :: tri1186 #endif1187 1188 1189 nnxh = ( nx + 1 ) / 21190 !1191 !-- Provide the tridiagonal matrix for solution of the Poisson equation in1192 !-- Fourier space. The coefficients are computed following the method of1193 !-- Schmidt et al. (DFVLR-Mitteilung 84-15), which departs from Stephan1194 !-- Siano's original version by discretizing the Poisson equation,1195 !-- before it is Fourier-transformed1196 DO i = 0, nx1197 IF ( i >= 0 .AND. i <= nnxh ) THEN1198 l(i) = 2.0 * ( 1.0 - COS( ( 2.0 * pi * i ) / &1199 REAL( nx+1 ) ) ) * ddx2 + &1200 2.0 * ( 1.0 - COS( ( 2.0 * pi * j ) / &1201 REAL( ny+1 ) ) ) * ddy21202 ELSE1203 l(i) = 2.0 * ( 1.0 - COS( ( 2.0 * pi * ( nx+1-i ) ) / &1204 REAL( nx+1 ) ) ) * ddx2 + &1205 2.0 * ( 1.0 - COS( ( 2.0 * pi * j ) / &1206 REAL( ny+1 ) ) ) * ddy21207 ENDIF1208 ENDDO1209 1210 DO k = 0, nz-11211 DO i = 0, nx1212 a = -1.0 * ddzu_pres(k+2) * ddzw(k+1)1213 c = -1.0 * ddzu_pres(k+1) * ddzw(k+1)1214 tri(1,i,k) = a + c - l(i)1215 ENDDO1216 ENDDO1217 IF ( ibc_p_b == 1 ) THEN1218 DO i = 0, nx1219 tri(1,i,0) = tri(1,i,0) + tri(2,i,0)1220 ENDDO1221 ENDIF1222 IF ( ibc_p_t == 1 ) THEN1223 DO i = 0, nx1224 tri(1,i,nz-1) = tri(1,i,nz-1) + tri(3,i,nz-1)1225 ENDDO1226 ENDIF1227 1228 END SUBROUTINE maketri_1dd1229 1230 1231 #if defined( __intel11 )1232 SUBROUTINE split_1dd( tri )1233 #else1234 SUBROUTINE split_1dd1235 #endif1236 1237 !------------------------------------------------------------------------------!1238 ! Splitting of the tridiagonal matrix (Thomas algorithm)1239 !------------------------------------------------------------------------------!1240 1241 IMPLICIT NONE1242 1243 INTEGER :: i, k1244 1245 #if defined( __intel11 )1246 REAL, DIMENSION(5,0:nx,0:nz-1) :: tri1247 #endif1248 1249 1250 !1251 !-- Splitting1252 DO i = 0, nx1253 tri(4,i,0) = tri(1,i,0)1254 ENDDO1255 DO k = 1, nz-11256 DO i = 0, nx1257 tri(5,i,k) = tri(2,i,k) / tri(4,i,k-1)1258 tri(4,i,k) = tri(1,i,k) - tri(3,i,k-1) * tri(5,i,k)1259 ENDDO1260 ENDDO1261 1262 END SUBROUTINE split_1dd1263 1264 1265 SUBROUTINE substi_1dd( ar, tri )1266 1267 !------------------------------------------------------------------------------!1268 ! Substitution (Forward and Backward) (Thomas algorithm)1269 !------------------------------------------------------------------------------!1270 1271 IMPLICIT NONE1272 1273 INTEGER :: i, k1274 1275 REAL, DIMENSION(0:nx,nz) :: ar1276 REAL, DIMENSION(0:nx,0:nz-1) :: ar11277 REAL, DIMENSION(5,0:nx,0:nz-1) :: tri1278 1279 !1280 !-- Forward substitution1281 DO i = 0, nx1282 ar1(i,0) = ar(i,1)1283 ENDDO1284 DO k = 1, nz-11285 DO i = 0, nx1286 ar1(i,k) = ar(i,k+1) - tri(5,i,k) * ar1(i,k-1)1287 ENDDO1288 ENDDO1289 1290 !1291 !-- Backward substitution1292 !-- Note, the add of 1.0E-20 in the denominator is due to avoid divisions1293 !-- by zero appearing if the pressure bc is set to neumann at the top of1294 !-- the model domain.1295 DO i = 0, nx1296 ar(i,nz) = ar1(i,nz-1) / ( tri(4,i,nz-1) + 1.0E-20 )1297 ENDDO1298 DO k = nz-2, 0, -11299 DO i = 0, nx1300 ar(i,k+1) = ( ar1(i,k) - tri(3,i,k) * ar(i,k+2) ) &1301 / tri(4,i,k)1302 ENDDO1303 ENDDO1304 1305 !1306 !-- Indices i=0, j=0 correspond to horizontally averaged pressure.1307 !-- The respective values of ar should be zero at all k-levels if1308 !-- acceleration of horizontally averaged vertical velocity is zero.1309 IF ( ibc_p_b == 1 .AND. ibc_p_t == 1 ) THEN1310 IF ( j == 0 ) THEN1311 DO k = 1, nz1312 ar(0,k) = 0.01313 ENDDO1314 ENDIF1315 ENDIF1316 1317 END SUBROUTINE substi_1dd1318 1319 END SUBROUTINE tridia_1dd1320 1321 1322 SUBROUTINE tridia( ar )1323 1324 !------------------------------------------------------------------------------!1325 ! solves the linear system of equations:1326 !1327 ! -(4 pi^2(i^2/(dx^2*nnx^2)+j^2/(dy^2*nny^2))+1328 ! 1/(dzu(k)*dzw(k))+1/(dzu(k-1)*dzw(k)))*p(i,j,k)+1329 ! 1/(dzu(k)*dzw(k))*p(i,j,k+1)+1/(dzu(k-1)*dzw(k))*p(i,j,k-1)=d(i,j,k)1330 !1331 ! by using the Thomas algorithm1332 !------------------------------------------------------------------------------!1333 1334 USE arrays_3d1335 1336 IMPLICIT NONE1337 1338 INTEGER :: i, j, k1339 1340 !$acc declare create( tri )1341 REAL, DIMENSION(nxl_z:nxr_z,nys_z:nyn_z,0:nz-1,2) :: tri1342 1343 REAL :: ar(nxl_z:nxr_z,nys_z:nyn_z,1:nz)1344 1345 1346 CALL split( tri )1347 CALL substi( ar, tri )1348 1349 END SUBROUTINE tridia1350 1351 1352 SUBROUTINE maketri1353 1354 !------------------------------------------------------------------------------!1355 ! Computes the i- and j-dependent component of the matrix1356 !------------------------------------------------------------------------------!1357 1358 USE arrays_3d, ONLY: tric1359 USE constants1360 USE control_parameters1361 USE grid_variables1362 1363 IMPLICIT NONE1364 1365 INTEGER :: i, j, k, nnxh, nnyh1366 1367 !$acc declare create( ll )1368 REAL :: ll(nxl_z:nxr_z,nys_z:nyn_z)1369 1370 1371 nnxh = ( nx + 1 ) / 21372 nnyh = ( ny + 1 ) / 21373 1374 !1375 !-- Provide the constant coefficients of the tridiagonal matrix for solution1376 !-- of the Poisson equation in Fourier space.1377 !-- The coefficients are computed following the method of1378 !-- Schmidt et al. (DFVLR-Mitteilung 84-15), which departs from Stephan1379 !-- Siano's original version by discretizing the Poisson equation,1380 !-- before it is Fourier-transformed.1381 1382 !$acc kernels present( tric )1383 !$acc loop vector( 32 )1384 DO j = nys_z, nyn_z1385 DO i = nxl_z, nxr_z1386 IF ( j >= 0 .AND. j <= nnyh ) THEN1387 IF ( i >= 0 .AND. i <= nnxh ) THEN1388 ll(i,j) = 2.0 * ( 1.0 - COS( ( 2.0 * pi * i ) / &1389 REAL( nx+1 ) ) ) / ( dx * dx ) + &1390 2.0 * ( 1.0 - COS( ( 2.0 * pi * j ) / &1391 REAL( ny+1 ) ) ) / ( dy * dy )1392 ELSE1393 ll(i,j) = 2.0 * ( 1.0 - COS( ( 2.0 * pi * ( nx+1-i ) ) / &1394 REAL( nx+1 ) ) ) / ( dx * dx ) + &1395 2.0 * ( 1.0 - COS( ( 2.0 * pi * j ) / &1396 REAL( ny+1 ) ) ) / ( dy * dy )1397 ENDIF1398 ELSE1399 IF ( i >= 0 .AND. i <= nnxh ) THEN1400 ll(i,j) = 2.0 * ( 1.0 - COS( ( 2.0 * pi * i ) / &1401 REAL( nx+1 ) ) ) / ( dx * dx ) + &1402 2.0 * ( 1.0 - COS( ( 2.0 * pi * ( ny+1-j ) ) / &1403 REAL( ny+1 ) ) ) / ( dy * dy )1404 ELSE1405 ll(i,j) = 2.0 * ( 1.0 - COS( ( 2.0 * pi * ( nx+1-i ) ) / &1406 REAL( nx+1 ) ) ) / ( dx * dx ) + &1407 2.0 * ( 1.0 - COS( ( 2.0 * pi * ( ny+1-j ) ) / &1408 REAL( ny+1 ) ) ) / ( dy * dy )1409 ENDIF1410 ENDIF1411 ENDDO1412 ENDDO1413 1414 !$acc loop1415 DO k = 0, nz-11416 DO j = nys_z, nyn_z1417 !$acc loop vector( 32 )1418 DO i = nxl_z, nxr_z1419 tric(i,j,k) = ddzuw(k,3) - ll(i,j)1420 ENDDO1421 ENDDO1422 ENDDO1423 !$acc end kernels1424 1425 IF ( ibc_p_b == 1 ) THEN1426 !$acc kernels present( tric )1427 !$acc loop1428 DO j = nys_z, nyn_z1429 DO i = nxl_z, nxr_z1430 tric(i,j,0) = tric(i,j,0) + ddzuw(0,1)1431 ENDDO1432 ENDDO1433 !$acc end kernels1434 ENDIF1435 IF ( ibc_p_t == 1 ) THEN1436 !$acc kernels present( tric )1437 !$acc loop1438 DO j = nys_z, nyn_z1439 DO i = nxl_z, nxr_z1440 tric(i,j,nz-1) = tric(i,j,nz-1) + ddzuw(nz-1,2)1441 ENDDO1442 ENDDO1443 !$acc end kernels1444 ENDIF1445 1446 END SUBROUTINE maketri1447 1448 1449 SUBROUTINE substi( ar, tri )1450 1451 !------------------------------------------------------------------------------!1452 ! Substitution (Forward and Backward) (Thomas algorithm)1453 !------------------------------------------------------------------------------!1454 1455 USE control_parameters1456 1457 IMPLICIT NONE1458 1459 INTEGER :: i, j, k1460 1461 REAL :: ar(nxl_z:nxr_z,nys_z:nyn_z,1:nz)1462 REAL, DIMENSION(nxl_z:nxr_z,nys_z:nyn_z,0:nz-1,2) :: tri1463 1464 !$acc declare create( ar1 )1465 REAL, DIMENSION(nxl_z:nxr_z,nys_z:nyn_z,0:nz-1) :: ar11466 1467 !1468 !-- Forward substitution1469 DO k = 0, nz - 11470 !$acc kernels present( ar, tri )1471 !$acc loop1472 DO j = nys_z, nyn_z1473 DO i = nxl_z, nxr_z1474 1475 IF ( k == 0 ) THEN1476 ar1(i,j,k) = ar(i,j,k+1)1477 ELSE1478 ar1(i,j,k) = ar(i,j,k+1) - tri(i,j,k,2) * ar1(i,j,k-1)1479 ENDIF1480 1481 ENDDO1482 ENDDO1483 !$acc end kernels1484 ENDDO1485 1486 !1487 !-- Backward substitution1488 !-- Note, the 1.0E-20 in the denominator is due to avoid divisions1489 !-- by zero appearing if the pressure bc is set to neumann at the top of1490 !-- the model domain.1491 DO k = nz-1, 0, -11492 !$acc kernels present( ar, tri )1493 !$acc loop1494 DO j = nys_z, nyn_z1495 DO i = nxl_z, nxr_z1496 1497 IF ( k == nz-1 ) THEN1498 ar(i,j,k+1) = ar1(i,j,k) / ( tri(i,j,k,1) + 1.0E-20 )1499 ELSE1500 ar(i,j,k+1) = ( ar1(i,j,k) - ddzuw(k,2) * ar(i,j,k+2) ) &1501 / tri(i,j,k,1)1502 ENDIF1503 ENDDO1504 ENDDO1505 !$acc end kernels1506 ENDDO1507 1508 !1509 !-- Indices i=0, j=0 correspond to horizontally averaged pressure.1510 !-- The respective values of ar should be zero at all k-levels if1511 !-- acceleration of horizontally averaged vertical velocity is zero.1512 IF ( ibc_p_b == 1 .AND. ibc_p_t == 1 ) THEN1513 IF ( nys_z == 0 .AND. nxl_z == 0 ) THEN1514 !$acc kernels loop present( ar )1515 DO k = 1, nz1516 ar(nxl_z,nys_z,k) = 0.01517 ENDDO1518 ENDIF1519 ENDIF1520 1521 END SUBROUTINE substi1522 1523 1524 SUBROUTINE split( tri )1525 1526 !------------------------------------------------------------------------------!1527 ! Splitting of the tridiagonal matrix (Thomas algorithm)1528 !------------------------------------------------------------------------------!1529 1530 USE arrays_3d, ONLY: tric1531 1532 IMPLICIT NONE1533 1534 INTEGER :: i, j, k1535 1536 REAL, DIMENSION(nxl_z:nxr_z,nys_z:nyn_z,0:nz-1,2) :: tri1537 1538 !1539 !-- Splitting1540 !$acc kernels present( tri, tric )1541 !$acc loop1542 DO j = nys_z, nyn_z1543 !$acc loop vector( 32 )1544 DO i = nxl_z, nxr_z1545 tri(i,j,0,1) = tric(i,j,0)1546 ENDDO1547 ENDDO1548 !$acc end kernels1549 1550 DO k = 1, nz-11551 !$acc kernels present( tri, tric )1552 !$acc loop1553 DO j = nys_z, nyn_z1554 !$acc loop vector( 32 )1555 DO i = nxl_z, nxr_z1556 tri(i,j,k,2) = ddzuw(k,1) / tri(i,j,k-1,1)1557 tri(i,j,k,1) = tric(i,j,k) - ddzuw(k-1,2) * tri(i,j,k,2)1558 ENDDO1559 ENDDO1560 !$acc end kernels1561 ENDDO1562 1563 END SUBROUTINE split1564 1565 #endif1566 1081 1567 1082 END MODULE poisfft_mod
Note: See TracChangeset
for help on using the changeset viewer.