Changeset 1221
- Timestamp: Sep 10, 2013 8:59:13 AM (11 years ago)
- Location: palm/trunk
- Files: 11 edited
Legend:
- Unmodified
- Added
- Removed
palm/trunk/INSTALL/MAKE.inc.pgi.openacc
r1171 r1221 6 6 F90=pgf90 7 7 COPT= -Mpreprocess -D__nopointer -D__openacc -D__cuda_fft -D__lc 8 F90FLAGS= -acc -ta=nvidia,5.0 -Minfo=acc -Mcray=pointer -fastsse -r8 -Mcuda=cuda5.09 LDFLAGS= -acc -ta=nvidia,5.0 -Minfo=acc -Mcray=pointer -fastsse -r8 -Mcuda=cuda5.0 -L/muksoft/packages/pgi/2013-133/linux86-64/2013/cuda/5.0/lib64 -lcufft8 F90FLAGS= -acc -ta=nvidia,5.0,nocache,time -Minfo=acc -Mcray=pointer -fastsse -r8 -Mcuda=cuda5.0 9 LDFLAGS= -acc -ta=nvidia,5.0,nocache,time -Minfo=acc -Mcray=pointer -fastsse -r8 -Mcuda=cuda5.0 -L/muksoft/packages/pgi/2013-133/linux86-64/2013/cuda/5.0/lib64 -lcufft -
palm/trunk/SCRIPTS/palm_simple_run
r1172 r1221 24 24 # ----------------- 25 25 # $Id$ 26 # 27 # 1172 2013-05-30 11:46:00Z raasch 28 # for performance reasons set PGI_ACC_SYNCHRONOUS=1 for pgi/openacc execution 26 29 # 27 30 # 1171 2013-05-30 11:27:45Z raasch … … 165 168 (sgi-mpt) mpiexec_mpt -np $mpi_procs ./palm < runfile_atmos;; 166 169 (hpc-flow) mpiexec -machinefile $TMPDIR/machines -n $mpi_procs -env I_MPI_FABRICS shm:ofa ./palm < runfile_atmos;; 167 (pgi-openacc) ./palm;;170 (pgi-openacc) export PGI_ACC_SYNCHRONOUS=1; ./palm;; 168 171 (*) echo "+++ -e option to define execution command is missing";; 169 172 -
palm/trunk/SOURCE/advec_ws.f90
r1132 r1221 20 20 ! Current revisions: 21 21 ! ------------------ 22 ! 22 ! wall_flags_00 introduced, which holds bits 32-... 23 23 ! 24 24 ! Former revisions: … … 1648 1648 1649 1649 DO k = nzb+1, nzb_max 1650 ibit32 = IBITS(wall_flags_0 (k,j,i),32,1)1650 ibit32 = IBITS(wall_flags_00(k,j,i),0,1) 1651 1651 ibit31 = IBITS(wall_flags_0(k,j,i),31,1) 1652 1652 ibit30 = IBITS(wall_flags_0(k,j,i),30,1) … … 1808 1808 ) 1809 1809 1810 ibit32 = IBITS(wall_flags_0 (k,j,i),32,1)1810 ibit32 = IBITS(wall_flags_00(k,j,i),0,1) 1811 1811 ibit31 = IBITS(wall_flags_0(k,j,i),31,1) 1812 1812 ibit30 = IBITS(wall_flags_0(k,j,i),30,1) … … 1845 1845 !-- k index has to be modified near bottom and top, else array 1846 1846 !-- subscripts will be exceeded. 1847 ibit35 = IBITS(wall_flags_0 (k,j,i),35,1)1848 ibit34 = IBITS(wall_flags_0 (k,j,i),34,1)1849 ibit33 = IBITS(wall_flags_0 (k,j,i),33,1)1847 ibit35 = IBITS(wall_flags_00(k,j,i),3,1) 1848 ibit34 = IBITS(wall_flags_00(k,j,i),2,1) 1849 ibit33 = IBITS(wall_flags_00(k,j,i),1,1) 1850 1850 1851 1851 k_ppp = k + 3 * ibit35 … … 1942 1942 !-- k index has to be modified near bottom and top, else array 1943 1943 !-- subscripts will be exceeded. 1944 ibit35 = IBITS(wall_flags_0 (k,j,i),35,1)1945 ibit34 = IBITS(wall_flags_0 (k,j,i),34,1)1946 ibit33 = IBITS(wall_flags_0 (k,j,i),33,1)1944 ibit35 = IBITS(wall_flags_00(k,j,i),3,1) 1945 ibit34 = IBITS(wall_flags_00(k,j,i),2,1) 1946 ibit33 = IBITS(wall_flags_00(k,j,i),1,1) 1947 1947 1948 1948 k_ppp = k + 3 * ibit35 … … 2454 2454 ! 2455 2455 !-- Computation of fluxes and tendency terms 2456 !$acc kernels present( ddzw, sk, tend, u, v, w, wall_flags_0 )2456 !$acc kernels present( ddzw, sk, tend, u, v, w, wall_flags_0, wall_flags_00 ) 2457 2457 !$acc loop 2458 2458 DO i = i_left, i_right … … 3155 3155 ! 
3156 3156 !-- Computation of fluxes and tendency terms 3157 !$acc kernels present( ddzw, tend, u, v, w, wall_flags_0 )3157 !$acc kernels present( ddzw, tend, u, v, w, wall_flags_0, wall_flags_00 ) 3158 3158 !$acc loop 3159 3159 DO i = i_left, i_right … … 3872 3872 ! 3873 3873 !-- Computation of fluxes and tendency terms 3874 !$acc kernels present( ddzw, tend, u, v, w, wall_flags_0 )3874 !$acc kernels present( ddzw, tend, u, v, w, wall_flags_0, wall_flags_00 ) 3875 3875 !$acc loop 3876 3876 DO i = i_left, i_right … … 4226 4226 DO k = nzb+1, nzb_max 4227 4227 4228 ibit32 = IBITS(wall_flags_0 (k,j,i),32,1)4228 ibit32 = IBITS(wall_flags_00(k,j,i),0,1) 4229 4229 ibit31 = IBITS(wall_flags_0(k,j,i),31,1) 4230 4230 ibit30 = IBITS(wall_flags_0(k,j,i),30,1) … … 4327 4327 ) 4328 4328 4329 ibit32 = IBITS(wall_flags_0 (k,j,i),32,1)4329 ibit32 = IBITS(wall_flags_00(k,j,i),0,1) 4330 4330 ibit31 = IBITS(wall_flags_0(k,j,i),31,1) 4331 4331 ibit30 = IBITS(wall_flags_0(k,j,i),30,1) … … 4364 4364 !-- k index has to be modified near bottom and top, else array 4365 4365 !-- subscripts will be exceeded. 4366 ibit35 = IBITS(wall_flags_0 (k,j,i),35,1)4367 ibit34 = IBITS(wall_flags_0 (k,j,i),34,1)4368 ibit33 = IBITS(wall_flags_0 (k,j,i),33,1)4366 ibit35 = IBITS(wall_flags_00(k,j,i),3,1) 4367 ibit34 = IBITS(wall_flags_00(k,j,i),2,1) 4368 ibit33 = IBITS(wall_flags_00(k,j,i),1,1) 4369 4369 4370 4370 k_ppp = k + 3 * ibit35 … … 4463 4463 !-- k index has to be modified near bottom and top, else array 4464 4464 !-- subscripts will be exceeded. 4465 ibit35 = IBITS(wall_flags_0 (k,j,i),35,1)4466 ibit34 = IBITS(wall_flags_0 (k,j,i),34,1)4467 ibit33 = IBITS(wall_flags_0 (k,j,i),33,1)4465 ibit35 = IBITS(wall_flags_00(k,j,i),3,1) 4466 ibit34 = IBITS(wall_flags_00(k,j,i),2,1) 4467 ibit33 = IBITS(wall_flags_00(k,j,i),1,1) 4468 4468 4469 4469 k_ppp = k + 3 * ibit35 … … 4565 4565 gv = 2.0 * v_gtrans 4566 4566 4567 4567 4568 ! 
4568 4569 !-- Computation of fluxes and tendency terms 4569 !$acc kernels present( ddzu, tend, u, v, w, wall_flags_0 )4570 !$acc kernels present( ddzu, tend, u, v, w, wall_flags_0, wall_flags_00 ) 4570 4571 !$acc loop 4571 4572 DO i = i_left, i_right … … 4574 4575 DO k = nzb+1, nzt 4575 4576 4577 ibit27 = IBITS(wall_flags_0(k,j,i),27,1) 4578 ibit28 = IBITS(wall_flags_0(k,j,i),28,1) 4576 4579 ibit29 = IBITS(wall_flags_0(k,j,i),29,1) 4577 ibit28 = IBITS(wall_flags_0(k,j,i),28,1)4578 ibit27 = IBITS(wall_flags_0(k,j,i),27,1)4579 4580 4580 4581 4581 u_comp_l = u(k+1,j,i) + u(k,j,i) - gu … … 4640 4640 ( w(k,j,i+3) - w(k,j,i-2) ) & 4641 4641 ) 4642 4643 ibit32 = IBITS(wall_flags_0(k,j,i),32,1) 4642 ibit32 = IBITS(wall_flags_00(k,j,i),0,1) 4644 4643 ibit31 = IBITS(wall_flags_0(k,j,i),31,1) 4645 4644 ibit30 = IBITS(wall_flags_0(k,j,i),30,1) … … 4707 4706 ) 4708 4707 4709 4710 ibit35 = IBITS(wall_flags_0(k-1,j,i),35,1) 4711 ibit34 = IBITS(wall_flags_0(k-1,j,i),34,1) 4712 ibit33 = IBITS(wall_flags_0(k-1,j,i),33,1) 4708 ibit35 = IBITS(wall_flags_00(k-1,j,i),3,1) 4709 ibit34 = IBITS(wall_flags_00(k-1,j,i),2,1) 4710 ibit33 = IBITS(wall_flags_00(k-1,j,i),1,1) 4713 4711 4714 4712 k_pp = k + 2 * ibit35 … … 4750 4748 !-- k index has to be modified near bottom and top, else array 4751 4749 !-- subscripts will be exceeded. 4752 ibit35 = IBITS(wall_flags_0 (k,j,i),35,1)4753 ibit34 = IBITS(wall_flags_0 (k,j,i),34,1)4754 ibit33 = IBITS(wall_flags_0 (k,j,i),33,1)4750 ibit35 = IBITS(wall_flags_00(k,j,i),3,1) 4751 ibit34 = IBITS(wall_flags_00(k,j,i),2,1) 4752 ibit33 = IBITS(wall_flags_00(k,j,i),1,1) 4755 4753 4756 4754 k_ppp = k + 3 * ibit35 -
palm/trunk/SOURCE/flow_statistics.f90
r1182 r1221 1 #if ! defined( __openacc ) 1 2 SUBROUTINE flow_statistics 2 3 … … 20 21 ! Current revisions: 21 22 ! ----------------- 22 ! 23 ! ported for openACC in separate branch 23 24 ! 24 25 ! Former revisions: … … 64 65 ! scheme. Furthermore the calculation will be the same for all advection 65 66 ! schemes. 66 ! 67 !, tend 67 68 ! 696 2011-03-18 07:03:49Z raasch 68 69 ! Bugfix: Summation of Wicker-Skamarock scheme fluxes and variances for all … … 182 183 CALL cpu_log( log_point(10), 'flow_statistics', 'start' ) 183 184 185 !$acc update host( km, kh, e, pt, qs, qsws, rif, shf, ts, u, usws, v, vsws, w ) 186 184 187 ! 185 188 !-- To be on the safe side, check whether flow_statistics has already been … … 192 195 193 196 ENDIF 194 195 !$acc update host( km, kh, e, pt, qs, qsws, rif, shf, ts, u, v, w )196 197 197 198 ! … … 1321 1322 1322 1323 END SUBROUTINE flow_statistics 1324 1325 1326 #else 1327 1328 1329 !------------------------------------------------------------------------------! 1330 ! flow statistics - accelerator version 1331 !------------------------------------------------------------------------------! 1332 SUBROUTINE flow_statistics 1333 1334 USE arrays_3d 1335 USE cloud_parameters 1336 USE control_parameters 1337 USE cpulog 1338 USE grid_variables 1339 USE indices 1340 USE interfaces 1341 USE pegrid 1342 USE statistics 1343 1344 IMPLICIT NONE 1345 1346 INTEGER :: i, j, k, omp_get_thread_num, sr, tn 1347 LOGICAL :: first 1348 REAL :: dptdz_threshold, height, pts, sums_l_eper, sums_l_etot, ust, & 1349 ust2, u2, vst, vst2, v2, w2, z_i(2) 1350 REAL :: s1, s2, s3, s4, s5, s6, s7 1351 REAL :: dptdz(nzb+1:nzt+1) 1352 REAL :: sums_ll(nzb:nzt+1,2) 1353 1354 CALL cpu_log( log_point(10), 'flow_statistics', 'start' ) 1355 1356 ! 
1357 !-- To be on the safe side, check whether flow_statistics has already been 1358 !-- called once after the current time step 1359 IF ( flow_statistics_called ) THEN 1360 1361 message_string = 'flow_statistics is called two times within one ' // & 1362 'timestep' 1363 CALL message( 'flow_statistics', 'PA0190', 1, 2, 0, 6, 0 ) 1364 1365 ENDIF 1366 1367 !$acc data copyin( hom ) create( sums, sums_l ) 1368 1369 ! 1370 !-- Compute statistics for each (sub-)region 1371 DO sr = 0, statistic_regions 1372 1373 ! 1374 !-- Initialize (local) summation array 1375 sums_l = 0.0 1376 1377 ! 1378 !-- Store sums that have been computed in other subroutines in summation 1379 !-- array 1380 sums_l(:,11,:) = sums_l_l(:,sr,:) ! mixing length from diffusivities 1381 !-- WARNING: next line still has to be adjusted for OpenMP 1382 sums_l(:,21,0) = sums_wsts_bc_l(:,sr) ! heat flux from advec_s_bc 1383 sums_l(nzb+9,pr_palm,0) = sums_divold_l(sr) ! old divergence from pres 1384 sums_l(nzb+10,pr_palm,0) = sums_divnew_l(sr) ! new divergence from pres 1385 1386 ! 1387 !-- Copy the turbulent quantities, evaluated in the advection routines to 1388 !-- the local array sums_l() for further computations 1389 IF ( ws_scheme_mom .AND. sr == 0 ) THEN 1390 1391 ! 1392 !-- According to the Neumann bc for the horizontal velocity components, 1393 !-- the corresponding fluxes has to satisfiy the same bc. 1394 IF ( ocean ) THEN 1395 sums_us2_ws_l(nzt+1,:) = sums_us2_ws_l(nzt,:) 1396 sums_vs2_ws_l(nzt+1,:) = sums_vs2_ws_l(nzt,:) 1397 ENDIF 1398 1399 DO i = 0, threads_per_task-1 1400 ! 1401 !-- Swap the turbulent quantities evaluated in advec_ws. 1402 sums_l(:,13,i) = sums_wsus_ws_l(:,i) ! w*u* 1403 sums_l(:,15,i) = sums_wsvs_ws_l(:,i) ! w*v* 1404 sums_l(:,30,i) = sums_us2_ws_l(:,i) ! u*2 1405 sums_l(:,31,i) = sums_vs2_ws_l(:,i) ! v*2 1406 sums_l(:,32,i) = sums_ws2_ws_l(:,i) ! w*2 1407 sums_l(:,34,i) = sums_l(:,34,i) + 0.5 * & 1408 ( sums_us2_ws_l(:,i) + sums_vs2_ws_l(:,i) + & 1409 sums_ws2_ws_l(:,i) ) ! 
e* 1410 DO k = nzb, nzt 1411 sums_l(nzb+5,pr_palm,i) = sums_l(nzb+5,pr_palm,i) + 0.5 * ( & 1412 sums_us2_ws_l(k,i) + & 1413 sums_vs2_ws_l(k,i) + & 1414 sums_ws2_ws_l(k,i) ) 1415 ENDDO 1416 ENDDO 1417 1418 ENDIF 1419 1420 IF ( ws_scheme_sca .AND. sr == 0 ) THEN 1421 1422 DO i = 0, threads_per_task-1 1423 sums_l(:,17,i) = sums_wspts_ws_l(:,i) ! w*pt* from advec_s_ws 1424 IF ( ocean ) sums_l(:,66,i) = sums_wssas_ws_l(:,i) ! w*sa* 1425 IF ( humidity .OR. passive_scalar ) sums_l(:,49,i) = & 1426 sums_wsqs_ws_l(:,i) !w*q* 1427 ENDDO 1428 1429 ENDIF 1430 ! 1431 !-- Horizontally averaged profiles of horizontal velocities and temperature. 1432 !-- They must have been computed before, because they are already required 1433 !-- for other horizontal averages. 1434 tn = 0 1435 1436 !$OMP PARALLEL PRIVATE( i, j, k, tn ) 1437 #if defined( __intel_openmp_bug ) 1438 tn = omp_get_thread_num() 1439 #else 1440 !$ tn = omp_get_thread_num() 1441 #endif 1442 1443 !$acc update device( sums_l ) 1444 1445 !$OMP DO 1446 !$acc parallel loop gang present( pt, rflags_invers, rmask, sums_l, u, v ) create( s1, s2, s3 ) 1447 DO k = nzb, nzt+1 1448 !$acc loop vector collapse( 2 ) reduction( +: s1, s2, s3 ) 1449 DO i = nxl, nxr 1450 DO j = nys, nyn 1451 ! 1452 !-- k+1 is used in rflags since rflags is set 0 at surface points 1453 s1 = s1 + u(k,j,i) * rmask(j,i,sr) * rflags_invers(j,i,k+1) 1454 s2 = s2 + v(k,j,i) * rmask(j,i,sr) * rflags_invers(j,i,k+1) 1455 s3 = s3 + pt(k,j,i) * rmask(j,i,sr) * rflags_invers(j,i,k+1) 1456 ENDDO 1457 ENDDO 1458 sums_l(k,1,tn) = s1 1459 sums_l(k,2,tn) = s2 1460 sums_l(k,4,tn) = s3 1461 ENDDO 1462 !$acc end parallel 1463 1464 ! 
1465 !-- Horizontally averaged profile of salinity 1466 IF ( ocean ) THEN 1467 !$OMP DO 1468 !$acc parallel loop gang present( rflags_invers, rmask, sums_l, sa ) create( s1 ) 1469 DO k = nzb, nzt+1 1470 !$acc loop vector collapse( 2 ) reduction( +: s1 ) 1471 DO i = nxl, nxr 1472 DO j = nys, nyn 1473 s1 = s1 + sa(k,j,i) * rmask(j,i,sr) * rflags_invers(j,i,k+1) 1474 ENDDO 1475 ENDDO 1476 sums_l(k,23,tn) = s1 1477 ENDDO 1478 !$acc end parallel 1479 ENDIF 1480 1481 ! 1482 !-- Horizontally averaged profiles of virtual potential temperature, 1483 !-- total water content, specific humidity and liquid water potential 1484 !-- temperature 1485 IF ( humidity ) THEN 1486 1487 !$OMP DO 1488 !$acc parallel loop gang present( q, rflags_invers, rmask, sums_l, vpt ) create( s1, s2 ) 1489 DO k = nzb, nzt+1 1490 !$acc loop vector collapse( 2 ) reduction( +: s1, s2 ) 1491 DO i = nxl, nxr 1492 DO j = nys, nyn 1493 s1 = s1 + q(k,j,i) * rmask(j,i,sr) * rflags_invers(j,i,k+1) 1494 s2 = s2 + vpt(k,j,i) * rmask(j,i,sr) * rflags_invers(j,i,k+1) 1495 ENDDO 1496 ENDDO 1497 sums_l(k,41,tn) = s1 1498 sums_l(k,44,tn) = s2 1499 ENDDO 1500 !$acc end parallel 1501 1502 IF ( cloud_physics ) THEN 1503 !$OMP DO 1504 !$acc parallel loop gang present( pt, q, ql, rflags_invers, rmask, sums_l ) create( s1, s2 ) 1505 DO k = nzb, nzt+1 1506 !$acc loop vector collapse( 2 ) reduction( +: s1, s2 ) 1507 DO i = nxl, nxr 1508 DO j = nys, nyn 1509 s1 = s1 + ( q(k,j,i) - ql(k,j,i) ) * & 1510 rmask(j,i,sr) * rflags_invers(j,i,k+1) 1511 s2 = s2 + ( pt(k,j,i) + l_d_cp*pt_d_t(k) * ql(k,j,i) ) * & 1512 rmask(j,i,sr) * rflags_invers(j,i,k+1) 1513 ENDDO 1514 ENDDO 1515 sums_l(k,42,tn) = s1 1516 sums_l(k,43,tn) = s2 1517 ENDDO 1518 !$acc end parallel 1519 ENDIF 1520 ENDIF 1521 1522 ! 
1523 !-- Horizontally averaged profiles of passive scalar 1524 IF ( passive_scalar ) THEN 1525 !$OMP DO 1526 !$acc parallel loop gang present( q, rflags_invers, rmask, sums_l ) create( s1 ) 1527 DO k = nzb, nzt+1 1528 !$acc loop vector collapse( 2 ) reduction( +: s1 ) 1529 DO i = nxl, nxr 1530 DO j = nys, nyn 1531 s1 = s1 + q(k,j,i) * rmask(j,i,sr) * rflags_invers(j,i,k+1) 1532 ENDDO 1533 ENDDO 1534 sums_l(k,41,tn) = s1 1535 ENDDO 1536 !$acc end parallel 1537 ENDIF 1538 !$OMP END PARALLEL 1539 1540 ! 1541 !-- Summation of thread sums 1542 IF ( threads_per_task > 1 ) THEN 1543 DO i = 1, threads_per_task-1 1544 !$acc parallel present( sums_l ) 1545 sums_l(:,1,0) = sums_l(:,1,0) + sums_l(:,1,i) 1546 sums_l(:,2,0) = sums_l(:,2,0) + sums_l(:,2,i) 1547 sums_l(:,4,0) = sums_l(:,4,0) + sums_l(:,4,i) 1548 !$acc end parallel 1549 IF ( ocean ) THEN 1550 !$acc parallel present( sums_l ) 1551 sums_l(:,23,0) = sums_l(:,23,0) + sums_l(:,23,i) 1552 !$acc end parallel 1553 ENDIF 1554 IF ( humidity ) THEN 1555 !$acc parallel present( sums_l ) 1556 sums_l(:,41,0) = sums_l(:,41,0) + sums_l(:,41,i) 1557 sums_l(:,44,0) = sums_l(:,44,0) + sums_l(:,44,i) 1558 !$acc end parallel 1559 IF ( cloud_physics ) THEN 1560 !$acc parallel present( sums_l ) 1561 sums_l(:,42,0) = sums_l(:,42,0) + sums_l(:,42,i) 1562 sums_l(:,43,0) = sums_l(:,43,0) + sums_l(:,43,i) 1563 !$acc end parallel 1564 ENDIF 1565 ENDIF 1566 IF ( passive_scalar ) THEN 1567 !$acc parallel present( sums_l ) 1568 sums_l(:,41,0) = sums_l(:,41,0) + sums_l(:,41,i) 1569 !$acc end parallel 1570 ENDIF 1571 ENDDO 1572 ENDIF 1573 1574 #if defined( __parallel ) 1575 ! 
1576 !-- Compute total sum from local sums 1577 !$acc update host( sums_l ) 1578 IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr ) 1579 CALL MPI_ALLREDUCE( sums_l(nzb,1,0), sums(nzb,1), nzt+2-nzb, MPI_REAL, & 1580 MPI_SUM, comm2d, ierr ) 1581 IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr ) 1582 CALL MPI_ALLREDUCE( sums_l(nzb,2,0), sums(nzb,2), nzt+2-nzb, MPI_REAL, & 1583 MPI_SUM, comm2d, ierr ) 1584 IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr ) 1585 CALL MPI_ALLREDUCE( sums_l(nzb,4,0), sums(nzb,4), nzt+2-nzb, MPI_REAL, & 1586 MPI_SUM, comm2d, ierr ) 1587 IF ( ocean ) THEN 1588 IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr ) 1589 CALL MPI_ALLREDUCE( sums_l(nzb,23,0), sums(nzb,23), nzt+2-nzb, & 1590 MPI_REAL, MPI_SUM, comm2d, ierr ) 1591 ENDIF 1592 IF ( humidity ) THEN 1593 IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr ) 1594 CALL MPI_ALLREDUCE( sums_l(nzb,44,0), sums(nzb,44), nzt+2-nzb, & 1595 MPI_REAL, MPI_SUM, comm2d, ierr ) 1596 IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr ) 1597 CALL MPI_ALLREDUCE( sums_l(nzb,41,0), sums(nzb,41), nzt+2-nzb, & 1598 MPI_REAL, MPI_SUM, comm2d, ierr ) 1599 IF ( cloud_physics ) THEN 1600 IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr ) 1601 CALL MPI_ALLREDUCE( sums_l(nzb,42,0), sums(nzb,42), nzt+2-nzb, & 1602 MPI_REAL, MPI_SUM, comm2d, ierr ) 1603 IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr ) 1604 CALL MPI_ALLREDUCE( sums_l(nzb,43,0), sums(nzb,43), nzt+2-nzb, & 1605 MPI_REAL, MPI_SUM, comm2d, ierr ) 1606 ENDIF 1607 ENDIF 1608 1609 IF ( passive_scalar ) THEN 1610 IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr ) 1611 CALL MPI_ALLREDUCE( sums_l(nzb,41,0), sums(nzb,41), nzt+2-nzb, & 1612 MPI_REAL, MPI_SUM, comm2d, ierr ) 1613 ENDIF 1614 !$acc update device( sums ) 1615 #else 1616 !$acc parallel present( sums, sums_l ) 1617 sums(:,1) = sums_l(:,1,0) 1618 sums(:,2) = sums_l(:,2,0) 1619 sums(:,4) = sums_l(:,4,0) 1620 !$acc end parallel 1621 IF ( ocean ) THEN 1622 !$acc 
parallel present( sums, sums_l ) 1623 sums(:,23) = sums_l(:,23,0) 1624 !$acc end parallel 1625 ENDIF 1626 IF ( humidity ) THEN 1627 !$acc parallel present( sums, sums_l ) 1628 sums(:,44) = sums_l(:,44,0) 1629 sums(:,41) = sums_l(:,41,0) 1630 !$acc end parallel 1631 IF ( cloud_physics ) THEN 1632 !$acc parallel present( sums, sums_l ) 1633 sums(:,42) = sums_l(:,42,0) 1634 sums(:,43) = sums_l(:,43,0) 1635 !$acc end parallel 1636 ENDIF 1637 ENDIF 1638 IF ( passive_scalar ) THEN 1639 !$acc parallel present( sums, sums_l ) 1640 sums(:,41) = sums_l(:,41,0) 1641 !$acc end parallel 1642 ENDIF 1643 #endif 1644 1645 ! 1646 !-- Final values are obtained by division by the total number of grid points 1647 !-- used for summation. After that store profiles. 1648 !$acc parallel present( hom, ngp_2dh, ngp_2dh_s_inner, sums ) 1649 sums(:,1) = sums(:,1) / ngp_2dh(sr) 1650 sums(:,2) = sums(:,2) / ngp_2dh(sr) 1651 sums(:,4) = sums(:,4) / ngp_2dh_s_inner(:,sr) 1652 hom(:,1,1,sr) = sums(:,1) ! u 1653 hom(:,1,2,sr) = sums(:,2) ! v 1654 hom(:,1,4,sr) = sums(:,4) ! pt 1655 !$acc end parallel 1656 1657 ! 1658 !-- Salinity 1659 IF ( ocean ) THEN 1660 !$acc parallel present( hom, ngp_2dh_s_inner, sums ) 1661 sums(:,23) = sums(:,23) / ngp_2dh_s_inner(:,sr) 1662 hom(:,1,23,sr) = sums(:,23) ! sa 1663 !$acc end parallel 1664 ENDIF 1665 1666 ! 1667 !-- Humidity and cloud parameters 1668 IF ( humidity ) THEN 1669 !$acc parallel present( hom, ngp_2dh_s_inner, sums ) 1670 sums(:,44) = sums(:,44) / ngp_2dh_s_inner(:,sr) 1671 sums(:,41) = sums(:,41) / ngp_2dh_s_inner(:,sr) 1672 hom(:,1,44,sr) = sums(:,44) ! vpt 1673 hom(:,1,41,sr) = sums(:,41) ! qv (q) 1674 !$acc end parallel 1675 IF ( cloud_physics ) THEN 1676 !$acc parallel present( hom, ngp_2dh_s_inner, sums ) 1677 sums(:,42) = sums(:,42) / ngp_2dh_s_inner(:,sr) 1678 sums(:,43) = sums(:,43) / ngp_2dh_s_inner(:,sr) 1679 hom(:,1,42,sr) = sums(:,42) ! qv 1680 hom(:,1,43,sr) = sums(:,43) ! pt 1681 !$acc end parallel 1682 ENDIF 1683 ENDIF 1684 1685 ! 
1686 !-- Passive scalar 1687 IF ( passive_scalar ) THEN 1688 !$acc parallel present( hom, ngp_2dh_s_inner, sums ) 1689 sums(:,41) = sums(:,41) / ngp_2dh_s_inner(:,sr) 1690 hom(:,1,41,sr) = sums(:,41) ! s (q) 1691 !$acc end parallel 1692 ENDIF 1693 1694 ! 1695 !-- Horizontally averaged profiles of the remaining prognostic variables, 1696 !-- variances, the total and the perturbation energy (single values in last 1697 !-- column of sums_l) and some diagnostic quantities. 1698 !-- NOTE: for simplicity, nzb_s_inner is used below, although strictly 1699 !-- ---- speaking the following k-loop would have to be split up and 1700 !-- rearranged according to the staggered grid. 1701 !-- However, this implies no error since staggered velocity components 1702 !-- are zero at the walls and inside buildings. 1703 tn = 0 1704 #if defined( __intel_openmp_bug ) 1705 !$OMP PARALLEL PRIVATE( i, j, k, pts, sums_ll, sums_l_eper, sums_l_etot, & 1706 !$OMP tn, ust, ust2, u2, vst, vst2, v2, w2 ) 1707 tn = omp_get_thread_num() 1708 #else 1709 !$OMP PARALLEL PRIVATE( i, j, k, pts, sums_ll, sums_l_eper, sums_l_etot, tn, ust, ust2, u2, vst, vst2, v2, w2 ) 1710 !$ tn = omp_get_thread_num() 1711 #endif 1712 !$OMP DO 1713 !$acc parallel loop gang present( e, hom, kh, km, p, pt, w, rflags_invers, rmask, sums_l ) create( s1, s2, s3, s4, s5, s6, s7 ) 1714 DO k = nzb, nzt+1 1715 !$acc loop vector collapse( 2 ) reduction( +: s1, s2, s3, s4, s5, s6, s7 ) 1716 DO i = nxl, nxr 1717 DO j = nys, nyn 1718 ! 1719 !-- Prognostic and diagnostic variables 1720 s1 = s1 + w(k,j,i) * rmask(j,i,sr) * rflags_invers(j,i,k+1) 1721 s2 = s2 + e(k,j,i) * rmask(j,i,sr) * rflags_invers(j,i,k+1) 1722 s3 = s3 + km(k,j,i) * rmask(j,i,sr) * rflags_invers(j,i,k+1) 1723 s4 = s4 + kh(k,j,i) * rmask(j,i,sr) * rflags_invers(j,i,k+1) 1724 s5 = s5 + p(k,j,i) * rmask(j,i,sr) * rflags_invers(j,i,k+1) 1725 s6 = s6 + ( pt(k,j,i)-hom(k,1,4,sr) )**2 * rmask(j,i,sr) * & 1726 rflags_invers(j,i,k+1) 1727 ! 
1728 !-- Higher moments 1729 !-- (Computation of the skewness of w further below) 1730 s7 = s7 + w(k,j,i)**3 * rmask(j,i,sr) * rflags_invers(j,i,k+1) 1731 ENDDO 1732 ENDDO 1733 sums_l(k,3,tn) = s1 1734 sums_l(k,8,tn) = s2 1735 sums_l(k,9,tn) = s3 1736 sums_l(k,10,tn) = s4 1737 sums_l(k,40,tn) = s5 1738 sums_l(k,33,tn) = s6 1739 sums_l(k,38,tn) = s7 1740 ENDDO 1741 !$acc end parallel 1742 1743 IF ( humidity ) THEN 1744 !$OMP DO 1745 !$acc parallel loop gang present( hom, q, rflags_invers, rmask, sums_l ) create( s1 ) 1746 DO k = nzb, nzt+1 1747 !$acc loop vector collapse( 2 ) reduction( +: s1 ) 1748 DO i = nxl, nxr 1749 DO j = nys, nyn 1750 s1 = s1 + ( q(k,j,i)-hom(k,1,41,sr) )**2 * rmask(j,i,sr) * & 1751 rflags_invers(j,i,k+1) 1752 ENDDO 1753 ENDDO 1754 sums_l(k,70,tn) = s1 1755 ENDDO 1756 !$acc end parallel 1757 ENDIF 1758 1759 ! 1760 !-- Total and perturbation energy for the total domain (being 1761 !-- collected in the last column of sums_l). 1762 !$OMP DO 1763 !$acc parallel loop collapse(3) present( rflags_invers, rmask, u, v, w ) reduction(+:s1) 1764 DO i = nxl, nxr 1765 DO j = nys, nyn 1766 DO k = nzb, nzt+1 1767 s1 = s1 + 0.5 * ( u(k,j,i)**2 + v(k,j,i)**2 + w(k,j,i)**2 ) * & 1768 rmask(j,i,sr) * rflags_invers(j,i,k+1) 1769 ENDDO 1770 ENDDO 1771 ENDDO 1772 !$acc end parallel 1773 !$acc parallel present( sums_l ) 1774 sums_l(nzb+4,pr_palm,tn) = s1 1775 !$acc end parallel 1776 1777 !$OMP DO 1778 !$acc parallel present( rmask, sums_l, us, usws, vsws, ts ) create( s1, s2, s3, s4 ) 1779 !$acc loop vector collapse( 2 ) reduction( +: s1, s2, s3, s4 ) 1780 DO i = nxl, nxr 1781 DO j = nys, nyn 1782 ! 
1783 !-- 2D-arrays (being collected in the last column of sums_l) 1784 s1 = s1 + us(j,i) * rmask(j,i,sr) 1785 s2 = s2 + usws(j,i) * rmask(j,i,sr) 1786 s3 = s3 + vsws(j,i) * rmask(j,i,sr) 1787 s4 = s4 + ts(j,i) * rmask(j,i,sr) 1788 ENDDO 1789 ENDDO 1790 sums_l(nzb,pr_palm,tn) = s1 1791 sums_l(nzb+1,pr_palm,tn) = s2 1792 sums_l(nzb+2,pr_palm,tn) = s3 1793 sums_l(nzb+3,pr_palm,tn) = s4 1794 !$acc end parallel 1795 1796 IF ( humidity ) THEN 1797 !$acc parallel present( qs, rmask, sums_l ) create( s1 ) 1798 !$acc loop vector collapse( 2 ) reduction( +: s1 ) 1799 DO i = nxl, nxr 1800 DO j = nys, nyn 1801 s1 = s1 + qs(j,i) * rmask(j,i,sr) 1802 ENDDO 1803 ENDDO 1804 sums_l(nzb+12,pr_palm,tn) = s1 1805 !$acc end parallel 1806 ENDIF 1807 1808 ! 1809 !-- Computation of statistics when ws-scheme is not used. Else these 1810 !-- quantities are evaluated in the advection routines. 1811 IF ( .NOT. ws_scheme_mom .OR. sr /= 0 ) THEN 1812 1813 !$OMP DO 1814 !$acc parallel loop gang present( u, v, w, rflags_invers, rmask, sums_l ) create( s1, s2, s3, s4, ust2, vst2, w2 ) 1815 DO k = nzb, nzt+1 1816 !$acc loop vector collapse( 2 ) reduction( +: s1, s2, s3, s4 ) 1817 DO i = nxl, nxr 1818 DO j = nys, nyn 1819 ust2 = ( u(k,j,i) - hom(k,1,1,sr) )**2 1820 vst2 = ( v(k,j,i) - hom(k,1,2,sr) )**2 1821 w2 = w(k,j,i)**2 1822 1823 s1 = s1 + ust2 * rmask(j,i,sr) * rflags_invers(j,i,k+1) 1824 s2 = s2 + vst2 * rmask(j,i,sr) * rflags_invers(j,i,k+1) 1825 s3 = s3 + w2 * rmask(j,i,sr) * rflags_invers(j,i,k+1) 1826 ! 1827 !-- Perturbation energy 1828 s4 = s4 + 0.5 * ( ust2 + vst2 + w2 ) * rmask(j,i,sr) * & 1829 rflags_invers(j,i,k+1) 1830 ENDDO 1831 ENDDO 1832 sums_l(k,30,tn) = s1 1833 sums_l(k,31,tn) = s2 1834 sums_l(k,32,tn) = s3 1835 sums_l(k,34,tn) = s4 1836 ENDDO 1837 !$acc end parallel 1838 ! 
1839 !-- Total perturbation TKE 1840 !$OMP DO 1841 !$acc parallel present( sums_l ) create( s1 ) 1842 !$acc loop reduction( +: s1 ) 1843 DO k = nzb, nzt+1 1844 s1 = s1 + sums_l(k,34,tn) 1845 ENDDO 1846 sums_l(nzb+5,pr_palm,tn) = s1 1847 !$acc end parallel 1848 1849 ENDIF 1850 1851 ! 1852 !-- Horizontally averaged profiles of the vertical fluxes 1853 1854 ! 1855 !-- Subgridscale fluxes. 1856 !-- WARNING: If a Prandtl-layer is used (k=nzb for flat terrain), the fluxes 1857 !-- ------- should be calculated there in a different way. This is done 1858 !-- in the next loop further below, where results from this loop are 1859 !-- overwritten. However, THIS WORKS IN CASE OF FLAT TERRAIN ONLY! 1860 !-- The non-flat case still has to be handled. 1861 !-- NOTE: for simplicity, nzb_s_inner is used below, although 1862 !-- ---- strictly speaking the following k-loop would have to be 1863 !-- split up according to the staggered grid. 1864 !-- However, this implies no error since staggered velocity 1865 !-- components are zero at the walls and inside buildings. 1866 !$OMP DO 1867 !$acc parallel loop gang present( ddzu, kh, km, pt, u, v, w, rflags_invers, rmask, sums_l ) create( s1, s2, s3 ) 1868 DO k = nzb, nzt_diff 1869 !$acc loop vector collapse( 2 ) reduction( +: s1, s2, s3 ) 1870 DO i = nxl, nxr 1871 DO j = nys, nyn 1872 1873 ! 1874 !-- Momentum flux w"u" 1875 s1 = s1 - 0.25 * ( & 1876 km(k,j,i)+km(k+1,j,i)+km(k,j,i-1)+km(k+1,j,i-1) & 1877 ) * ( & 1878 ( u(k+1,j,i) - u(k,j,i) ) * ddzu(k+1) & 1879 + ( w(k,j,i) - w(k,j,i-1) ) * ddx & 1880 ) & 1881 * rmask(j,i,sr) * rflags_invers(j,i,k+1) 1882 ! 1883 !-- Momentum flux w"v" 1884 s2 = s2 - 0.25 * ( & 1885 km(k,j,i)+km(k+1,j,i)+km(k,j-1,i)+km(k+1,j-1,i) & 1886 ) * ( & 1887 ( v(k+1,j,i) - v(k,j,i) ) * ddzu(k+1) & 1888 + ( w(k,j,i) - w(k,j-1,i) ) * ddy & 1889 ) & 1890 * rmask(j,i,sr) * rflags_invers(j,i,k+1) 1891 ! 
1892 !-- Heat flux w"pt" 1893 s3 = s3 - 0.5 * ( kh(k,j,i) + kh(k+1,j,i) ) & 1894 * ( pt(k+1,j,i) - pt(k,j,i) ) & 1895 * ddzu(k+1) * rmask(j,i,sr) * rflags_invers(j,i,k+1) 1896 ENDDO 1897 ENDDO 1898 sums_l(k,12,tn) = s1 1899 sums_l(k,14,tn) = s2 1900 sums_l(k,16,tn) = s3 1901 ENDDO 1902 !$acc end parallel 1903 1904 ! 1905 !-- Salinity flux w"sa" 1906 IF ( ocean ) THEN 1907 !$acc parallel loop gang present( ddzu, kh, sa, rflags_invers, rmask, sums_l ) create( s1 ) 1908 DO k = nzb, nzt_diff 1909 !$acc loop vector collapse( 2 ) reduction( +: s1 ) 1910 DO i = nxl, nxr 1911 DO j = nys, nyn 1912 s1 = s1 - 0.5 * ( kh(k,j,i) + kh(k+1,j,i) ) & 1913 * ( sa(k+1,j,i) - sa(k,j,i) ) & 1914 * ddzu(k+1) * rmask(j,i,sr) * rflags_invers(j,i,k+1) 1915 ENDDO 1916 ENDDO 1917 sums_l(k,65,tn) = s1 1918 ENDDO 1919 !$acc end parallel 1920 ENDIF 1921 1922 ! 1923 !-- Buoyancy flux, water flux (humidity flux) w"q" 1924 IF ( humidity ) THEN 1925 1926 !$acc parallel loop gang present( ddzu, kh, q, vpt, rflags_invers, rmask, sums_l ) create( s1, s2 ) 1927 DO k = nzb, nzt_diff 1928 !$acc loop vector collapse( 2 ) reduction( +: s1, s2 ) 1929 DO i = nxl, nxr 1930 DO j = nys, nyn 1931 s1 = s1 - 0.5 * ( kh(k,j,i) + kh(k+1,j,i) ) & 1932 * ( vpt(k+1,j,i) - vpt(k,j,i) ) & 1933 * ddzu(k+1) * rmask(j,i,sr) * rflags_invers(j,i,k+1) 1934 s2 = s2 - 0.5 * ( kh(k,j,i) + kh(k+1,j,i) ) & 1935 * ( q(k+1,j,i) - q(k,j,i) ) & 1936 * ddzu(k+1) * rmask(j,i,sr) * rflags_invers(j,i,k+1) 1937 ENDDO 1938 ENDDO 1939 sums_l(k,45,tn) = s1 1940 sums_l(k,48,tn) = s2 1941 ENDDO 1942 !$acc end parallel 1943 1944 IF ( cloud_physics ) THEN 1945 1946 !$acc parallel loop gang present( ddzu, kh, q, ql, rflags_invers, rmask, sums_l ) create( s1 ) 1947 DO k = nzb, nzt_diff 1948 !$acc loop vector collapse( 2 ) reduction( +: s1 ) 1949 DO i = nxl, nxr 1950 DO j = nys, nyn 1951 s1 = s1 - 0.5 * ( kh(k,j,i) + kh(k+1,j,i) ) & 1952 * ( ( q(k+1,j,i) - ql(k+1,j,i) ) & 1953 - ( q(k,j,i) - ql(k,j,i) ) ) & 1954 * ddzu(k+1) * rmask(j,i,sr) * 
rflags_invers(j,i,k+1) 1955 ENDDO 1956 ENDDO 1957 sums_l(k,51,tn) = s1 1958 ENDDO 1959 !$acc end parallel 1960 1961 ENDIF 1962 1963 ENDIF 1964 ! 1965 !-- Passive scalar flux 1966 IF ( passive_scalar ) THEN 1967 1968 !$acc parallel loop gang present( ddzu, kh, q, rflags_invers, rmask, sums_l ) create( s1 ) 1969 DO k = nzb, nzt_diff 1970 !$acc loop vector collapse( 2 ) reduction( +: s1 ) 1971 DO i = nxl, nxr 1972 DO j = nys, nyn 1973 s1 = s1 - 0.5 * ( kh(k,j,i) + kh(k+1,j,i) ) & 1974 * ( q(k+1,j,i) - q(k,j,i) ) & 1975 * ddzu(k+1) * rmask(j,i,sr) * rflags_invers(j,i,k+1) 1976 ENDDO 1977 ENDDO 1978 sums_l(k,48,tn) = s1 1979 ENDDO 1980 !$acc end parallel 1981 1982 ENDIF 1983 1984 IF ( use_surface_fluxes ) THEN 1985 1986 !$OMP DO 1987 !$acc parallel present( rmask, shf, sums_l, usws, vsws ) create( s1, s2, s3, s4, s5 ) 1988 !$acc loop vector collapse( 2 ) reduction( +: s1, s2, s3, s4, s5 ) 1989 DO i = nxl, nxr 1990 DO j = nys, nyn 1991 ! 1992 !-- Subgridscale fluxes in the Prandtl layer 1993 s1 = s1 + usws(j,i) * rmask(j,i,sr) ! w"u" 1994 s2 = s2 + vsws(j,i) * rmask(j,i,sr) ! w"v" 1995 s3 = s3 + shf(j,i) * rmask(j,i,sr) ! w"pt" 1996 s4 = s4 + 0.0 * rmask(j,i,sr) ! u"pt" 1997 s5 = s5 + 0.0 * rmask(j,i,sr) ! v"pt" 1998 ENDDO 1999 ENDDO 2000 sums_l(nzb,12,tn) = s1 2001 sums_l(nzb,14,tn) = s2 2002 sums_l(nzb,16,tn) = s3 2003 sums_l(nzb,58,tn) = s4 2004 sums_l(nzb,61,tn) = s5 2005 !$acc end parallel 2006 2007 IF ( ocean ) THEN 2008 2009 !$OMP DO 2010 !$acc parallel present( rmask, saswsb, sums_l ) create( s1 ) 2011 !$acc loop vector collapse( 2 ) reduction( +: s1 ) 2012 DO i = nxl, nxr 2013 DO j = nys, nyn 2014 s1 = s1 + saswsb(j,i) * rmask(j,i,sr) ! 
w"sa" 2015 ENDDO 2016 ENDDO 2017 sums_l(nzb,65,tn) = s1 2018 !$acc end parallel 2019 2020 ENDIF 2021 2022 IF ( humidity ) THEN 2023 2024 !$OMP DO 2025 !$acc parallel present( pt, q, qsws, rmask, shf, sums_l ) create( s1, s2 ) 2026 !$acc loop vector collapse( 2 ) reduction( +: s1, s2 ) 2027 DO i = nxl, nxr 2028 DO j = nys, nyn 2029 s1 = s1 + qsws(j,i) * rmask(j,i,sr) ! w"q" (w"qv") 2030 s2 = s2 + ( ( 1.0 + 0.61 * q(nzb,j,i) ) * shf(j,i) & 2031 + 0.61 * pt(nzb,j,i) * qsws(j,i) ) 2032 ENDDO 2033 ENDDO 2034 sums_l(nzb,48,tn) = s1 2035 sums_l(nzb,45,tn) = s2 2036 !$acc end parallel 2037 2038 IF ( cloud_droplets ) THEN 2039 2040 !$OMP DO 2041 !$acc parallel present( pt, q, ql, qsws, rmask, shf, sums_l ) create( s1 ) 2042 !$acc loop vector collapse( 2 ) reduction( +: s1 ) 2043 DO i = nxl, nxr 2044 DO j = nys, nyn 2045 s1 = s1 + ( ( 1.0 + 0.61 * q(nzb,j,i) - ql(nzb,j,i) ) * & 2046 shf(j,i) + 0.61 * pt(nzb,j,i) * qsws(j,i) ) 2047 ENDDO 2048 ENDDO 2049 sums_l(nzb,45,tn) = s1 2050 !$acc end parallel 2051 2052 ENDIF 2053 2054 IF ( cloud_physics ) THEN 2055 2056 !$OMP DO 2057 !$acc parallel present( qsws, rmask, sums_l ) create( s1 ) 2058 !$acc loop vector collapse( 2 ) reduction( +: s1 ) 2059 DO i = nxl, nxr 2060 DO j = nys, nyn 2061 ! 2062 !-- Formula does not work if ql(nzb) /= 0.0 2063 s1 = s1 + qsws(j,i) * rmask(j,i,sr) ! w"q" (w"qv") 2064 ENDDO 2065 ENDDO 2066 sums_l(nzb,51,tn) = s1 2067 !$acc end parallel 2068 2069 ENDIF 2070 2071 ENDIF 2072 2073 IF ( passive_scalar ) THEN 2074 2075 !$OMP DO 2076 !$acc parallel present( qsws, rmask, sums_l ) create( s1 ) 2077 !$acc loop vector collapse( 2 ) reduction( +: s1 ) 2078 DO i = nxl, nxr 2079 DO j = nys, nyn 2080 s1 = s1 + qsws(j,i) * rmask(j,i,sr) ! w"q" (w"qv") 2081 ENDDO 2082 ENDDO 2083 sums_l(nzb,48,tn) = s1 2084 !$acc end parallel 2085 2086 ENDIF 2087 2088 ENDIF 2089 2090 ! 
2091 !-- Subgridscale fluxes at the top surface 2092 IF ( use_top_fluxes ) THEN 2093 2094 !$OMP DO 2095 !$acc parallel present( rmask, sums_l, tswst, uswst, vswst ) create( s1, s2, s3, s4, s5 ) 2096 !$acc loop vector collapse( 2 ) reduction( +: s1, s2, s3, s4, s5 ) 2097 DO i = nxl, nxr 2098 DO j = nys, nyn 2099 s1 = s1 + uswst(j,i) * rmask(j,i,sr) ! w"u" 2100 s2 = s2 + vswst(j,i) * rmask(j,i,sr) ! w"v" 2101 s3 = s3 + tswst(j,i) * rmask(j,i,sr) ! w"pt" 2102 s4 = s4 + 0.0 * rmask(j,i,sr) ! u"pt" 2103 s5 = s5 + 0.0 * rmask(j,i,sr) ! v"pt" 2104 ENDDO 2105 ENDDO 2106 sums_l(nzt:nzt+1,12,tn) = s1 2107 sums_l(nzt:nzt+1,14,tn) = s2 2108 sums_l(nzt:nzt+1,16,tn) = s3 2109 sums_l(nzt:nzt+1,58,tn) = s4 2110 sums_l(nzt:nzt+1,61,tn) = s5 2111 !$acc end parallel 2112 2113 IF ( ocean ) THEN 2114 2115 !$OMP DO 2116 !$acc parallel present( rmask, saswst, sums_l ) create( s1 ) 2117 !$acc loop vector collapse( 2 ) reduction( +: s1 ) 2118 DO i = nxl, nxr 2119 DO j = nys, nyn 2120 s1 = s1 + saswst(j,i) * rmask(j,i,sr) ! w"sa" 2121 ENDDO 2122 ENDDO 2123 sums_l(nzt,65,tn) = s1 2124 !$acc end parallel 2125 2126 ENDIF 2127 2128 IF ( humidity ) THEN 2129 2130 !$OMP DO 2131 !$acc parallel present( pt, q, qswst, rmask, tswst, sums_l ) create( s1, s2 ) 2132 !$acc loop vector collapse( 2 ) reduction( +: s1, s2 ) 2133 DO i = nxl, nxr 2134 DO j = nys, nyn 2135 s1 = s1 + qswst(j,i) * rmask(j,i,sr) ! 
w"q" (w"qv") 2136 s2 = s2 + ( ( 1.0 + 0.61 * q(nzt,j,i) ) * tswst(j,i) + & 2137 0.61 * pt(nzt,j,i) * qswst(j,i) ) 2138 ENDDO 2139 ENDDO 2140 sums_l(nzt,48,tn) = s1 2141 sums_l(nzt,45,tn) = s2 2142 !$acc end parallel 2143 2144 IF ( cloud_droplets ) THEN 2145 2146 !$OMP DO 2147 !$acc parallel present( pt, q, ql, qswst, rmask, tswst, sums_l ) create( s1 ) 2148 !$acc loop vector collapse( 2 ) reduction( +: s1 ) 2149 DO i = nxl, nxr 2150 DO j = nys, nyn 2151 s1 = s1 + ( ( 1.0 + 0.61 * q(nzt,j,i) - ql(nzt,j,i) ) * & 2152 tswst(j,i) + 0.61 * pt(nzt,j,i) * qswst(j,i) ) 2153 ENDDO 2154 ENDDO 2155 sums_l(nzt,45,tn) = s1 2156 !$acc end parallel 2157 2158 ENDIF 2159 2160 IF ( cloud_physics ) THEN 2161 2162 !$OMP DO 2163 !$acc parallel present( qswst, rmask, sums_l ) create( s1 ) 2164 !$acc loop vector collapse( 2 ) reduction( +: s1 ) 2165 DO i = nxl, nxr 2166 DO j = nys, nyn 2167 ! 2168 !-- Formula does not work if ql(nzb) /= 0.0 2169 s1 = s1 + qswst(j,i) * rmask(j,i,sr) ! w"q" (w"qv") 2170 ENDDO 2171 ENDDO 2172 sums_l(nzt,51,tn) = s1 2173 !$acc end parallel 2174 2175 ENDIF 2176 2177 ENDIF 2178 2179 IF ( passive_scalar ) THEN 2180 2181 !$OMP DO 2182 !$acc parallel present( qswst, rmask, sums_l ) create( s1 ) 2183 !$acc loop vector collapse( 2 ) reduction( +: s1 ) 2184 DO i = nxl, nxr 2185 DO j = nys, nyn 2186 s1 = s1 + qswst(j,i) * rmask(j,i,sr) ! w"q" (w"qv") 2187 ENDDO 2188 ENDDO 2189 sums_l(nzt,48,tn) = s1 2190 !$acc end parallel 2191 2192 ENDIF 2193 2194 ENDIF 2195 2196 ! 2197 !-- Resolved fluxes (can be computed for all horizontal points) 2198 !-- NOTE: for simplicity, nzb_s_inner is used below, although strictly 2199 !-- ---- speaking the following k-loop would have to be split up and 2200 !-- rearranged according to the staggered grid. 
2201 !$acc parallel loop gang present( hom, pt, rflags_invers, rmask, sums_l, u, v, w ) create( s1, s2, s3 ) 2202 DO k = nzb, nzt_diff 2203 !$acc loop vector collapse( 2 ) reduction( +: s1, s2, s3 ) 2204 DO i = nxl, nxr 2205 DO j = nys, nyn 2206 ust = 0.5 * ( u(k,j,i) - hom(k,1,1,sr) + & 2207 u(k+1,j,i) - hom(k+1,1,1,sr) ) 2208 vst = 0.5 * ( v(k,j,i) - hom(k,1,2,sr) + & 2209 v(k+1,j,i) - hom(k+1,1,2,sr) ) 2210 pts = 0.5 * ( pt(k,j,i) - hom(k,1,4,sr) + & 2211 pt(k+1,j,i) - hom(k+1,1,4,sr) ) 2212 ! 2213 !-- Higher moments 2214 s1 = s1 + pts * w(k,j,i)**2 * rmask(j,i,sr) * rflags_invers(j,i,k+1) 2215 s2 = s2 + pts**2 * w(k,j,i) * rmask(j,i,sr) * rflags_invers(j,i,k+1) 2216 ! 2217 !-- Energy flux w*e* (has to be adjusted?) 2218 s3 = s3 + w(k,j,i) * 0.5 * ( ust**2 + vst**2 + w(k,j,i)**2 ) & 2219 * rmask(j,i,sr) * rflags_invers(j,i,k+1) 2220 ENDDO 2221 ENDDO 2222 sums_l(k,35,tn) = s1 2223 sums_l(k,36,tn) = s2 2224 sums_l(k,37,tn) = s3 2225 ENDDO 2226 !$acc end parallel 2227 2228 ! 2229 !-- Salinity flux and density (density does not belong to here, 2230 !-- but so far there is no other suitable place to calculate) 2231 IF ( ocean ) THEN 2232 2233 IF( .NOT. ws_scheme_sca .OR. 
sr /= 0 ) THEN 2234 2235 !$acc parallel loop gang present( hom, rflags_invers, rmask, sa, sums_l, w ) create( s1 ) 2236 DO k = nzb, nzt_diff 2237 !$acc loop vector collapse( 2 ) reduction( +: s1 ) 2238 DO i = nxl, nxr 2239 DO j = nys, nyn 2240 s1 = s1 + 0.5 * ( sa(k,j,i) - hom(k,1,23,sr) + & 2241 sa(k+1,j,i) - hom(k+1,1,23,sr) ) & 2242 * w(k,j,i) * rmask(j,i,sr) * rflags_invers(j,i,k+1) 2243 ENDDO 2244 ENDDO 2245 sums_l(k,66,tn) = s1 2246 ENDDO 2247 !$acc end parallel 2248 2249 ENDIF 2250 2251 !$acc parallel loop gang present( rflags_invers, rho, prho, rmask, sums_l ) create( s1, s2 ) 2252 DO k = nzb, nzt_diff 2253 !$acc loop vector collapse( 2 ) reduction( +: s1, s2 ) 2254 DO i = nxl, nxr 2255 DO j = nys, nyn 2256 s1 = s1 + rho(k,j,i) * rmask(j,i,sr) * rflags_invers(j,i,k+1) 2257 s2 = s2 + prho(k,j,i) * rmask(j,i,sr) * rflags_invers(j,i,k+1) 2258 ENDDO 2259 ENDDO 2260 sums_l(k,64,tn) = s1 2261 sums_l(k,71,tn) = s2 2262 ENDDO 2263 !$acc end parallel 2264 2265 ENDIF 2266 2267 ! 2268 !-- Buoyancy flux, water flux, humidity flux, liquid water 2269 !-- content, rain drop concentration and rain water content 2270 IF ( humidity ) THEN 2271 2272 IF ( cloud_physics .OR. cloud_droplets ) THEN 2273 2274 !$acc parallel loop gang present( hom, rflags_invers, rmask, sums_l, vpt, w ) create( s1 ) 2275 DO k = nzb, nzt_diff 2276 !$acc loop vector collapse( 2 ) reduction( +: s1 ) 2277 DO i = nxl, nxr 2278 DO j = nys, nyn 2279 s1 = s1 + 0.5 * ( vpt(k,j,i) - hom(k,1,44,sr) + & 2280 vpt(k+1,j,i) - hom(k+1,1,44,sr) ) * & 2281 w(k,j,i) * rmask(j,i,sr) * rflags_invers(j,i,k+1) 2282 ENDDO 2283 ENDDO 2284 sums_l(k,46,tn) = s1 2285 ENDDO 2286 !$acc end parallel 2287 2288 IF ( .NOT. 
cloud_droplets ) THEN 2289 2290 !$acc parallel loop gang present( hom, q, ql, rflags_invers, rmask, sums_l, w ) create( s1 ) 2291 DO k = nzb, nzt_diff 2292 !$acc loop vector collapse( 2 ) reduction( +: s1 ) 2293 DO i = nxl, nxr 2294 DO j = nys, nyn 2295 s1 = s1 + 0.5 * ( ( q(k,j,i) - ql(k,j,i) ) - hom(k,1,42,sr) + & 2296 ( q(k+1,j,i) - ql(k+1,j,i) ) - hom(k+1,1,42,sr) ) & 2297 * w(k,j,i) * rmask(j,i,sr) * rflags_invers(j,i,k+1) 2298 ENDDO 2299 ENDDO 2300 sums_l(k,52,tn) = s1 2301 ENDDO 2302 !$acc end parallel 2303 2304 IF ( icloud_scheme == 0 ) THEN 2305 2306 !$acc parallel loop gang present( qc, ql, rflags_invers, rmask, sums_l ) create( s1, s2 ) 2307 DO k = nzb, nzt_diff 2308 !$acc loop vector collapse( 2 ) reduction( +: s1, s2 ) 2309 DO i = nxl, nxr 2310 DO j = nys, nyn 2311 s1 = s1 + ql(k,j,i) * rmask(j,i,sr) * rflags_invers(j,i,k+1) 2312 s2 = s2 + qc(k,j,i) * rmask(j,i,sr) * rflags_invers(j,i,k+1) 2313 ENDDO 2314 ENDDO 2315 sums_l(k,54,tn) = s1 2316 sums_l(k,75,tn) = s2 2317 ENDDO 2318 !$acc end parallel 2319 2320 IF ( precipitation ) THEN 2321 2322 !$acc parallel loop gang present( nr, qr, prr, rflags_invers, rmask, sums_l ) create( s1, s2, s3 ) 2323 DO k = nzb, nzt_diff 2324 !$acc loop vector collapse( 2 ) reduction( +: s1, s2, s3 ) 2325 DO i = nxl, nxr 2326 DO j = nys, nyn 2327 s1 = s1 + nr(k,j,i) * rmask(j,i,sr) * rflags_invers(j,i,k+1) 2328 s2 = s2 + qr(k,j,i) * rmask(j,i,sr) * rflags_invers(j,i,k+1) 2329 s3 = s3 + prr(k,j,i) * rmask(j,i,sr) * rflags_invers(j,i,k+1) 2330 ENDDO 2331 ENDDO 2332 sums_l(k,73,tn) = s1 2333 sums_l(k,74,tn) = s2 2334 sums_l(k,76,tn) = s3 2335 ENDDO 2336 !$acc end parallel 2337 2338 ENDIF 2339 2340 ELSE 2341 2342 !$acc parallel loop gang present( ql, rflags_invers, rmask, sums_l ) create( s1 ) 2343 DO k = nzb, nzt_diff 2344 !$acc loop vector collapse( 2 ) reduction( +: s1 ) 2345 DO i = nxl, nxr 2346 DO j = nys, nyn 2347 s1 = s1 + ql(k,j,i) * rmask(j,i,sr) * rflags_invers(j,i,k+1) 2348 ENDDO 2349 ENDDO 2350 sums_l(k,54,tn) = s1 
2351 ENDDO 2352 !$acc end parallel 2353 2354 ENDIF 2355 2356 ELSE 2357 2358 !$acc parallel loop gang present( ql, rflags_invers, rmask, sums_l ) create( s1 ) 2359 DO k = nzb, nzt_diff 2360 !$acc loop vector collapse( 2 ) reduction( +: s1 ) 2361 DO i = nxl, nxr 2362 DO j = nys, nyn 2363 s1 = s1 + ql(k,j,i) * rmask(j,i,sr) * rflags_invers(j,i,k+1) 2364 ENDDO 2365 ENDDO 2366 sums_l(k,54,tn) = s1 2367 ENDDO 2368 !$acc end parallel 2369 2370 ENDIF 2371 2372 ELSE 2373 2374 IF( .NOT. ws_scheme_sca .OR. sr /= 0 ) THEN 2375 2376 !$acc parallel loop gang present( hom, rflags_invers, rmask, sums_l, vpt, w ) create( s1 ) 2377 DO k = nzb, nzt_diff 2378 !$acc loop vector collapse( 2 ) reduction( +: s1 ) 2379 DO i = nxl, nxr 2380 DO j = nys, nyn 2381 s1 = s1 + 0.5 * ( vpt(k,j,i) - hom(k,1,44,sr) + & 2382 vpt(k+1,j,i) - hom(k+1,1,44,sr) ) & 2383 * w(k,j,i) * rmask(j,i,sr) * rflags_invers(j,i,k+1) 2384 ENDDO 2385 ENDDO 2386 sums_l(k,46,tn) = s1 2387 ENDDO 2388 !$acc end parallel 2389 2390 ELSEIF ( ws_scheme_sca .AND. sr == 0 ) THEN 2391 2392 !$acc parallel loop present( hom, sums_l ) 2393 DO k = nzb, nzt_diff 2394 sums_l(k,46,tn) = ( 1.0 + 0.61 * hom(k,1,41,sr) ) * sums_l(k,17,tn) + & 2395 0.61 * hom(k,1,4,sr) * sums_l(k,49,tn) 2396 ENDDO 2397 !$acc end parallel 2398 2399 ENDIF 2400 2401 ENDIF 2402 2403 ENDIF 2404 ! 2405 !-- Passive scalar flux 2406 IF ( passive_scalar .AND. ( .NOT. ws_scheme_sca .OR. sr /= 0 ) ) THEN 2407 2408 !$acc parallel loop gang present( hom, q, rflags_invers, rmask, sums_l, w ) create( s1 ) 2409 DO k = nzb, nzt_diff 2410 !$acc loop vector collapse( 2 ) reduction( +: s1 ) 2411 DO i = nxl, nxr 2412 DO j = nys, nyn 2413 s1 = s1 + 0.5 * ( q(k,j,i) - hom(k,1,41,sr) + & 2414 q(k+1,j,i) - hom(k+1,1,41,sr) ) & 2415 * w(k,j,i) * rmask(j,i,sr) * rflags_invers(j,i,k+1) 2416 ENDDO 2417 ENDDO 2418 sums_l(k,49,tn) = s1 2419 ENDDO 2420 !$acc end parallel 2421 2422 ENDIF 2423 2424 ! 
2425 !-- For speed optimization fluxes which have been computed in part directly 2426 !-- inside the WS advection routines are treated separately 2427 !-- Momentum fluxes first: 2428 IF ( .NOT. ws_scheme_mom .OR. sr /= 0 ) THEN 2429 2430 !$OMP DO 2431 !$acc parallel loop gang present( hom, rflags_invers, rmask, sums_l, u, v, w ) create( s1, s2 ) 2432 DO k = nzb, nzt_diff 2433 !$acc loop vector collapse( 2 ) reduction( +: s1, s2 ) 2434 DO i = nxl, nxr 2435 DO j = nys, nyn 2436 ust = 0.5 * ( u(k,j,i) - hom(k,1,1,sr) + & 2437 u(k+1,j,i) - hom(k+1,1,1,sr) ) 2438 vst = 0.5 * ( v(k,j,i) - hom(k,1,2,sr) + & 2439 v(k+1,j,i) - hom(k+1,1,2,sr) ) 2440 ! 2441 !-- Momentum flux w*u* 2442 s1 = s1 + 0.5 * ( w(k,j,i-1) + w(k,j,i) ) & 2443 * ust * rmask(j,i,sr) * rflags_invers(j,i,k+1) 2444 ! 2445 !-- Momentum flux w*v* 2446 s2 = s2 + 0.5 * ( w(k,j-1,i) + w(k,j,i) ) & 2447 * vst * rmask(j,i,sr) * rflags_invers(j,i,k+1) 2448 ENDDO 2449 ENDDO 2450 sums_l(k,13,tn) = s1 2451 sums_l(k,15,tn) = s1 2452 ENDDO 2453 !$acc end parallel 2454 2455 ENDIF 2456 2457 IF ( .NOT. ws_scheme_sca .OR. sr /= 0 ) THEN 2458 2459 !$OMP DO 2460 !$acc parallel loop gang present( hom, pt, rflags_invers, rmask, sums_l, w ) create( s1 ) 2461 DO k = nzb, nzt_diff 2462 !$acc loop vector collapse( 2 ) reduction( +: s1 ) 2463 DO i = nxl, nxr 2464 DO j = nys, nyn 2465 !
2466 !-- Vertical heat flux 2467 s1 = s1 + 0.5 * ( pt(k,j,i) - hom(k,1,4,sr) + & 2468 pt(k+1,j,i) - hom(k+1,1,4,sr) ) & 2469 * w(k,j,i) * rmask(j,i,sr) * rflags_invers(j,i,k+1) 2470 ENDDO 2471 ENDDO 2472 sums_l(k,17,tn) = s1 2473 ENDDO 2474 !$acc end parallel 2475 2476 IF ( humidity ) THEN 2477 2478 !$acc parallel loop gang present( hom, q, rflags_invers, rmask, sums_l, w ) create( s1 ) 2479 DO k = nzb, nzt_diff 2480 !$acc loop vector collapse( 2 ) reduction( +: s1 ) 2481 DO i = nxl, nxr 2482 DO j = nys, nyn 2483 s1 = s1 + 0.5 * ( q(k,j,i) - hom(k,1,41,sr) + & 2484 q(k+1,j,i) - hom(k+1,1,41,sr) ) & 2485 * w(k,j,i) * rmask(j,i,sr) * rflags_invers(j,i,k+1) 2486 ENDDO 2487 ENDDO 2488 sums_l(k,49,tn) = s1 2489 ENDDO 2490 !$acc end parallel 2491 2492 ENDIF 2493 2494 ENDIF 2495 2496 2497 ! 2498 !-- Density at top follows Neumann condition 2499 IF ( ocean ) THEN 2500 !$acc parallel present( sums_l ) 2501 sums_l(nzt+1,64,tn) = sums_l(nzt,64,tn) 2502 sums_l(nzt+1,71,tn) = sums_l(nzt,71,tn) 2503 !$acc end parallel 2504 ENDIF 2505 2506 ! 2507 !-- Divergence of vertical flux of resolved scale energy and pressure 2508 !-- fluctuations as well as flux of pressure fluctuation itself (68). 2509 !-- First calculate the products, then the divergence. 2510 !-- Calculation is time consuming. Do it only, if profiles shall be plotted. 2511 IF ( hom(nzb+1,2,55,0) /= 0.0 .OR. hom(nzb+1,2,68,0) /= 0.0 ) THEN 2512 2513 STOP '+++ openACC porting for vertical flux div of resolved scale TKE in flow_statistics is still missing' 2514 sums_ll = 0.0 ! 
local array 2515 2516 !$OMP DO 2517 DO i = nxl, nxr 2518 DO j = nys, nyn 2519 DO k = nzb_s_inner(j,i)+1, nzt 2520 2521 sums_ll(k,1) = sums_ll(k,1) + 0.5 * w(k,j,i) * ( & 2522 ( 0.25 * ( u(k,j,i)+u(k+1,j,i)+u(k,j,i+1)+u(k+1,j,i+1) & 2523 - 0.5 * ( hom(k,1,1,sr) + hom(k+1,1,1,sr) ) & 2524 ) )**2 & 2525 + ( 0.25 * ( v(k,j,i)+v(k+1,j,i)+v(k,j+1,i)+v(k+1,j+1,i) & 2526 - 0.5 * ( hom(k,1,2,sr) + hom(k+1,1,2,sr) ) & 2527 ) )**2 & 2528 + w(k,j,i)**2 ) 2529 2530 sums_ll(k,2) = sums_ll(k,2) + 0.5 * w(k,j,i) & 2531 * ( p(k,j,i) + p(k+1,j,i) ) 2532 2533 ENDDO 2534 ENDDO 2535 ENDDO 2536 sums_ll(0,1) = 0.0 ! because w is zero at the bottom 2537 sums_ll(nzt+1,1) = 0.0 2538 sums_ll(0,2) = 0.0 2539 sums_ll(nzt+1,2) = 0.0 2540 2541 DO k = nzb+1, nzt 2542 sums_l(k,55,tn) = ( sums_ll(k,1) - sums_ll(k-1,1) ) * ddzw(k) 2543 sums_l(k,56,tn) = ( sums_ll(k,2) - sums_ll(k-1,2) ) * ddzw(k) 2544 sums_l(k,68,tn) = sums_ll(k,2) 2545 ENDDO 2546 sums_l(nzb,55,tn) = sums_l(nzb+1,55,tn) 2547 sums_l(nzb,56,tn) = sums_l(nzb+1,56,tn) 2548 sums_l(nzb,68,tn) = 0.0 ! because w* = 0 at nzb 2549 2550 ENDIF 2551 2552 ! 2553 !-- Divergence of vertical flux of SGS TKE and the flux itself (69) 2554 IF ( hom(nzb+1,2,57,0) /= 0.0 .OR. hom(nzb+1,2,69,0) /= 0.0 ) THEN 2555 2556 STOP '+++ openACC porting for vertical flux div of SGS TKE in flow_statistics is still missing' 2557 !$OMP DO 2558 DO i = nxl, nxr 2559 DO j = nys, nyn 2560 DO k = nzb_s_inner(j,i)+1, nzt 2561 2562 sums_l(k,57,tn) = sums_l(k,57,tn) - 0.5 * ( & 2563 (km(k,j,i)+km(k+1,j,i)) * (e(k+1,j,i)-e(k,j,i)) * ddzu(k+1) & 2564 - (km(k-1,j,i)+km(k,j,i)) * (e(k,j,i)-e(k-1,j,i)) * ddzu(k) & 2565 ) * ddzw(k) 2566 2567 sums_l(k,69,tn) = sums_l(k,69,tn) - 0.5 * ( & 2568 (km(k,j,i)+km(k+1,j,i)) * (e(k+1,j,i)-e(k,j,i)) * ddzu(k+1) & 2569 ) 2570 2571 ENDDO 2572 ENDDO 2573 ENDDO 2574 sums_l(nzb,57,tn) = sums_l(nzb+1,57,tn) 2575 sums_l(nzb,69,tn) = sums_l(nzb+1,69,tn) 2576 2577 ENDIF 2578 2579 ! 2580 !-- Horizontal heat fluxes (subgrid, resolved, total). 
2581 !-- Do it only, if profiles shall be plotted. 2582 IF ( hom(nzb+1,2,58,0) /= 0.0 ) THEN 2583 2584 STOP '+++ openACC porting for horizontal flux calculation in flow_statistics is still missing' 2585 !$OMP DO 2586 DO i = nxl, nxr 2587 DO j = nys, nyn 2588 DO k = nzb_s_inner(j,i)+1, nzt 2589 ! 2590 !-- Subgrid horizontal heat fluxes u"pt", v"pt" 2591 sums_l(k,58,tn) = sums_l(k,58,tn) - 0.5 * & 2592 ( kh(k,j,i) + kh(k,j,i-1) ) & 2593 * ( pt(k,j,i-1) - pt(k,j,i) ) & 2594 * ddx * rmask(j,i,sr) 2595 sums_l(k,61,tn) = sums_l(k,61,tn) - 0.5 * & 2596 ( kh(k,j,i) + kh(k,j-1,i) ) & 2597 * ( pt(k,j-1,i) - pt(k,j,i) ) & 2598 * ddy * rmask(j,i,sr) 2599 ! 2600 !-- Resolved horizontal heat fluxes u*pt*, v*pt* 2601 sums_l(k,59,tn) = sums_l(k,59,tn) + & 2602 ( u(k,j,i) - hom(k,1,1,sr) ) & 2603 * 0.5 * ( pt(k,j,i-1) - hom(k,1,4,sr) + & 2604 pt(k,j,i) - hom(k,1,4,sr) ) 2605 pts = 0.5 * ( pt(k,j-1,i) - hom(k,1,4,sr) + & 2606 pt(k,j,i) - hom(k,1,4,sr) ) 2607 sums_l(k,62,tn) = sums_l(k,62,tn) + & 2608 ( v(k,j,i) - hom(k,1,2,sr) ) & 2609 * 0.5 * ( pt(k,j-1,i) - hom(k,1,4,sr) + & 2610 pt(k,j,i) - hom(k,1,4,sr) ) 2611 ENDDO 2612 ENDDO 2613 ENDDO 2614 ! 2615 !-- Fluxes at the surface must be zero (e.g. due to the Prandtl-layer) 2616 sums_l(nzb,58,tn) = 0.0 2617 sums_l(nzb,59,tn) = 0.0 2618 sums_l(nzb,60,tn) = 0.0 2619 sums_l(nzb,61,tn) = 0.0 2620 sums_l(nzb,62,tn) = 0.0 2621 sums_l(nzb,63,tn) = 0.0 2622 2623 ENDIF 2624 2625 ! 2626 !-- Calculate the user-defined profiles 2627 CALL user_statistics( 'profiles', sr, tn ) 2628 !$OMP END PARALLEL 2629 2630 ! 
2631 !-- Summation of thread sums 2632 IF ( threads_per_task > 1 ) THEN 2633 STOP '+++ openACC porting for threads_per_task > 1 in flow_statistics is still missing' 2634 DO i = 1, threads_per_task-1 2635 sums_l(:,3,0) = sums_l(:,3,0) + sums_l(:,3,i) 2636 sums_l(:,4:40,0) = sums_l(:,4:40,0) + sums_l(:,4:40,i) 2637 sums_l(:,45:pr_palm,0) = sums_l(:,45:pr_palm,0) + & 2638 sums_l(:,45:pr_palm,i) 2639 IF ( max_pr_user > 0 ) THEN 2640 sums_l(:,pr_palm+1:pr_palm+max_pr_user,0) = & 2641 sums_l(:,pr_palm+1:pr_palm+max_pr_user,0) + & 2642 sums_l(:,pr_palm+1:pr_palm+max_pr_user,i) 2643 ENDIF 2644 ENDDO 2645 ENDIF 2646 2647 !$acc update host( hom, sums, sums_l ) 2648 2649 #if defined( __parallel ) 2650 2651 ! 2652 !-- Compute total sum from local sums 2653 IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr ) 2654 CALL MPI_ALLREDUCE( sums_l(nzb,1,0), sums(nzb,1), ngp_sums, MPI_REAL, & 2655 MPI_SUM, comm2d, ierr ) 2656 #else 2657 sums = sums_l(:,:,0) 2658 #endif 2659 2660 ! 2661 !-- Final values are obtained by division by the total number of grid points 2662 !-- used for summation. After that store profiles. 
2663 !-- Profiles: 2664 DO k = nzb, nzt+1 2665 sums(k,3) = sums(k,3) / ngp_2dh(sr) 2666 sums(k,8:11) = sums(k,8:11) / ngp_2dh_s_inner(k,sr) 2667 sums(k,12:22) = sums(k,12:22) / ngp_2dh(sr) 2668 sums(k,23:29) = sums(k,23:29) / ngp_2dh_s_inner(k,sr) 2669 sums(k,30:32) = sums(k,30:32) / ngp_2dh(sr) 2670 sums(k,33:34) = sums(k,33:34) / ngp_2dh_s_inner(k,sr) 2671 sums(k,35:39) = sums(k,35:39) / ngp_2dh(sr) 2672 sums(k,40) = sums(k,40) / ngp_2dh_s_inner(k,sr) 2673 sums(k,45:53) = sums(k,45:53) / ngp_2dh(sr) 2674 sums(k,54) = sums(k,54) / ngp_2dh_s_inner(k,sr) 2675 sums(k,55:63) = sums(k,55:63) / ngp_2dh(sr) 2676 sums(k,64) = sums(k,64) / ngp_2dh_s_inner(k,sr) 2677 sums(k,65:69) = sums(k,65:69) / ngp_2dh(sr) 2678 sums(k,70:pr_palm-2) = sums(k,70:pr_palm-2)/ ngp_2dh_s_inner(k,sr) 2679 ENDDO 2680 2681 !-- Upstream-parts 2682 sums(nzb:nzb+11,pr_palm-1) = sums(nzb:nzb+11,pr_palm-1) / ngp_3d(sr) 2683 !-- u* and so on 2684 !-- As sums(nzb:nzb+3,pr_palm) are full 2D arrays (us, usws, vsws, ts) whose 2685 !-- size is always ( nx + 1 ) * ( ny + 1 ), defined at the first grid layer 2686 !-- above the topography, they are being divided by ngp_2dh(sr) 2687 sums(nzb:nzb+3,pr_palm) = sums(nzb:nzb+3,pr_palm) / & 2688 ngp_2dh(sr) 2689 sums(nzb+12,pr_palm) = sums(nzb+12,pr_palm) / & ! qs 2690 ngp_2dh(sr) 2691 !-- eges, e* 2692 sums(nzb+4:nzb+5,pr_palm) = sums(nzb+4:nzb+5,pr_palm) / & 2693 ngp_3d(sr) 2694 !-- Old and new divergence 2695 sums(nzb+9:nzb+10,pr_palm) = sums(nzb+9:nzb+10,pr_palm) / & 2696 ngp_3d_inner(sr) 2697 2698 !-- User-defined profiles 2699 IF ( max_pr_user > 0 ) THEN 2700 DO k = nzb, nzt+1 2701 sums(k,pr_palm+1:pr_palm+max_pr_user) = & 2702 sums(k,pr_palm+1:pr_palm+max_pr_user) / & 2703 ngp_2dh_s_inner(k,sr) 2704 ENDDO 2705 ENDIF 2706 2707 ! 2708 !-- Collect horizontal average in hom. 2709 !-- Compute deduced averages (e.g. total heat flux) 2710 hom(:,1,3,sr) = sums(:,3) ! w 2711 hom(:,1,8,sr) = sums(:,8) ! 
e profiles 5-7 are initial profiles 2712 hom(:,1,9,sr) = sums(:,9) ! km 2713 hom(:,1,10,sr) = sums(:,10) ! kh 2714 hom(:,1,11,sr) = sums(:,11) ! l 2715 hom(:,1,12,sr) = sums(:,12) ! w"u" 2716 hom(:,1,13,sr) = sums(:,13) ! w*u* 2717 hom(:,1,14,sr) = sums(:,14) ! w"v" 2718 hom(:,1,15,sr) = sums(:,15) ! w*v* 2719 hom(:,1,16,sr) = sums(:,16) ! w"pt" 2720 hom(:,1,17,sr) = sums(:,17) ! w*pt* 2721 hom(:,1,18,sr) = sums(:,16) + sums(:,17) ! wpt 2722 hom(:,1,19,sr) = sums(:,12) + sums(:,13) ! wu 2723 hom(:,1,20,sr) = sums(:,14) + sums(:,15) ! wv 2724 hom(:,1,21,sr) = sums(:,21) ! w*pt*BC 2725 hom(:,1,22,sr) = sums(:,16) + sums(:,21) ! wptBC 2726 ! profile 24 is initial profile (sa) 2727 ! profiles 25-29 left empty for initial 2728 ! profiles 2729 hom(:,1,30,sr) = sums(:,30) ! u*2 2730 hom(:,1,31,sr) = sums(:,31) ! v*2 2731 hom(:,1,32,sr) = sums(:,32) ! w*2 2732 hom(:,1,33,sr) = sums(:,33) ! pt*2 2733 hom(:,1,34,sr) = sums(:,34) ! e* 2734 hom(:,1,35,sr) = sums(:,35) ! w*2pt* 2735 hom(:,1,36,sr) = sums(:,36) ! w*pt*2 2736 hom(:,1,37,sr) = sums(:,37) ! w*e* 2737 hom(:,1,38,sr) = sums(:,38) ! w*3 2738 hom(:,1,39,sr) = sums(:,38) / ( abs( sums(:,32) ) + 1E-20 )**1.5 ! Sw 2739 hom(:,1,40,sr) = sums(:,40) ! p 2740 hom(:,1,45,sr) = sums(:,45) ! w"vpt" 2741 hom(:,1,46,sr) = sums(:,46) ! w*vpt* 2742 hom(:,1,47,sr) = sums(:,45) + sums(:,46) ! wvpt 2743 hom(:,1,48,sr) = sums(:,48) ! w"q" (w"qv") 2744 hom(:,1,49,sr) = sums(:,49) ! w*q* (w*qv*) 2745 hom(:,1,50,sr) = sums(:,48) + sums(:,49) ! wq (wqv) 2746 hom(:,1,51,sr) = sums(:,51) ! w"qv" 2747 hom(:,1,52,sr) = sums(:,52) ! w*qv* 2748 hom(:,1,53,sr) = sums(:,52) + sums(:,51) ! wq (wqv) 2749 hom(:,1,54,sr) = sums(:,54) ! ql 2750 hom(:,1,55,sr) = sums(:,55) ! w*u*u*/dz 2751 hom(:,1,56,sr) = sums(:,56) ! w*p*/dz 2752 hom(:,1,57,sr) = sums(:,57) ! ( w"e + w"p"/rho )/dz 2753 hom(:,1,58,sr) = sums(:,58) ! u"pt" 2754 hom(:,1,59,sr) = sums(:,59) ! u*pt* 2755 hom(:,1,60,sr) = sums(:,58) + sums(:,59) ! upt_t 2756 hom(:,1,61,sr) = sums(:,61) ! 
v"pt" 2757 hom(:,1,62,sr) = sums(:,62) ! v*pt* 2758 hom(:,1,63,sr) = sums(:,61) + sums(:,62) ! vpt_t 2759 hom(:,1,64,sr) = sums(:,64) ! rho 2760 hom(:,1,65,sr) = sums(:,65) ! w"sa" 2761 hom(:,1,66,sr) = sums(:,66) ! w*sa* 2762 hom(:,1,67,sr) = sums(:,65) + sums(:,66) ! wsa 2763 hom(:,1,68,sr) = sums(:,68) ! w*p* 2764 hom(:,1,69,sr) = sums(:,69) ! w"e + w"p"/rho 2765 hom(:,1,70,sr) = sums(:,70) ! q*2 2766 hom(:,1,71,sr) = sums(:,71) ! prho 2767 hom(:,1,72,sr) = hyp * 1E-4 ! hyp in dbar 2768 hom(:,1,73,sr) = sums(:,73) ! nr 2769 hom(:,1,74,sr) = sums(:,74) ! qr 2770 hom(:,1,75,sr) = sums(:,75) ! qc 2771 hom(:,1,76,sr) = sums(:,76) ! prr (precipitation rate) 2772 ! 77 is initial density profile 2773 2774 hom(:,1,pr_palm-1,sr) = sums(:,pr_palm-1) 2775 ! upstream-parts u_x, u_y, u_z, v_x, 2776 ! v_y, usw. (in last but one profile) 2777 hom(:,1,pr_palm,sr) = sums(:,pr_palm) 2778 ! u*, w'u', w'v', t* (in last profile) 2779 2780 IF ( max_pr_user > 0 ) THEN ! user-defined profiles 2781 hom(:,1,pr_palm+1:pr_palm+max_pr_user,sr) = & 2782 sums(:,pr_palm+1:pr_palm+max_pr_user) 2783 ENDIF 2784 2785 ! 2786 !-- Determine the boundary layer height using two different schemes. 2787 !-- First scheme: Starting from the Earth's (Ocean's) surface, look for the 2788 !-- first relative minimum (maximum) of the total heat flux. 2789 !-- The corresponding height is assumed as the boundary layer height, if it 2790 !-- is less than 1.5 times the height where the heat flux becomes negative 2791 !-- (positive) for the first time. 2792 z_i(1) = 0.0 2793 first = .TRUE. 2794 2795 IF ( ocean ) THEN 2796 DO k = nzt, nzb+1, -1 2797 IF ( first .AND. hom(k,1,18,sr) < 0.0 & 2798 .AND. abs(hom(k,1,18,sr)) > 1.0E-8) THEN 2799 first = .FALSE. 2800 height = zw(k) 2801 ENDIF 2802 IF ( hom(k,1,18,sr) < 0.0 .AND. & 2803 abs(hom(k,1,18,sr)) > 1.0E-8 .AND. 
& 2804 hom(k-1,1,18,sr) > hom(k,1,18,sr) ) THEN 2805 IF ( zw(k) < 1.5 * height ) THEN 2806 z_i(1) = zw(k) 2807 ELSE 2808 z_i(1) = height 2809 ENDIF 2810 EXIT 2811 ENDIF 2812 ENDDO 2813 ELSE 2814 DO k = nzb, nzt-1 2815 IF ( first .AND. hom(k,1,18,sr) < 0.0 & 2816 .AND. abs(hom(k,1,18,sr)) > 1.0E-8 ) THEN 2817 first = .FALSE. 2818 height = zw(k) 2819 ENDIF 2820 IF ( hom(k,1,18,sr) < 0.0 .AND. & 2821 abs(hom(k,1,18,sr)) > 1.0E-8 .AND. & 2822 hom(k+1,1,18,sr) > hom(k,1,18,sr) ) THEN 2823 IF ( zw(k) < 1.5 * height ) THEN 2824 z_i(1) = zw(k) 2825 ELSE 2826 z_i(1) = height 2827 ENDIF 2828 EXIT 2829 ENDIF 2830 ENDDO 2831 ENDIF 2832 2833 ! 2834 !-- Second scheme: Gradient scheme from Sullivan et al. (1998), modified 2835 !-- by Uhlenbrock(2006). The boundary layer height is the height with the 2836 !-- maximal local temperature gradient: starting from the second (the last 2837 !-- but one) vertical gridpoint, the local gradient must be at least 2838 !-- 0.2K/100m and greater than the next four gradients. 2839 !-- WARNING: The threshold value of 0.2K/100m must be adjusted for the 2840 !-- ocean case! 2841 z_i(2) = 0.0 2842 DO k = nzb+1, nzt+1 2843 dptdz(k) = ( hom(k,1,4,sr) - hom(k-1,1,4,sr) ) * ddzu(k) 2844 ENDDO 2845 dptdz_threshold = 0.2 / 100.0 2846 2847 IF ( ocean ) THEN 2848 DO k = nzt+1, nzb+5, -1 2849 IF ( dptdz(k) > dptdz_threshold .AND. & 2850 dptdz(k) > dptdz(k-1) .AND. dptdz(k) > dptdz(k-2) .AND. & 2851 dptdz(k) > dptdz(k-3) .AND. dptdz(k) > dptdz(k-4) ) THEN 2852 z_i(2) = zw(k-1) 2853 EXIT 2854 ENDIF 2855 ENDDO 2856 ELSE 2857 DO k = nzb+1, nzt-3 2858 IF ( dptdz(k) > dptdz_threshold .AND. & 2859 dptdz(k) > dptdz(k+1) .AND. dptdz(k) > dptdz(k+2) .AND. & 2860 dptdz(k) > dptdz(k+3) .AND. dptdz(k) > dptdz(k+4) ) THEN 2861 z_i(2) = zw(k-1) 2862 EXIT 2863 ENDIF 2864 ENDDO 2865 ENDIF 2866 2867 hom(nzb+6,1,pr_palm,sr) = z_i(1) 2868 hom(nzb+7,1,pr_palm,sr) = z_i(2) 2869 2870 ! 
2871 !-- Computation of both the characteristic vertical velocity and 2872 !-- the characteristic convective boundary layer temperature. 2873 !-- The horizontal average at nzb+1 is input for the average temperature. 2874 IF ( hom(nzb,1,18,sr) > 0.0 .AND. abs(hom(nzb,1,18,sr)) > 1.0E-8 & 2875 .AND. z_i(1) /= 0.0 ) THEN 2876 hom(nzb+8,1,pr_palm,sr) = ( g / hom(nzb+1,1,4,sr) * & 2877 hom(nzb,1,18,sr) * & 2878 ABS( z_i(1) ) )**0.333333333 2879 !-- so far this only works if Prandtl layer is used 2880 hom(nzb+11,1,pr_palm,sr) = hom(nzb,1,16,sr) / hom(nzb+8,1,pr_palm,sr) 2881 ELSE 2882 hom(nzb+8,1,pr_palm,sr) = 0.0 2883 hom(nzb+11,1,pr_palm,sr) = 0.0 2884 ENDIF 2885 2886 ! 2887 !-- Collect the time series quantities 2888 ts_value(1,sr) = hom(nzb+4,1,pr_palm,sr) ! E 2889 ts_value(2,sr) = hom(nzb+5,1,pr_palm,sr) ! E* 2890 ts_value(3,sr) = dt_3d 2891 ts_value(4,sr) = hom(nzb,1,pr_palm,sr) ! u* 2892 ts_value(5,sr) = hom(nzb+3,1,pr_palm,sr) ! th* 2893 ts_value(6,sr) = u_max 2894 ts_value(7,sr) = v_max 2895 ts_value(8,sr) = w_max 2896 ts_value(9,sr) = hom(nzb+10,1,pr_palm,sr) ! new divergence 2897 ts_value(10,sr) = hom(nzb+9,1,pr_palm,sr) ! old Divergence 2898 ts_value(11,sr) = hom(nzb+6,1,pr_palm,sr) ! z_i(1) 2899 ts_value(12,sr) = hom(nzb+7,1,pr_palm,sr) ! z_i(2) 2900 ts_value(13,sr) = hom(nzb+8,1,pr_palm,sr) ! w* 2901 ts_value(14,sr) = hom(nzb,1,16,sr) ! w'pt' at k=0 2902 ts_value(15,sr) = hom(nzb+1,1,16,sr) ! w'pt' at k=1 2903 ts_value(16,sr) = hom(nzb+1,1,18,sr) ! wpt at k=1 2904 ts_value(17,sr) = hom(nzb,1,4,sr) ! pt(0) 2905 ts_value(18,sr) = hom(nzb+1,1,4,sr) ! pt(zp) 2906 ts_value(19,sr) = hom(nzb+1,1,pr_palm,sr) ! u'w' at k=0 2907 ts_value(20,sr) = hom(nzb+2,1,pr_palm,sr) ! v'w' at k=0 2908 ts_value(21,sr) = hom(nzb,1,48,sr) ! w"q" at k=0 2909 2910 IF ( ts_value(5,sr) /= 0.0 ) THEN 2911 ts_value(22,sr) = ts_value(4,sr)**2 / & 2912 ( kappa * g * ts_value(5,sr) / ts_value(18,sr) ) ! 
L 2913 ELSE 2914 ts_value(22,sr) = 10000.0 2915 ENDIF 2916 2917 ts_value(23,sr) = hom(nzb+12,1,pr_palm,sr) ! q* 2918 ! 2919 !-- Calculate additional statistics provided by the user interface 2920 CALL user_statistics( 'time_series', sr, 0 ) 2921 2922 ENDDO ! loop of the subregions 2923 2924 !$acc end data 2925 2926 ! 2927 !-- If required, sum up horizontal averages for subsequent time averaging 2928 IF ( do_sum ) THEN 2929 IF ( average_count_pr == 0 ) hom_sum = 0.0 2930 hom_sum = hom_sum + hom(:,1,:,:) 2931 average_count_pr = average_count_pr + 1 2932 do_sum = .FALSE. 2933 ENDIF 2934 2935 ! 2936 !-- Set flag for other UPs (e.g. output routines, but also buoyancy). 2937 !-- This flag is reset after each time step in time_integration. 2938 flow_statistics_called = .TRUE. 2939 2940 CALL cpu_log( log_point(10), 'flow_statistics', 'stop' ) 2941 2942 2943 END SUBROUTINE flow_statistics 2944 #endif -
palm/trunk/SOURCE/init_3d_model.f90
r1213 r1221 23 23 ! Current revisions: 24 24 ! ------------------ 25 ! 25 ! +rflags_s_inner in copyin statement, use copyin for most arrays instead of 26 ! copy 26 27 ! 27 28 ! Former revisions: … … 1499 1500 CALL disturb_field( nzb_v_inner, tend, v ) 1500 1501 n_sor = nsor_ini 1501 !$acc data copy( d, ddzu, ddzw, nzb_s_inner, nzb_u_inner, nzb_v_inner, nzb_w_inner, p, tri, tric, u, v, w, weight_pres, weight_substep, tend ) 1502 !$acc data copyin( d, ddzu, ddzw, nzb_s_inner, nzb_u_inner ) & 1503 !$acc copyin( nzb_v_inner, nzb_w_inner, p, rflags_s_inner, tend ) & 1504 !$acc copyin( weight_pres, weight_substep ) & 1505 !$acc copy( tri, tric, u, v, w ) 1502 1506 CALL pres 1503 1507 !$acc end data -
palm/trunk/SOURCE/init_grid.f90
r1093 r1221 20 20 ! Current revisions: 21 21 ! ----------------- 22 ! 22 ! wall_flags_00 introduced to hold bits 32-63, 23 ! additional 3D-flag arrays for replacing the 2D-index array nzb_s_inner in 24 ! loops optimized for openACC (pres + flow_statistics) 23 25 ! 24 26 ! Former revisions: … … 387 389 nzb_diff_v(nysg:nyng,nxlg:nxrg), & 388 390 nzb_2d(nysg:nyng,nxlg:nxrg), & 391 rflags_s_inner(nzb:nzt+2,nysg:nyng,nxlg:nxrg), & 392 rflags_invers(nysg:nyng,nxlg:nxrg,nzb:nzt+2), & 389 393 wall_e_x(nysg:nyng,nxlg:nxrg), & 390 394 wall_e_y(nysg:nyng,nxlg:nxrg), & … … 403 407 nzb_v_inner = nzb; nzb_v_outer = nzb 404 408 nzb_w_inner = nzb; nzb_w_outer = nzb 409 410 rflags_s_inner = 1.0 411 rflags_invers = 1.0 405 412 406 413 ! … … 854 861 855 862 ENDIF 863 ! 864 !-- Set flag arrays to be used for masking of grid points 865 DO i = nxlg, nxrg 866 DO j = nysg, nyng 867 DO k = nzb, nzt+1 868 IF ( k <= nzb_s_inner(j,i) ) rflags_s_inner(k,j,i) = 0.0 869 IF ( k <= nzb_s_inner(j,i) ) rflags_invers(j,i,k) = 0.0 870 ENDDO 871 ENDDO 872 ENDDO 856 873 #endif 857 874 ENDIF … … 1107 1124 ! 1108 1125 !-- Allocate flags needed for masking walls. 1109 ALLOCATE( wall_flags_0(nzb:nzt,nys:nyn,nxl:nxr) ) 1110 wall_flags_0 = 0 1126 ALLOCATE( wall_flags_0(nzb:nzt,nys:nyn,nxl:nxr), & 1127 wall_flags_00(nzb:nzt,nys:nyn,nxl:nxr) ) 1128 wall_flags_0 = 0 1129 wall_flags_00 = 0 1111 1130 1112 1131 IF ( scalar_advec == 'ws-scheme' ) THEN … … 1311 1330 wall_flags_0(k,j,i) = IBSET( wall_flags_0(k,j,i), 31 ) 1312 1331 ELSE 1313 wall_flags_0 (k,j,i) = IBSET( wall_flags_0(k,j,i), 32)1332 wall_flags_00(k,j,i) = IBSET( wall_flags_00(k,j,i), 0 ) 1314 1333 ENDIF 1315 1334 ! … … 1325 1344 !-- because flux_t(nzb_w_inner(j,i)) is used for the tendency 1326 1345 !-- at k == nzb_w_inner(j,i)+1. 1327 wall_flags_0 (k,j,i) = IBSET( wall_flags_0(k,j,i), 33)1346 wall_flags_00(k,j,i) = IBSET( wall_flags_00(k,j,i), 1 ) 1328 1347 flag_set = .TRUE. 1329 1348 ELSEIF ( k == nzb_w_inner(j,i) + 2 .OR. 
k == nzt - 1 ) THEN 1330 wall_flags_0 (k,j,i) = IBSET( wall_flags_0(k,j,i), 34)1349 wall_flags_00(k,j,i) = IBSET( wall_flags_00(k,j,i), 2 ) 1331 1350 flag_set = .TRUE. 1332 1351 ELSEIF ( k > nzb_w_inner(j,i) .AND. .NOT. flag_set ) THEN 1333 wall_flags_0 (k,j,i) = IBSET( wall_flags_0(k,j,i), 35)1352 wall_flags_00(k,j,i) = IBSET( wall_flags_00(k,j,i), 3 ) 1334 1353 ENDIF 1335 1354 -
palm/trunk/SOURCE/modules.f90
r1217 r1221 20 20 ! Current revisions: 21 21 ! ------------------ 22 ! 22 ! wall_flags_0 changed to 32bit int, +wall_flags_00, 23 ! +rflags_s_inner, rflags_invers 23 24 ! 24 25 ! Former revisions: … … 1088 1089 ! Description: 1089 1090 ! ------------ 1090 ! Definition of array bounds and number of gridpoints1091 ! Definition of array bounds, number of gridpoints, and wall flag arrays 1091 1092 !------------------------------------------------------------------------------! 1092 1093 … … 1112 1113 INTEGER, DIMENSION(:,:,:), POINTER :: flags 1113 1114 1114 INTEGER( KIND = SELECTED_INT_KIND(11) ), DIMENSION(:,:,:), ALLOCATABLE :: & 1115 wall_flags_0 ! need to have 34 Bit 1115 INTEGER, DIMENSION(:,:,:), ALLOCATABLE :: wall_flags_0, wall_flags_00 1116 1116 1117 1117 INTEGER, DIMENSION(:,:,:), ALLOCATABLE, TARGET :: & … … 1120 1120 wall_flags_9, wall_flags_10 1121 1121 1122 REAL, DIMENSION(:,:,:), ALLOCATABLE :: rflags_s_inner, rflags_invers 1122 1123 1123 1124 SAVE -
palm/trunk/SOURCE/palm.f90
r1213 r1221 20 20 ! Current revisions: 21 21 ! ----------------- 22 ! 22 ! +wall_flags_00, rflags_invers, rflags_s_inner in copyin statement 23 23 ! 24 24 ! Former revisions: … … 252 252 !$acc copyin( nzb_diff_v, nzb_s_inner, nzb_s_outer, nzb_u_inner ) & 253 253 !$acc copyin( nzb_u_outer, nzb_v_inner, nzb_v_outer, nzb_w_inner ) & 254 !$acc copyin( nzb_w_outer, wall_heatflux, wall_e_x, wall_e_y, wall_u, wall_v, wall_w_x, wall_w_y, wall_flags_0 ) & 254 !$acc copyin( nzb_w_outer, rflags_invers, rflags_s_inner, rmask, wall_heatflux, wall_e_x, wall_e_y, wall_u, wall_v, wall_w_x, wall_w_y, wall_flags_0, wall_flags_00 ) & 255 !$acc copyin( ngp_2dh, ngp_2dh_s_inner ) & 255 256 !$acc copyin( weight_pres, weight_substep ) 256 257 ! -
palm/trunk/SOURCE/pres.f90
r1213 r1221 20 20 ! Current revisions: 21 21 ! ------------------ 22 ! 22 ! openACC porting of reduction operations, loops for calculating d are 23 ! using the rflags_s_inner multiply flag instead of the nzb_s_inner loop index 23 24 ! 24 25 ! Former revisions: … … 360 361 !$OMP PARALLEL PRIVATE (i,j,k) 361 362 !$OMP DO SCHEDULE( STATIC ) 362 !$acc kernels present( d, ddzw, nzb_s_inner, u, v, w )363 !$acc loop 363 !$acc kernels present( d, ddzw, rflags_s_inner, u, v, w ) 364 !$acc loop collapse( 3 ) 364 365 DO i = nxl, nxr 365 366 DO j = nys, nyn 366 !$acc loop vector(32)367 367 DO k = 1, nzt 368 IF ( k > nzb_s_inner(j,i) ) THEN 369 d(k,j,i) = ( ( u(k,j,i+1) - u(k,j,i) ) * ddx + & 370 ( v(k,j+1,i) - v(k,j,i) ) * ddy + & 371 ( w(k,j,i) - w(k-1,j,i) ) * ddzw(k) ) * ddt_3d & 372 * d_weight_pres 373 ENDIF 368 d(k,j,i) = ( ( u(k,j,i+1) - u(k,j,i) ) * ddx + & 369 ( v(k,j+1,i) - v(k,j,i) ) * ddy + & 370 ( w(k,j,i) - w(k-1,j,i) ) * ddzw(k) ) * ddt_3d & 371 * d_weight_pres * rflags_s_inner(k,j,i) 374 372 ENDDO 375 373 ENDDO … … 382 380 !$OMP PARALLEL PRIVATE (i,j,k) FIRSTPRIVATE(threadsum) REDUCTION(+:localsum) 383 381 !$OMP DO SCHEDULE( STATIC ) 382 !$acc parallel loop collapse(3) present( d ) reduction(+:threadsum) 384 383 DO i = nxl, nxr 385 384 DO j = nys, nyn … … 389 388 ENDDO 390 389 ENDDO 390 !$acc end parallel 391 391 localsum = localsum + threadsum * dt_3d * & 392 392 weight_pres(intermediate_timestep_count) … … 594 594 !-- Correction of the provisional velocities with the current perturbation 595 595 !-- pressure just computed 596 !$acc update host( u, v, w )597 596 IF ( conserve_volume_flow .AND. ( bc_lr_cyc .OR. 
bc_ns_cyc ) ) THEN 598 597 volume_flow_l(1) = 0.0 … … 748 747 ENDDO 749 748 #else 750 !$acc kernels present( d, ddzw, nzb_s_inner, u, v, w )751 !$acc loop 749 !$acc kernels present( d, ddzw, rflags_s_inner, u, v, w ) 750 !$acc loop collapse( 3 ) 752 751 DO i = nxl, nxr 753 752 DO j = nys, nyn 754 !$acc loop vector( 32 )755 753 DO k = 1, nzt 756 IF ( k > nzb_s_inner(j,i) ) THEN 757 d(k,j,i) = ( u(k,j,i+1) - u(k,j,i) ) * ddx + & 758 ( v(k,j+1,i) - v(k,j,i) ) * ddy + & 759 ( w(k,j,i) - w(k-1,j,i) ) * ddzw(k) 760 ENDIF 754 d(k,j,i) = ( ( u(k,j,i+1) - u(k,j,i) ) * ddx + & 755 ( v(k,j+1,i) - v(k,j,i) ) * ddy + & 756 ( w(k,j,i) - w(k-1,j,i) ) * ddzw(k) & 757 ) * rflags_s_inner(k,j,i) 761 758 ENDDO 762 759 ENDDO … … 765 762 ! 766 763 !-- Compute possible PE-sum of divergences for flow_statistics 764 !$acc parallel loop collapse(3) present( d ) reduction(+:threadsum) 767 765 DO i = nxl, nxr 768 766 DO j = nys, nyn 769 DO k = nzb _s_inner(j,i)+1, nzt767 DO k = nzb+1, nzt 770 768 threadsum = threadsum + ABS( d(k,j,i) ) 771 769 ENDDO 772 770 ENDDO 773 771 ENDDO 772 !$acc end parallel 774 773 #endif 775 774 -
palm/trunk/SOURCE/time_integration.f90
r1182 r1221 20 20 ! Current revisions: 21 21 ! ------------------ 22 ! 22 ! host update of arrays before timestep is called 23 23 ! 24 24 ! Former revisions: … … 211 211 212 212 CALL cpu_log( log_point_s(10), 'timesteps', 'start' ) 213 213 214 ! 214 215 !-- Determine size of next time step 215 IF ( simulated_time /= 0.0 ) CALL timestep 216 IF ( simulated_time /= 0.0 ) THEN 217 !$acc update host( kh, km, u, v, w ) 218 CALL timestep 219 ENDIF 220 216 221 ! 217 222 !-- Execute the user-defined actions -
palm/trunk/SOURCE/tridia_solver.f90
r1217 r1221 20 20 ! Current revisions: 21 21 ! ------------------ 22 ! 22 ! dummy argument tri in 1d-routines replaced by tri_for_1d because of name 23 ! conflict with arry tri in module arrays_3d 23 24 ! 24 25 ! Former revisions: … … 382 383 383 384 384 SUBROUTINE tridia_1dd( ddx2, ddy2, nx, ny, j, ar, tri )385 SUBROUTINE tridia_1dd( ddx2, ddy2, nx, ny, j, ar, tri_for_1d ) 385 386 386 387 !------------------------------------------------------------------------------! … … 408 409 409 410 REAL, DIMENSION(0:nx,1:nz) :: ar 410 REAL, DIMENSION(5,0:nx,0:nz-1) :: tri 411 REAL, DIMENSION(5,0:nx,0:nz-1) :: tri_for_1d 411 412 412 413 … … 424 425 DO k = 0, nz-1 425 426 DO i = 0,nx 426 tri (2,i,k) = ddzu_pres(k+1) * ddzw(k+1)427 tri (3,i,k) = ddzu_pres(k+2) * ddzw(k+1)427 tri_for_1d(2,i,k) = ddzu_pres(k+1) * ddzw(k+1) 428 tri_for_1d(3,i,k) = ddzu_pres(k+2) * ddzw(k+1) 428 429 ENDDO 429 430 ENDDO … … 433 434 IF ( j <= nnyh ) THEN 434 435 #if defined( __intel11 ) 435 CALL maketri_1dd( j, tri )436 CALL maketri_1dd( j, tri_for_1d ) 436 437 #else 437 438 CALL maketri_1dd( j ) … … 439 440 ELSE 440 441 #if defined( __intel11 ) 441 CALL maketri_1dd( ny+1-j, tri )442 CALL maketri_1dd( ny+1-j, tri_for_1d ) 442 443 #else 443 444 CALL maketri_1dd( ny+1-j ) … … 445 446 ENDIF 446 447 #if defined( __intel11 ) 447 CALL split_1dd( tri )448 CALL split_1dd( tri_for_1d ) 448 449 #else 449 450 CALL split_1dd 450 451 #endif 451 CALL substi_1dd( ar, tri )452 CALL substi_1dd( ar, tri_for_1d ) 452 453 453 454 CONTAINS 454 455 455 456 #if defined( __intel11 ) 456 SUBROUTINE maketri_1dd( j, tri )457 SUBROUTINE maketri_1dd( j, tri_for_1d ) 457 458 #else 458 459 SUBROUTINE maketri_1dd( j ) … … 473 474 474 475 #if defined( __intel11 ) 475 REAL, DIMENSION(5,0:nx,0:nz-1) :: tri 476 REAL, DIMENSION(5,0:nx,0:nz-1) :: tri_for_1d 476 477 #endif 477 478 … … 502 503 a = -1.0 * ddzu_pres(k+2) * ddzw(k+1) 503 504 c = -1.0 * ddzu_pres(k+1) * ddzw(k+1) 504 tri (1,i,k) = a + c - l(i)505 tri_for_1d(1,i,k) = a + c - 
l(i) 505 506 ENDDO 506 507 ENDDO 507 508 IF ( ibc_p_b == 1 ) THEN 508 509 DO i = 0, nx 509 tri (1,i,0) = tri(1,i,0) + tri(2,i,0)510 tri_for_1d(1,i,0) = tri_for_1d(1,i,0) + tri_for_1d(2,i,0) 510 511 ENDDO 511 512 ENDIF 512 513 IF ( ibc_p_t == 1 ) THEN 513 514 DO i = 0, nx 514 tri (1,i,nz-1) = tri(1,i,nz-1) + tri(3,i,nz-1)515 tri_for_1d(1,i,nz-1) = tri_for_1d(1,i,nz-1) + tri_for_1d(3,i,nz-1) 515 516 ENDDO 516 517 ENDIF … … 520 521 521 522 #if defined( __intel11 ) 522 SUBROUTINE split_1dd( tri )523 SUBROUTINE split_1dd( tri_for_1d ) 523 524 #else 524 525 SUBROUTINE split_1dd … … 534 535 535 536 #if defined( __intel11 ) 536 REAL, DIMENSION(5,0:nx,0:nz-1) :: tri 537 REAL, DIMENSION(5,0:nx,0:nz-1) :: tri_for_1d 537 538 #endif 538 539 … … 541 542 !-- Splitting 542 543 DO i = 0, nx 543 tri (4,i,0) = tri(1,i,0)544 tri_for_1d(4,i,0) = tri_for_1d(1,i,0) 544 545 ENDDO 545 546 DO k = 1, nz-1 546 547 DO i = 0, nx 547 tri (5,i,k) = tri(2,i,k) / tri(4,i,k-1)548 tri (4,i,k) = tri(1,i,k) - tri(3,i,k-1) * tri(5,i,k)548 tri_for_1d(5,i,k) = tri_for_1d(2,i,k) / tri_for_1d(4,i,k-1) 549 tri_for_1d(4,i,k) = tri_for_1d(1,i,k) - tri_for_1d(3,i,k-1) * tri_for_1d(5,i,k) 549 550 ENDDO 550 551 ENDDO … … 553 554 554 555 555 SUBROUTINE substi_1dd( ar, tri )556 SUBROUTINE substi_1dd( ar, tri_for_1d ) 556 557 557 558 !------------------------------------------------------------------------------! … … 565 566 REAL, DIMENSION(0:nx,nz) :: ar 566 567 REAL, DIMENSION(0:nx,0:nz-1) :: ar1 567 REAL, DIMENSION(5,0:nx,0:nz-1) :: tri 568 REAL, DIMENSION(5,0:nx,0:nz-1) :: tri_for_1d 568 569 569 570 ! … … 574 575 DO k = 1, nz-1 575 576 DO i = 0, nx 576 ar1(i,k) = ar(i,k+1) - tri (5,i,k) * ar1(i,k-1)577 ar1(i,k) = ar(i,k+1) - tri_for_1d(5,i,k) * ar1(i,k-1) 577 578 ENDDO 578 579 ENDDO … … 584 585 !-- the model domain. 
585 586 DO i = 0, nx 586 ar(i,nz) = ar1(i,nz-1) / ( tri (4,i,nz-1) + 1.0E-20 )587 ar(i,nz) = ar1(i,nz-1) / ( tri_for_1d(4,i,nz-1) + 1.0E-20 ) 587 588 ENDDO 588 589 DO k = nz-2, 0, -1 589 590 DO i = 0, nx 590 ar(i,k+1) = ( ar1(i,k) - tri (3,i,k) * ar(i,k+2) ) &591 / tri (4,i,k)591 ar(i,k+1) = ( ar1(i,k) - tri_for_1d(3,i,k) * ar(i,k+2) ) & 592 / tri_for_1d(4,i,k) 592 593 ENDDO 593 594 ENDDO
Note: See TracChangeset for help on using the changeset viewer.