SUBROUTINE poismg( r ) !------------------------------------------------------------------------------! ! Attention: Loop unrolling and cache optimization in SOR-Red/Black method ! still does not bring the expected speedup on ibm! Further work ! is required. ! ! Current revisions: ! ----------------- ! ! ! Former revisions: ! ----------------- ! $Id: poismg.f90 392 2009-09-24 10:39:14Z maronga $ ! ! 257 2009-03-11 15:17:42Z heinze ! Output of messages replaced by message handling routine. ! ! 181 2008-07-30 07:07:47Z raasch ! Bugfix: grid_level+1 has to be used in restrict for flags-array ! ! 114 2007-10-10 00:03:15Z raasch ! Boundary conditions at walls are implicitly set using flag arrays. Only ! Neumann BC is allowed. Upper walls are still not realized. ! Bottom and top BCs for array f_mg in restrict removed because boundary ! values are not needed (right hand side of SOR iteration). ! ! 75 2007-03-22 09:54:05Z raasch ! 2nd+3rd argument removed from exchange horiz ! ! RCS Log replace by Id keyword, revision history cleaned up ! ! Revision 1.6 2005/03/26 20:55:54 raasch ! Implementation of non-cyclic (Neumann) horizontal boundary conditions, ! routine prolong simplified (one call of exchange_horiz spared) ! ! Revision 1.1 2001/07/20 13:10:51 raasch ! Initial revision ! ! ! Description: ! ------------ ! Solves the Poisson equation for the perturbation pressure with a multigrid ! V- or W-Cycle scheme. ! ! This multigrid method was originally developed for PALM by Joerg Uhlenbrock, ! September 2000 - July 2001. !------------------------------------------------------------------------------! USE arrays_3d USE control_parameters USE cpulog USE grid_variables USE indices USE interfaces USE pegrid IMPLICIT NONE REAL :: maxerror, maximum_mgcycles, residual_norm REAL, DIMENSION(nzb:nzt+1,nys-1:nyn+1,nxl-1:nxr+1) :: r REAL, DIMENSION(:,:,:), ALLOCATABLE :: p3 CALL cpu_log( log_point_s(29), 'poismg', 'start' ) ! !-- Initialize arrays and variables used in this subroutine ALLOCATE ( p3(nzb:nzt+1,nys-1:nyn+1,nxl-1:nxr+1) ) ! !-- Some boundaries have to be added to divergence array CALL exchange_horiz( d ) d(nzb,:,:) = d(nzb+1,:,:) ! !-- Initiation of the multigrid scheme. Does n cycles until the !-- residual is smaller than the given limit. The accuracy of the solution !-- of the poisson equation will increase with the number of cycles. !-- If the number of cycles is preset by the user, this number will be !-- carried out regardless of the accuracy. grid_level_count = 0 mgcycles = 0 IF ( mg_cycles == -1 ) THEN maximum_mgcycles = 0 residual_norm = 1.0 ELSE maximum_mgcycles = mg_cycles residual_norm = 0.0 ENDIF DO WHILE ( residual_norm > residual_limit .OR. & mgcycles < maximum_mgcycles ) CALL next_mg_level( d, p, p3, r) ! !-- Calculate the residual if the user has not preset the number of !-- cycles to be performed IF ( maximum_mgcycles == 0 ) THEN CALL resid( d, p, r ) maxerror = SUM( r(nzb+1:nzt,nys:nyn,nxl:nxr)**2 ) #if defined( __parallel ) CALL MPI_ALLREDUCE( maxerror, residual_norm, 1, MPI_REAL, MPI_SUM, & comm2d, ierr) #else residual_norm = maxerror #endif residual_norm = SQRT( residual_norm ) ENDIF mgcycles = mgcycles + 1 ! !-- If the user has not limited the number of cycles, stop the run in case !-- of insufficient convergence IF ( mgcycles > 1000 .AND. mg_cycles == -1 ) THEN message_string = 'no sufficient convergence within 1000 cycles' CALL message( 'poismg', 'PA0283', 1, 2, 0, 6, 0 ) ENDIF ENDDO DEALLOCATE( p3 ) CALL cpu_log( log_point_s(29), 'poismg', 'stop' ) END SUBROUTINE poismg SUBROUTINE resid( f_mg, p_mg, r ) !------------------------------------------------------------------------------! ! Description: ! ------------ ! Computes the residual of the perturbation pressure. !------------------------------------------------------------------------------! USE arrays_3d USE control_parameters USE grid_variables USE indices USE pegrid IMPLICIT NONE INTEGER :: i, j, k, l REAL, DIMENSION(nzb:nzt_mg(grid_level)+1, & nys_mg(grid_level)-1:nyn_mg(grid_level)+1, & nxl_mg(grid_level)-1:nxr_mg(grid_level)+1) :: f_mg, p_mg, r ! !-- Calculate the residual l = grid_level ! !-- Choose flag array of this level SELECT CASE ( l ) CASE ( 1 ) flags => wall_flags_1 CASE ( 2 ) flags => wall_flags_2 CASE ( 3 ) flags => wall_flags_3 CASE ( 4 ) flags => wall_flags_4 CASE ( 5 ) flags => wall_flags_5 CASE ( 6 ) flags => wall_flags_6 CASE ( 7 ) flags => wall_flags_7 CASE ( 8 ) flags => wall_flags_8 CASE ( 9 ) flags => wall_flags_9 CASE ( 10 ) flags => wall_flags_10 END SELECT !$OMP PARALLEL PRIVATE (i,j,k) !$OMP DO DO i = nxl_mg(l), nxr_mg(l) DO j = nys_mg(l), nyn_mg(l) DO k = nzb+1, nzt_mg(l) r(k,j,i) = f_mg(k,j,i) & - ddx2_mg(l) * & ( p_mg(k,j,i+1) + IBITS( flags(k,j,i), 5, 1 ) * & ( p_mg(k,j,i) - p_mg(k,j,i+1) ) + & p_mg(k,j,i-1) + IBITS( flags(k,j,i), 4, 1 ) * & ( p_mg(k,j,i) - p_mg(k,j,i-1) ) ) & - ddy2_mg(l) * & ( p_mg(k,j+1,i) + IBITS( flags(k,j,i), 3, 1 ) * & ( p_mg(k,j,i) - p_mg(k,j+1,i) ) + & p_mg(k,j-1,i) + IBITS( flags(k,j,i), 2, 1 ) * & ( p_mg(k,j,i) - p_mg(k,j-1,i) ) ) & - f2_mg(k,l) * p_mg(k+1,j,i) & - f3_mg(k,l) * & ( p_mg(k-1,j,i) + IBITS( flags(k,j,i), 0, 1 ) * & ( p_mg(k,j,i) - p_mg(k-1,j,i) ) ) & + f1_mg(k,l) * p_mg(k,j,i) ! !-- Residual within topography should be zero r(k,j,i) = r(k,j,i) * ( 1.0 - IBITS( flags(k,j,i), 6, 1 ) ) ENDDO ENDDO ENDDO !$OMP END PARALLEL ! !-- Horizontal boundary conditions CALL exchange_horiz( r ) IF ( bc_lr /= 'cyclic' ) THEN IF ( inflow_l .OR. outflow_l ) r(:,:,nxl_mg(l)-1) = r(:,:,nxl_mg(l)) IF ( inflow_r .OR. outflow_r ) r(:,:,nxr_mg(l)+1) = r(:,:,nxr_mg(l)) ENDIF IF ( bc_ns /= 'cyclic' ) THEN IF ( inflow_n .OR. outflow_n ) r(:,nyn_mg(l)+1,:) = r(:,nyn_mg(l),:) IF ( inflow_s .OR. outflow_s ) r(:,nys_mg(l)-1,:) = r(:,nys_mg(l),:) ENDIF ! !-- Top boundary condition !-- A Neumann boundary condition for r is implicitly set in routine restrict IF ( ibc_p_t == 1 ) THEN r(nzt_mg(l)+1,:,: ) = r(nzt_mg(l),:,:) ELSE r(nzt_mg(l)+1,:,: ) = 0.0 ENDIF END SUBROUTINE resid SUBROUTINE restrict( f_mg, r ) !------------------------------------------------------------------------------! ! Description: ! ------------ ! Interpolates the residual on the next coarser grid with "full weighting" ! scheme !------------------------------------------------------------------------------! USE control_parameters USE grid_variables USE indices USE pegrid IMPLICIT NONE INTEGER :: i, ic, j, jc, k, kc, l REAL :: rkjim, rkjip, rkjmi, rkjmim, rkjmip, rkjpi, rkjpim, rkjpip, & rkmji, rkmjim, rkmjip, rkmjmi, rkmjmim, rkmjmip, rkmjpi, rkmjpim, & rkmjpip REAL, DIMENSION(nzb:nzt_mg(grid_level)+1, & nys_mg(grid_level)-1:nyn_mg(grid_level)+1, & nxl_mg(grid_level)-1:nxr_mg(grid_level)+1) :: f_mg REAL, DIMENSION(nzb:nzt_mg(grid_level+1)+1, & nys_mg(grid_level+1)-1:nyn_mg(grid_level+1)+1, & nxl_mg(grid_level+1)-1:nxr_mg(grid_level+1)+1) :: r ! !-- Interpolate the residual l = grid_level ! !-- Choose flag array of the upper level SELECT CASE ( l+1 ) CASE ( 1 ) flags => wall_flags_1 CASE ( 2 ) flags => wall_flags_2 CASE ( 3 ) flags => wall_flags_3 CASE ( 4 ) flags => wall_flags_4 CASE ( 5 ) flags => wall_flags_5 CASE ( 6 ) flags => wall_flags_6 CASE ( 7 ) flags => wall_flags_7 CASE ( 8 ) flags => wall_flags_8 CASE ( 9 ) flags => wall_flags_9 CASE ( 10 ) flags => wall_flags_10 END SELECT !$OMP PARALLEL PRIVATE (i,j,k,ic,jc,kc) !$OMP DO DO ic = nxl_mg(l), nxr_mg(l) i = 2*ic DO jc = nys_mg(l), nyn_mg(l) j = 2*jc DO kc = nzb+1, nzt_mg(l) k = 2*kc-1 ! !-- Use implicit Neumann BCs if the respective gridpoint is inside !-- the building rkjim = r(k,j,i-1) + IBITS( flags(k,j,i-1), 6, 1 ) * & ( r(k,j,i) - r(k,j,i-1) ) rkjip = r(k,j,i+1) + IBITS( flags(k,j,i+1), 6, 1 ) * & ( r(k,j,i) - r(k,j,i+1) ) rkjpi = r(k,j+1,i) + IBITS( flags(k,j+1,i), 6, 1 ) * & ( r(k,j,i) - r(k,j+1,i) ) rkjmi = r(k,j-1,i) + IBITS( flags(k,j-1,i), 6, 1 ) * & ( r(k,j,i) - r(k,j-1,i) ) rkjmim = r(k,j-1,i-1) + IBITS( flags(k,j-1,i-1), 6, 1 ) * & ( r(k,j,i) - r(k,j-1,i-1) ) rkjpim = r(k,j+1,i-1) + IBITS( flags(k,j+1,i-1), 6, 1 ) * & ( r(k,j,i) - r(k,j+1,i-1) ) rkjmip = r(k,j-1,i+1) + IBITS( flags(k,j-1,i+1), 6, 1 ) * & ( r(k,j,i) - r(k,j-1,i+1) ) rkjpip = r(k,j+1,i+1) + IBITS( flags(k,j+1,i+1), 6, 1 ) * & ( r(k,j,i) - r(k,j+1,i+1) ) rkmji = r(k-1,j,i) + IBITS( flags(k-1,j,i), 6, 1 ) * & ( r(k,j,i) - r(k-1,j,i) ) rkmjim = r(k-1,j,i-1) + IBITS( flags(k-1,j,i-1), 6, 1 ) * & ( r(k,j,i) - r(k-1,j,i-1) ) rkmjip = r(k-1,j,i+1) + IBITS( flags(k-1,j,i+1), 6, 1 ) * & ( r(k,j,i) - r(k-1,j,i+1) ) rkmjpi = r(k-1,j+1,i) + IBITS( flags(k-1,j+1,i), 6, 1 ) * & ( r(k,j,i) - r(k-1,j+1,i) ) rkmjmi = r(k-1,j-1,i) + IBITS( flags(k-1,j-1,i), 6, 1 ) * & ( r(k,j,i) - r(k-1,j-1,i) ) rkmjmim = r(k-1,j-1,i-1) + IBITS( flags(k-1,j-1,i-1), 6, 1 ) * & ( r(k,j,i) - r(k-1,j-1,i-1) ) rkmjpim = r(k-1,j+1,i-1) + IBITS( flags(k-1,j+1,i-1), 6, 1 ) * & ( r(k,j,i) - r(k-1,j+1,i-1) ) rkmjmip = r(k-1,j-1,i+1) + IBITS( flags(k-1,j-1,i+1), 6, 1 ) * & ( r(k,j,i) - r(k-1,j-1,i+1) ) rkmjpip = r(k-1,j+1,i+1) + IBITS( flags(k-1,j+1,i+1), 6, 1 ) * & ( r(k,j,i) - r(k-1,j+1,i+1) ) f_mg(kc,jc,ic) = 1.0 / 64.0 * ( & 8.0 * r(k,j,i) & + 4.0 * ( rkjim + rkjip + & rkjpi + rkjmi ) & + 2.0 * ( rkjmim + rkjpim + & rkjmip + rkjpip ) & + 4.0 * rkmji & + 2.0 * ( rkmjim + rkmjim + & rkmjpi + rkmjmi ) & + ( rkmjmim + rkmjpim + & rkmjmip + rkmjpip ) & + 4.0 * r(k+1,j,i) & + 2.0 * ( r(k+1,j,i-1) + r(k+1,j,i+1) + & r(k+1,j+1,i) + r(k+1,j-1,i) ) & + ( r(k+1,j-1,i-1) + r(k+1,j+1,i-1) + & r(k+1,j-1,i+1) + r(k+1,j+1,i+1) ) & ) ! f_mg(kc,jc,ic) = 1.0 / 64.0 * ( & ! 8.0 * r(k,j,i) & ! + 4.0 * ( r(k,j,i-1) + r(k,j,i+1) + & ! r(k,j+1,i) + r(k,j-1,i) ) & ! + 2.0 * ( r(k,j-1,i-1) + r(k,j+1,i-1) + & ! r(k,j-1,i+1) + r(k,j+1,i+1) ) & ! + 4.0 * r(k-1,j,i) & ! + 2.0 * ( r(k-1,j,i-1) + r(k-1,j,i+1) + & ! r(k-1,j+1,i) + r(k-1,j-1,i) ) & ! + ( r(k-1,j-1,i-1) + r(k-1,j+1,i-1) + & ! r(k-1,j-1,i+1) + r(k-1,j+1,i+1) ) & ! + 4.0 * r(k+1,j,i) & ! + 2.0 * ( r(k+1,j,i-1) + r(k+1,j,i+1) + & ! r(k+1,j+1,i) + r(k+1,j-1,i) ) & ! + ( r(k+1,j-1,i-1) + r(k+1,j+1,i-1) + & ! r(k+1,j-1,i+1) + r(k+1,j+1,i+1) ) & ! ) ENDDO ENDDO ENDDO !$OMP END PARALLEL ! !-- Horizontal boundary conditions CALL exchange_horiz( f_mg ) IF ( bc_lr /= 'cyclic' ) THEN IF (inflow_l .OR. outflow_l) f_mg(:,:,nxl_mg(l)-1) = f_mg(:,:,nxl_mg(l)) IF (inflow_r .OR. outflow_r) f_mg(:,:,nxr_mg(l)+1) = f_mg(:,:,nxr_mg(l)) ENDIF IF ( bc_ns /= 'cyclic' ) THEN IF (inflow_n .OR. outflow_n) f_mg(:,nyn_mg(l)+1,:) = f_mg(:,nyn_mg(l),:) IF (inflow_s .OR. outflow_s) f_mg(:,nys_mg(l)-1,:) = f_mg(:,nys_mg(l),:) ENDIF ! !-- Bottom and top boundary conditions ! IF ( ibc_p_b == 1 ) THEN ! f_mg(nzb,:,: ) = f_mg(nzb+1,:,:) ! ELSE ! f_mg(nzb,:,: ) = 0.0 ! ENDIF ! ! IF ( ibc_p_t == 1 ) THEN ! f_mg(nzt_mg(l)+1,:,: ) = f_mg(nzt_mg(l),:,:) ! ELSE ! f_mg(nzt_mg(l)+1,:,: ) = 0.0 ! ENDIF END SUBROUTINE restrict SUBROUTINE prolong( p, temp ) !------------------------------------------------------------------------------! ! Description: ! ------------ ! Interpolates the correction of the perturbation pressure ! to the next finer grid. !------------------------------------------------------------------------------! USE control_parameters USE pegrid USE indices IMPLICIT NONE INTEGER :: i, j, k, l REAL, DIMENSION(nzb:nzt_mg(grid_level-1)+1, & nys_mg(grid_level-1)-1:nyn_mg(grid_level-1)+1, & nxl_mg(grid_level-1)-1:nxr_mg(grid_level-1)+1 ) :: p REAL, DIMENSION(nzb:nzt_mg(grid_level)+1, & nys_mg(grid_level)-1:nyn_mg(grid_level)+1, & nxl_mg(grid_level)-1:nxr_mg(grid_level)+1) :: temp ! !-- First, store elements of the coarser grid on the next finer grid l = grid_level !$OMP PARALLEL PRIVATE (i,j,k) !$OMP DO DO i = nxl_mg(l-1), nxr_mg(l-1) DO j = nys_mg(l-1), nyn_mg(l-1) !CDIR NODEP DO k = nzb+1, nzt_mg(l-1) ! !-- Points of the coarse grid are directly stored on the next finer !-- grid temp(2*k-1,2*j,2*i) = p(k,j,i) ! !-- Points between two coarse-grid points temp(2*k-1,2*j,2*i+1) = 0.5 * ( p(k,j,i) + p(k,j,i+1) ) temp(2*k-1,2*j+1,2*i) = 0.5 * ( p(k,j,i) + p(k,j+1,i) ) temp(2*k,2*j,2*i) = 0.5 * ( p(k,j,i) + p(k+1,j,i) ) ! !-- Points in the center of the planes stretched by four points !-- of the coarse grid cube temp(2*k-1,2*j+1,2*i+1) = 0.25 * ( p(k,j,i) + p(k,j,i+1) + & p(k,j+1,i) + p(k,j+1,i+1) ) temp(2*k,2*j,2*i+1) = 0.25 * ( p(k,j,i) + p(k,j,i+1) + & p(k+1,j,i) + p(k+1,j,i+1) ) temp(2*k,2*j+1,2*i) = 0.25 * ( p(k,j,i) + p(k,j+1,i) + & p(k+1,j,i) + p(k+1,j+1,i) ) ! !-- Points in the middle of coarse grid cube temp(2*k,2*j+1,2*i+1) = 0.125 * ( p(k,j,i) + p(k,j,i+1) + & p(k,j+1,i) + p(k,j+1,i+1) + & p(k+1,j,i) + p(k+1,j,i+1) + & p(k+1,j+1,i) + p(k+1,j+1,i+1) ) ENDDO ENDDO ENDDO !$OMP END PARALLEL ! !-- Horizontal boundary conditions CALL exchange_horiz( temp ) IF ( bc_lr /= 'cyclic' ) THEN IF (inflow_l .OR. outflow_l) temp(:,:,nxl_mg(l)-1) = temp(:,:,nxl_mg(l)) IF (inflow_r .OR. outflow_r) temp(:,:,nxr_mg(l)+1) = temp(:,:,nxr_mg(l)) ENDIF IF ( bc_ns /= 'cyclic' ) THEN IF (inflow_n .OR. outflow_n) temp(:,nyn_mg(l)+1,:) = temp(:,nyn_mg(l),:) IF (inflow_s .OR. outflow_s) temp(:,nys_mg(l)-1,:) = temp(:,nys_mg(l),:) ENDIF ! !-- Bottom and top boundary conditions IF ( ibc_p_b == 1 ) THEN temp(nzb,:,: ) = temp(nzb+1,:,:) ELSE temp(nzb,:,: ) = 0.0 ENDIF IF ( ibc_p_t == 1 ) THEN temp(nzt_mg(l)+1,:,: ) = temp(nzt_mg(l),:,:) ELSE temp(nzt_mg(l)+1,:,: ) = 0.0 ENDIF END SUBROUTINE prolong SUBROUTINE redblack( f_mg, p_mg ) !------------------------------------------------------------------------------! ! Description: ! ------------ ! Relaxation method for the multigrid scheme. A Gauss-Seidel iteration with ! 3D-Red-Black decomposition (GS-RB) is used. !------------------------------------------------------------------------------! USE arrays_3d USE control_parameters USE cpulog USE grid_variables USE indices USE interfaces USE pegrid IMPLICIT NONE INTEGER :: colour, i, ic, j, jc, jj, k, l, n LOGICAL :: unroll REAL :: wall_left, wall_north, wall_right, wall_south, wall_total, wall_top REAL, DIMENSION(nzb:nzt_mg(grid_level)+1, & nys_mg(grid_level)-1:nyn_mg(grid_level)+1, & nxl_mg(grid_level)-1:nxr_mg(grid_level)+1) :: f_mg, p_mg l = grid_level ! !-- Choose flag array of this level SELECT CASE ( l ) CASE ( 1 ) flags => wall_flags_1 CASE ( 2 ) flags => wall_flags_2 CASE ( 3 ) flags => wall_flags_3 CASE ( 4 ) flags => wall_flags_4 CASE ( 5 ) flags => wall_flags_5 CASE ( 6 ) flags => wall_flags_6 CASE ( 7 ) flags => wall_flags_7 CASE ( 8 ) flags => wall_flags_8 CASE ( 9 ) flags => wall_flags_9 CASE ( 10 ) flags => wall_flags_10 END SELECT unroll = ( MOD( nyn_mg(l)-nys_mg(l)+1, 4 ) == 0 .AND. & MOD( nxr_mg(l)-nxl_mg(l)+1, 2 ) == 0 ) DO n = 1, ngsrb DO colour = 1, 2 IF ( .NOT. unroll ) THEN CALL cpu_log( log_point_s(36), 'redblack_no_unroll', 'start' ) ! !-- Without unrolling of loops, no cache optimization DO i = nxl_mg(l), nxr_mg(l), 2 DO j = nys_mg(l) + 2 - colour, nyn_mg(l), 2 DO k = nzb+1, nzt_mg(l), 2 ! p_mg(k,j,i) = 1.0 / f1_mg(k,l) * ( & ! ddx2_mg(l) * ( p_mg(k,j,i+1) + p_mg(k,j,i-1) ) & ! + ddy2_mg(l) * ( p_mg(k,j+1,i) + p_mg(k,j-1,i) ) & ! + f2_mg(k,l) * p_mg(k+1,j,i) & ! + f3_mg(k,l) * p_mg(k-1,j,i) - f_mg(k,j,i) & ! ) p_mg(k,j,i) = 1.0 / f1_mg(k,l) * ( & ddx2_mg(l) * & ( p_mg(k,j,i+1) + IBITS( flags(k,j,i), 5, 1 ) * & ( p_mg(k,j,i) - p_mg(k,j,i+1) ) + & p_mg(k,j,i-1) + IBITS( flags(k,j,i), 4, 1 ) * & ( p_mg(k,j,i) - p_mg(k,j,i-1) ) ) & + ddy2_mg(l) * & ( p_mg(k,j+1,i) + IBITS( flags(k,j,i), 3, 1 ) * & ( p_mg(k,j,i) - p_mg(k,j+1,i) ) + & p_mg(k,j-1,i) + IBITS( flags(k,j,i), 2, 1 ) * & ( p_mg(k,j,i) - p_mg(k,j-1,i) ) ) & + f2_mg(k,l) * p_mg(k+1,j,i) & + f3_mg(k,l) * & ( p_mg(k-1,j,i) + IBITS( flags(k,j,i), 0, 1 ) * & ( p_mg(k,j,i) - p_mg(k-1,j,i) ) ) & - f_mg(k,j,i) ) ENDDO ENDDO ENDDO DO i = nxl_mg(l)+1, nxr_mg(l), 2 DO j = nys_mg(l) + (colour-1), nyn_mg(l), 2 DO k = nzb+1, nzt_mg(l), 2 p_mg(k,j,i) = 1.0 / f1_mg(k,l) * ( & ddx2_mg(l) * & ( p_mg(k,j,i+1) + IBITS( flags(k,j,i), 5, 1 ) * & ( p_mg(k,j,i) - p_mg(k,j,i+1) ) + & p_mg(k,j,i-1) + IBITS( flags(k,j,i), 4, 1 ) * & ( p_mg(k,j,i) - p_mg(k,j,i-1) ) ) & + ddy2_mg(l) * & ( p_mg(k,j+1,i) + IBITS( flags(k,j,i), 3, 1 ) * & ( p_mg(k,j,i) - p_mg(k,j+1,i) ) + & p_mg(k,j-1,i) + IBITS( flags(k,j,i), 2, 1 ) * & ( p_mg(k,j,i) - p_mg(k,j-1,i) ) ) & + f2_mg(k,l) * p_mg(k+1,j,i) & + f3_mg(k,l) * & ( p_mg(k-1,j,i) + IBITS( flags(k,j,i), 0, 1 ) * & ( p_mg(k,j,i) - p_mg(k-1,j,i) ) ) & - f_mg(k,j,i) ) ENDDO ENDDO ENDDO DO i = nxl_mg(l), nxr_mg(l), 2 DO j = nys_mg(l) + (colour-1), nyn_mg(l), 2 DO k = nzb+2, nzt_mg(l), 2 p_mg(k,j,i) = 1.0 / f1_mg(k,l) * ( & ddx2_mg(l) * & ( p_mg(k,j,i+1) + IBITS( flags(k,j,i), 5, 1 ) * & ( p_mg(k,j,i) - p_mg(k,j,i+1) ) + & p_mg(k,j,i-1) + IBITS( flags(k,j,i), 4, 1 ) * & ( p_mg(k,j,i) - p_mg(k,j,i-1) ) ) & + ddy2_mg(l) * & ( p_mg(k,j+1,i) + IBITS( flags(k,j,i), 3, 1 ) * & ( p_mg(k,j,i) - p_mg(k,j+1,i) ) + & p_mg(k,j-1,i) + IBITS( flags(k,j,i), 2, 1 ) * & ( p_mg(k,j,i) - p_mg(k,j-1,i) ) ) & + f2_mg(k,l) * p_mg(k+1,j,i) & + f3_mg(k,l) * & ( p_mg(k-1,j,i) + IBITS( flags(k,j,i), 0, 1 ) * & ( p_mg(k,j,i) - p_mg(k-1,j,i) ) ) & - f_mg(k,j,i) ) ENDDO ENDDO ENDDO DO i = nxl_mg(l)+1, nxr_mg(l), 2 DO j = nys_mg(l) + 2 - colour, nyn_mg(l), 2 DO k = nzb+2, nzt_mg(l), 2 p_mg(k,j,i) = 1.0 / f1_mg(k,l) * ( & ddx2_mg(l) * & ( p_mg(k,j,i+1) + IBITS( flags(k,j,i), 5, 1 ) * & ( p_mg(k,j,i) - p_mg(k,j,i+1) ) + & p_mg(k,j,i-1) + IBITS( flags(k,j,i), 4, 1 ) * & ( p_mg(k,j,i) - p_mg(k,j,i-1) ) ) & + ddy2_mg(l) * & ( p_mg(k,j+1,i) + IBITS( flags(k,j,i), 3, 1 ) * & ( p_mg(k,j,i) - p_mg(k,j+1,i) ) + & p_mg(k,j-1,i) + IBITS( flags(k,j,i), 2, 1 ) * & ( p_mg(k,j,i) - p_mg(k,j-1,i) ) ) & + f2_mg(k,l) * p_mg(k+1,j,i) & + f3_mg(k,l) * & ( p_mg(k-1,j,i) + IBITS( flags(k,j,i), 0, 1 ) * & ( p_mg(k,j,i) - p_mg(k-1,j,i) ) ) & - f_mg(k,j,i) ) ENDDO ENDDO ENDDO CALL cpu_log( log_point_s(36), 'redblack_no_unroll', 'stop' ) ELSE ! !-- Loop unrolling along y, only one i loop for better cache use CALL cpu_log( log_point_s(38), 'redblack_unroll', 'start' ) DO ic = nxl_mg(l), nxr_mg(l), 2 DO jc = nys_mg(l), nyn_mg(l), 4 i = ic jj = jc+2-colour DO k = nzb+1, nzt_mg(l), 2 j = jj p_mg(k,j,i) = 1.0 / f1_mg(k,l) * ( & ddx2_mg(l) * & ( p_mg(k,j,i+1) + IBITS( flags(k,j,i), 5, 1 ) * & ( p_mg(k,j,i) - p_mg(k,j,i+1) ) + & p_mg(k,j,i-1) + IBITS( flags(k,j,i), 4, 1 ) * & ( p_mg(k,j,i) - p_mg(k,j,i-1) ) ) & + ddy2_mg(l) * & ( p_mg(k,j+1,i) + IBITS( flags(k,j,i), 3, 1 ) * & ( p_mg(k,j,i) - p_mg(k,j+1,i) ) + & p_mg(k,j-1,i) + IBITS( flags(k,j,i), 2, 1 ) * & ( p_mg(k,j,i) - p_mg(k,j-1,i) ) ) & + f2_mg(k,l) * p_mg(k+1,j,i) & + f3_mg(k,l) * & ( p_mg(k-1,j,i) + IBITS( flags(k,j,i), 0, 1 ) * & ( p_mg(k,j,i) - p_mg(k-1,j,i) ) ) & - f_mg(k,j,i) ) j = jj+2 p_mg(k,j,i) = 1.0 / f1_mg(k,l) * ( & ddx2_mg(l) * & ( p_mg(k,j,i+1) + IBITS( flags(k,j,i), 5, 1 ) * & ( p_mg(k,j,i) - p_mg(k,j,i+1) ) + & p_mg(k,j,i-1) + IBITS( flags(k,j,i), 4, 1 ) * & ( p_mg(k,j,i) - p_mg(k,j,i-1) ) ) & + ddy2_mg(l) * & ( p_mg(k,j+1,i) + IBITS( flags(k,j,i), 3, 1 ) * & ( p_mg(k,j,i) - p_mg(k,j+1,i) ) + & p_mg(k,j-1,i) + IBITS( flags(k,j,i), 2, 1 ) * & ( p_mg(k,j,i) - p_mg(k,j-1,i) ) ) & + f2_mg(k,l) * p_mg(k+1,j,i) & + f3_mg(k,l) * & ( p_mg(k-1,j,i) + IBITS( flags(k,j,i), 0, 1 ) * & ( p_mg(k,j,i) - p_mg(k-1,j,i) ) ) & - f_mg(k,j,i) ) ENDDO i = ic+1 jj = jc+colour-1 DO k = nzb+1, nzt_mg(l), 2 j =jj p_mg(k,j,i) = 1.0 / f1_mg(k,l) * ( & ddx2_mg(l) * & ( p_mg(k,j,i+1) + IBITS( flags(k,j,i), 5, 1 ) * & ( p_mg(k,j,i) - p_mg(k,j,i+1) ) + & p_mg(k,j,i-1) + IBITS( flags(k,j,i), 4, 1 ) * & ( p_mg(k,j,i) - p_mg(k,j,i-1) ) ) & + ddy2_mg(l) * & ( p_mg(k,j+1,i) + IBITS( flags(k,j,i), 3, 1 ) * & ( p_mg(k,j,i) - p_mg(k,j+1,i) ) + & p_mg(k,j-1,i) + IBITS( flags(k,j,i), 2, 1 ) * & ( p_mg(k,j,i) - p_mg(k,j-1,i) ) ) & + f2_mg(k,l) * p_mg(k+1,j,i) & + f3_mg(k,l) * & ( p_mg(k-1,j,i) + IBITS( flags(k,j,i), 0, 1 ) * & ( p_mg(k,j,i) - p_mg(k-1,j,i) ) ) & - f_mg(k,j,i) ) j = jj+2 p_mg(k,j,i) = 1.0 / f1_mg(k,l) * ( & ddx2_mg(l) * & ( p_mg(k,j,i+1) + IBITS( flags(k,j,i), 5, 1 ) * & ( p_mg(k,j,i) - p_mg(k,j,i+1) ) + & p_mg(k,j,i-1) + IBITS( flags(k,j,i), 4, 1 ) * & ( p_mg(k,j,i) - p_mg(k,j,i-1) ) ) & + ddy2_mg(l) * & ( p_mg(k,j+1,i) + IBITS( flags(k,j,i), 3, 1 ) * & ( p_mg(k,j,i) - p_mg(k,j+1,i) ) + & p_mg(k,j-1,i) + IBITS( flags(k,j,i), 2, 1 ) * & ( p_mg(k,j,i) - p_mg(k,j-1,i) ) ) & + f2_mg(k,l) * p_mg(k+1,j,i) & + f3_mg(k,l) * & ( p_mg(k-1,j,i) + IBITS( flags(k,j,i), 0, 1 ) * & ( p_mg(k,j,i) - p_mg(k-1,j,i) ) ) & - f_mg(k,j,i) ) ENDDO i = ic jj = jc+colour-1 DO k = nzb+2, nzt_mg(l), 2 j =jj p_mg(k,j,i) = 1.0 / f1_mg(k,l) * ( & ddx2_mg(l) * & ( p_mg(k,j,i+1) + IBITS( flags(k,j,i), 5, 1 ) * & ( p_mg(k,j,i) - p_mg(k,j,i+1) ) + & p_mg(k,j,i-1) + IBITS( flags(k,j,i), 4, 1 ) * & ( p_mg(k,j,i) - p_mg(k,j,i-1) ) ) & + ddy2_mg(l) * & ( p_mg(k,j+1,i) + IBITS( flags(k,j,i), 3, 1 ) * & ( p_mg(k,j,i) - p_mg(k,j+1,i) ) + & p_mg(k,j-1,i) + IBITS( flags(k,j,i), 2, 1 ) * & ( p_mg(k,j,i) - p_mg(k,j-1,i) ) ) & + f2_mg(k,l) * p_mg(k+1,j,i) & + f3_mg(k,l) * & ( p_mg(k-1,j,i) + IBITS( flags(k,j,i), 0, 1 ) * & ( p_mg(k,j,i) - p_mg(k-1,j,i) ) ) & - f_mg(k,j,i) ) j = jj+2 p_mg(k,j,i) = 1.0 / f1_mg(k,l) * ( & ddx2_mg(l) * & ( p_mg(k,j,i+1) + IBITS( flags(k,j,i), 5, 1 ) * & ( p_mg(k,j,i) - p_mg(k,j,i+1) ) + & p_mg(k,j,i-1) + IBITS( flags(k,j,i), 4, 1 ) * & ( p_mg(k,j,i) - p_mg(k,j,i-1) ) ) & + ddy2_mg(l) * & ( p_mg(k,j+1,i) + IBITS( flags(k,j,i), 3, 1 ) * & ( p_mg(k,j,i) - p_mg(k,j+1,i) ) + & p_mg(k,j-1,i) + IBITS( flags(k,j,i), 2, 1 ) * & ( p_mg(k,j,i) - p_mg(k,j-1,i) ) ) & + f2_mg(k,l) * p_mg(k+1,j,i) & + f3_mg(k,l) * & ( p_mg(k-1,j,i) + IBITS( flags(k,j,i), 0, 1 ) * & ( p_mg(k,j,i) - p_mg(k-1,j,i) ) ) & - f_mg(k,j,i) ) ENDDO i = ic+1 jj = jc+2-colour DO k = nzb+2, nzt_mg(l), 2 j =jj p_mg(k,j,i) = 1.0 / f1_mg(k,l) * ( & ddx2_mg(l) * & ( p_mg(k,j,i+1) + IBITS( flags(k,j,i), 5, 1 ) * & ( p_mg(k,j,i) - p_mg(k,j,i+1) ) + & p_mg(k,j,i-1) + IBITS( flags(k,j,i), 4, 1 ) * & ( p_mg(k,j,i) - p_mg(k,j,i-1) ) ) & + ddy2_mg(l) * & ( p_mg(k,j+1,i) + IBITS( flags(k,j,i), 3, 1 ) * & ( p_mg(k,j,i) - p_mg(k,j+1,i) ) + & p_mg(k,j-1,i) + IBITS( flags(k,j,i), 2, 1 ) * & ( p_mg(k,j,i) - p_mg(k,j-1,i) ) ) & + f2_mg(k,l) * p_mg(k+1,j,i) & + f3_mg(k,l) * & ( p_mg(k-1,j,i) + IBITS( flags(k,j,i), 0, 1 ) * & ( p_mg(k,j,i) - p_mg(k-1,j,i) ) ) & - f_mg(k,j,i) ) j = jj+2 p_mg(k,j,i) = 1.0 / f1_mg(k,l) * ( & ddx2_mg(l) * & ( p_mg(k,j,i+1) + IBITS( flags(k,j,i), 5, 1 ) * & ( p_mg(k,j,i) - p_mg(k,j,i+1) ) + & p_mg(k,j,i-1) + IBITS( flags(k,j,i), 4, 1 ) * & ( p_mg(k,j,i) - p_mg(k,j,i-1) ) ) & + ddy2_mg(l) * & ( p_mg(k,j+1,i) + IBITS( flags(k,j,i), 3, 1 ) * & ( p_mg(k,j,i) - p_mg(k,j+1,i) ) + & p_mg(k,j-1,i) + IBITS( flags(k,j,i), 2, 1 ) * & ( p_mg(k,j,i) - p_mg(k,j-1,i) ) ) & + f2_mg(k,l) * p_mg(k+1,j,i) & + f3_mg(k,l) * & ( p_mg(k-1,j,i) + IBITS( flags(k,j,i), 0, 1 ) * & ( p_mg(k,j,i) - p_mg(k-1,j,i) ) ) & - f_mg(k,j,i) ) ENDDO ENDDO ENDDO CALL cpu_log( log_point_s(38), 'redblack_unroll', 'stop' ) ENDIF ! !-- Horizontal boundary conditions CALL exchange_horiz( p_mg ) IF ( bc_lr /= 'cyclic' ) THEN IF ( inflow_l .OR. outflow_l ) THEN p_mg(:,:,nxl_mg(l)-1) = p_mg(:,:,nxl_mg(l)) ENDIF IF ( inflow_r .OR. outflow_r ) THEN p_mg(:,:,nxr_mg(l)+1) = p_mg(:,:,nxr_mg(l)) ENDIF ENDIF IF ( bc_ns /= 'cyclic' ) THEN IF ( inflow_n .OR. outflow_n ) THEN p_mg(:,nyn_mg(l)+1,:) = p_mg(:,nyn_mg(l),:) ENDIF IF ( inflow_s .OR. outflow_s ) THEN p_mg(:,nys_mg(l)-1,:) = p_mg(:,nys_mg(l),:) ENDIF ENDIF ! !-- Bottom and top boundary conditions IF ( ibc_p_b == 1 ) THEN p_mg(nzb,:,: ) = p_mg(nzb+1,:,:) ELSE p_mg(nzb,:,: ) = 0.0 ENDIF IF ( ibc_p_t == 1 ) THEN p_mg(nzt_mg(l)+1,:,: ) = p_mg(nzt_mg(l),:,:) ELSE p_mg(nzt_mg(l)+1,:,: ) = 0.0 ENDIF ENDDO ENDDO ! !-- Set pressure within topography and at the topography surfaces !$OMP PARALLEL PRIVATE (i,j,k,wall_left,wall_north,wall_right,wall_south,wall_top,wall_total) !$OMP DO DO i = nxl_mg(l), nxr_mg(l) DO j = nys_mg(l), nyn_mg(l) DO k = nzb, nzt_mg(l) ! !-- First, set pressure inside topography to zero p_mg(k,j,i) = p_mg(k,j,i) * ( 1.0 - IBITS( flags(k,j,i), 6, 1 ) ) ! !-- Second, determine if the gridpoint inside topography is adjacent !-- to a wall and set its value to a value given by the average of !-- those values obtained from Neumann boundary condition wall_left = IBITS( flags(k,j,i-1), 5, 1 ) wall_right = IBITS( flags(k,j,i+1), 4, 1 ) wall_south = IBITS( flags(k,j-1,i), 3, 1 ) wall_north = IBITS( flags(k,j+1,i), 2, 1 ) wall_top = IBITS( flags(k+1,j,i), 0, 1 ) wall_total = wall_left + wall_right + wall_south + wall_north + & wall_top IF ( wall_total > 0.0 ) THEN p_mg(k,j,i) = 1.0 / wall_total * & ( wall_left * p_mg(k,j,i-1) + & wall_right * p_mg(k,j,i+1) + & wall_south * p_mg(k,j-1,i) + & wall_north * p_mg(k,j+1,i) + & wall_top * p_mg(k+1,j,i) ) ENDIF ENDDO ENDDO ENDDO !$OMP END PARALLEL ! !-- One more time horizontal boundary conditions CALL exchange_horiz( p_mg ) END SUBROUTINE redblack SUBROUTINE mg_gather( f2, f2_sub ) USE control_parameters USE cpulog USE indices USE interfaces USE pegrid IMPLICIT NONE INTEGER :: n, nwords, sender REAL, DIMENSION(nzb:nzt_mg(grid_level)+1, & nys_mg(grid_level)-1:nyn_mg(grid_level)+1, & nxl_mg(grid_level)-1:nxr_mg(grid_level)+1) :: f2 REAL, DIMENSION(nzb:mg_loc_ind(5,myid)+1, & mg_loc_ind(3,myid)-1:mg_loc_ind(4,myid)+1, & mg_loc_ind(1,myid)-1:mg_loc_ind(2,myid)+1) :: f2_sub ! !-- Find out the number of array elements of the subdomain array nwords = SIZE( f2_sub ) #if defined( __parallel ) CALL cpu_log( log_point_s(34), 'mg_gather', 'start' ) IF ( myid == 0 ) THEN ! !-- Store the local subdomain array on the total array f2(:,mg_loc_ind(3,0)-1:mg_loc_ind(4,0)+1, & mg_loc_ind(1,0)-1:mg_loc_ind(2,0)+1) = f2_sub ! !-- Receive the subdomain arrays from all other PEs and store them on the !-- total array DO n = 1, numprocs-1 ! !-- Receive the arrays in arbitrary order from the PEs. CALL MPI_RECV( f2_sub(nzb,mg_loc_ind(3,0)-1,mg_loc_ind(1,0)-1), & nwords, MPI_REAL, MPI_ANY_SOURCE, 1, comm2d, status, & ierr ) sender = status(MPI_SOURCE) f2(:,mg_loc_ind(3,sender)-1:mg_loc_ind(4,sender)+1, & mg_loc_ind(1,sender)-1:mg_loc_ind(2,sender)+1) = f2_sub ENDDO ELSE ! !-- Send subdomain array to PE0 CALL MPI_SEND( f2_sub(nzb,mg_loc_ind(3,myid)-1,mg_loc_ind(1,myid)-1), & nwords, MPI_REAL, 0, 1, comm2d, ierr ) ENDIF CALL cpu_log( log_point_s(34), 'mg_gather', 'stop' ) #endif END SUBROUTINE mg_gather SUBROUTINE mg_scatter( p2, p2_sub ) ! !-- TODO: It may be possible to improve the speed of this routine by using !-- non-blocking communication USE control_parameters USE cpulog USE indices USE interfaces USE pegrid IMPLICIT NONE INTEGER :: n, nwords, sender REAL, DIMENSION(nzb:nzt_mg(grid_level-1)+1, & nys_mg(grid_level-1)-1:nyn_mg(grid_level-1)+1, & nxl_mg(grid_level-1)-1:nxr_mg(grid_level-1)+1) :: p2 REAL, DIMENSION(nzb:mg_loc_ind(5,myid)+1, & mg_loc_ind(3,myid)-1:mg_loc_ind(4,myid)+1, & mg_loc_ind(1,myid)-1:mg_loc_ind(2,myid)+1) :: p2_sub ! !-- Find out the number of array elements of the subdomain array nwords = SIZE( p2_sub ) #if defined( __parallel ) CALL cpu_log( log_point_s(35), 'mg_scatter', 'start' ) IF ( myid == 0 ) THEN ! !-- Scatter the subdomain arrays to the other PEs by blocking !-- communication DO n = 1, numprocs-1 p2_sub = p2(:,mg_loc_ind(3,n)-1:mg_loc_ind(4,n)+1, & mg_loc_ind(1,n)-1:mg_loc_ind(2,n)+1) CALL MPI_SEND( p2_sub(nzb,mg_loc_ind(3,0)-1,mg_loc_ind(1,0)-1), & nwords, MPI_REAL, n, 1, comm2d, ierr ) ENDDO ! !-- Store data from the total array to the local subdomain array p2_sub = p2(:,mg_loc_ind(3,0)-1:mg_loc_ind(4,0)+1, & mg_loc_ind(1,0)-1:mg_loc_ind(2,0)+1) ELSE ! !-- Receive subdomain array from PE0 CALL MPI_RECV( p2_sub(nzb,mg_loc_ind(3,myid)-1,mg_loc_ind(1,myid)-1), & nwords, MPI_REAL, 0, 1, comm2d, status, ierr ) ENDIF CALL cpu_log( log_point_s(35), 'mg_scatter', 'stop' ) #endif END SUBROUTINE mg_scatter RECURSIVE SUBROUTINE next_mg_level( f_mg, p_mg, p3, r ) !------------------------------------------------------------------------------! ! Description: ! ------------ ! This is where the multigrid technique takes place. V- and W- Cycle are ! implemented and steered by the parameter "gamma". Parameter "nue" determines ! the convergence of the multigrid iterative solution. There are nue times ! RB-GS iterations. It should be set to "1" or "2", considering the time effort ! one would like to invest. Last choice shows a very good converging factor, ! but leads to an increase in computing time. !------------------------------------------------------------------------------! USE arrays_3d USE control_parameters USE grid_variables USE indices USE pegrid IMPLICIT NONE INTEGER :: i, j, k, nxl_mg_save, nxr_mg_save, nyn_mg_save, nys_mg_save, & nzt_mg_save LOGICAL :: restore_boundary_lr_on_pe0, restore_boundary_ns_on_pe0 REAL, DIMENSION(nzb:nzt_mg(grid_level)+1, & nys_mg(grid_level)-1:nyn_mg(grid_level)+1, & nxl_mg(grid_level)-1:nxr_mg(grid_level)+1) :: f_mg, p_mg, p3, r REAL, DIMENSION(:,:,:), ALLOCATABLE :: f2, f2_sub, p2, p2_sub ! !-- Restriction to the coarsest grid 10 IF ( grid_level == 1 ) THEN ! !-- Solution on the coarsest grid. Double the number of Gauss-Seidel !-- iterations in order to get a more accurate solution. ngsrb = 2 * ngsrb CALL redblack( f_mg, p_mg ) ngsrb = ngsrb / 2 ELSEIF ( grid_level /= 1 ) THEN grid_level_count(grid_level) = grid_level_count(grid_level) + 1 ! !-- Solution on the actual grid level CALL redblack( f_mg, p_mg ) ! !-- Determination of the actual residual CALL resid( f_mg, p_mg, r ) ! !-- Restriction of the residual (finer grid values!) to the next coarser !-- grid. Therefore, the grid level has to be decremented now. nxl..nzt have !-- to be set to the coarse grid values, because these variables are needed !-- for the exchange of ghost points in routine exchange_horiz grid_level = grid_level - 1 nxl = nxl_mg(grid_level) nxr = nxr_mg(grid_level) nys = nys_mg(grid_level) nyn = nyn_mg(grid_level) nzt = nzt_mg(grid_level) ALLOCATE( f2(nzb:nzt_mg(grid_level)+1, & nys_mg(grid_level)-1:nyn_mg(grid_level)+1, & nxl_mg(grid_level)-1:nxr_mg(grid_level)+1), & p2(nzb:nzt_mg(grid_level)+1, & nys_mg(grid_level)-1:nyn_mg(grid_level)+1, & nxl_mg(grid_level)-1:nxr_mg(grid_level)+1) ) IF ( grid_level == mg_switch_to_pe0_level ) THEN ! print*, 'myid=',myid, ' restrict and switch to PE0. level=', grid_level ! !-- From this level on, calculations are done on PE0 only. !-- First, carry out restriction on the subdomain. !-- Therefore, indices of the level have to be changed to subdomain values !-- in between (otherwise, the restrict routine would expect !-- the gathered array) nxl_mg_save = nxl_mg(grid_level) nxr_mg_save = nxr_mg(grid_level) nys_mg_save = nys_mg(grid_level) nyn_mg_save = nyn_mg(grid_level) nzt_mg_save = nzt_mg(grid_level) nxl_mg(grid_level) = mg_loc_ind(1,myid) nxr_mg(grid_level) = mg_loc_ind(2,myid) nys_mg(grid_level) = mg_loc_ind(3,myid) nyn_mg(grid_level) = mg_loc_ind(4,myid) nzt_mg(grid_level) = mg_loc_ind(5,myid) nxl = mg_loc_ind(1,myid) nxr = mg_loc_ind(2,myid) nys = mg_loc_ind(3,myid) nyn = mg_loc_ind(4,myid) nzt = mg_loc_ind(5,myid) ALLOCATE( f2_sub(nzb:nzt_mg(grid_level)+1, & nys_mg(grid_level)-1:nyn_mg(grid_level)+1, & nxl_mg(grid_level)-1:nxr_mg(grid_level)+1) ) CALL restrict( f2_sub, r ) ! !-- Restore the correct indices of this level nxl_mg(grid_level) = nxl_mg_save nxr_mg(grid_level) = nxr_mg_save nys_mg(grid_level) = nys_mg_save nyn_mg(grid_level) = nyn_mg_save nzt_mg(grid_level) = nzt_mg_save nxl = nxl_mg(grid_level) nxr = nxr_mg(grid_level) nys = nys_mg(grid_level) nyn = nyn_mg(grid_level) nzt = nzt_mg(grid_level) ! !-- Gather all arrays from the subdomains on PE0 CALL mg_gather( f2, f2_sub ) ! !-- Set switch for routine exchange_horiz, that no ghostpoint exchange !-- has to be carried out from now on mg_switch_to_pe0 = .TRUE. ! !-- In case of non-cyclic lateral boundary conditions, both in- and !-- outflow conditions have to be used on PE0 after the switch, because !-- it then contains the total domain. Due to the virtual processor !-- grid, before the switch, PE0 can have in-/outflow at the left !-- and south wall only (or on opposite walls in case of a 1d !-- decomposition). restore_boundary_lr_on_pe0 = .FALSE. restore_boundary_ns_on_pe0 = .FALSE. IF ( myid == 0 ) THEN IF ( inflow_l .AND. .NOT. outflow_r ) THEN outflow_r = .TRUE. restore_boundary_lr_on_pe0 = .TRUE. ENDIF IF ( outflow_l .AND. .NOT. inflow_r ) THEN inflow_r = .TRUE. restore_boundary_lr_on_pe0 = .TRUE. ENDIF IF ( inflow_s .AND. .NOT. outflow_n ) THEN outflow_n = .TRUE. restore_boundary_ns_on_pe0 = .TRUE. ENDIF IF ( outflow_s .AND. .NOT. inflow_n ) THEN inflow_n = .TRUE. restore_boundary_ns_on_pe0 = .TRUE. ENDIF ENDIF DEALLOCATE( f2_sub ) ELSE CALL restrict( f2, r ) ENDIF p2 = 0.0 ! !-- Repeat the same procedure till the coarsest grid is reached IF ( myid == 0 .OR. grid_level > mg_switch_to_pe0_level ) THEN CALL next_mg_level( f2, p2, p3, r ) ENDIF ENDIF ! !-- Now follows the prolongation IF ( grid_level >= 2 ) THEN ! !-- Grid level has to be incremented on the PEs where next_mg_level !-- has not been called before (normally it is incremented at the end !-- of next_mg_level) IF ( myid /= 0 .AND. grid_level == mg_switch_to_pe0_level ) THEN grid_level = grid_level + 1 nxl = nxl_mg(grid_level) nxr = nxr_mg(grid_level) nys = nys_mg(grid_level) nyn = nyn_mg(grid_level) nzt = nzt_mg(grid_level) ENDIF ! !-- Prolongation of the new residual. The values are transferred !-- from the coarse to the next finer grid. IF ( grid_level == mg_switch_to_pe0_level+1 ) THEN ! !-- At this level, the new residual first has to be scattered from !-- PE0 to the other PEs ALLOCATE( p2_sub(nzb:mg_loc_ind(5,myid)+1, & mg_loc_ind(3,myid)-1:mg_loc_ind(4,myid)+1, & mg_loc_ind(1,myid)-1:mg_loc_ind(2,myid)+1) ) CALL mg_scatter( p2, p2_sub ) ! !-- Therefore, indices of the previous level have to be changed to !-- subdomain values in between (otherwise, the prolong routine would !-- expect the gathered array) nxl_mg_save = nxl_mg(grid_level-1) nxr_mg_save = nxr_mg(grid_level-1) nys_mg_save = nys_mg(grid_level-1) nyn_mg_save = nyn_mg(grid_level-1) nzt_mg_save = nzt_mg(grid_level-1) nxl_mg(grid_level-1) = mg_loc_ind(1,myid) nxr_mg(grid_level-1) = mg_loc_ind(2,myid) nys_mg(grid_level-1) = mg_loc_ind(3,myid) nyn_mg(grid_level-1) = mg_loc_ind(4,myid) nzt_mg(grid_level-1) = mg_loc_ind(5,myid) ! !-- Set switch for routine exchange_horiz, that ghostpoint exchange !-- has to be carried again out from now on mg_switch_to_pe0 = .FALSE. ! !-- In case of non-cyclic lateral boundary conditions, restore the !-- in-/outflow conditions on PE0 IF ( myid == 0 ) THEN IF ( restore_boundary_lr_on_pe0 ) THEN IF ( inflow_l ) outflow_r = .FALSE. IF ( outflow_l ) inflow_r = .FALSE. ENDIF IF ( restore_boundary_ns_on_pe0 ) THEN IF ( inflow_s ) outflow_n = .FALSE. IF ( outflow_s ) inflow_n = .FALSE. ENDIF ENDIF CALL prolong( p2_sub, p3 ) ! !-- Restore the correct indices of the previous level nxl_mg(grid_level-1) = nxl_mg_save nxr_mg(grid_level-1) = nxr_mg_save nys_mg(grid_level-1) = nys_mg_save nyn_mg(grid_level-1) = nyn_mg_save nzt_mg(grid_level-1) = nzt_mg_save DEALLOCATE( p2_sub ) ELSE CALL prolong( p2, p3 ) ENDIF ! !-- Temporary arrays for the actual grid are not needed any more DEALLOCATE( p2, f2 ) ! !-- Computation of the new pressure correction. Therefore, !-- values from prior grids are added up automatically stage by stage. DO i = nxl_mg(grid_level)-1, nxr_mg(grid_level)+1 DO j = nys_mg(grid_level)-1, nyn_mg(grid_level)+1 DO k = nzb, nzt_mg(grid_level)+1 p_mg(k,j,i) = p_mg(k,j,i) + p3(k,j,i) ENDDO ENDDO ENDDO ! !-- Relaxation of the new solution CALL redblack( f_mg, p_mg ) ENDIF ! !-- The following few lines serve the steering of the multigrid scheme IF ( grid_level == maximum_grid_level ) THEN GOTO 20 ELSEIF ( grid_level /= maximum_grid_level .AND. grid_level /= 1 .AND. & grid_level_count(grid_level) /= gamma_mg ) THEN GOTO 10 ENDIF ! !-- Reset counter for the next call of poismg grid_level_count(grid_level) = 0 ! !-- Continue with the next finer level. nxl..nzt have to be !-- set to the finer grid values, because these variables are needed for the !-- exchange of ghost points in routine exchange_horiz grid_level = grid_level + 1 nxl = nxl_mg(grid_level) nxr = nxr_mg(grid_level) nys = nys_mg(grid_level) nyn = nyn_mg(grid_level) nzt = nzt_mg(grid_level) 20 CONTINUE END SUBROUTINE next_mg_level