Timestamp:
Mar 29, 2011 11:39:40 AM (10 years ago)
Author:
raasch
Message:

New:
---

In case of the multigrid method, gathered data on coarse grid levels are now
processed identically on all PEs (previously on PE0 only), so that the
subsequent scattering of the data is no longer necessary. (modules,
init_pegrid, poismg)
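
For illustration, the communication pattern now used in mg_gather (see the
diff below), reduced to a minimal, self-contained sketch; the grid size,
decomposition, and array names here are made up for the example:

   PROGRAM gather_by_allreduce
      USE mpi
      IMPLICIT NONE
      INTEGER, PARAMETER      ::  nx = 8, nz = 4   ! illustrative total grid size
      INTEGER                 ::  i1, i2, ierr, myid, numprocs
      REAL, DIMENSION(nz,nx)  ::  f_l, f           ! local contribution / gathered field

      CALL MPI_INIT( ierr )
      CALL MPI_COMM_RANK( MPI_COMM_WORLD, myid, ierr )
      CALL MPI_COMM_SIZE( MPI_COMM_WORLD, numprocs, ierr )
!
!--   Every PE writes its own part into a zero-initialized buffer of the size
!--   of the total grid
      f_l = 0.0
      i1  = myid * nx / numprocs + 1
      i2  = ( myid + 1 ) * nx / numprocs
      f_l(:,i1:i2) = REAL( myid + 1 )
!
!--   The sum reduction delivers the complete field on all PEs at once, so no
!--   subsequent scatter from PE0 is required
      CALL MPI_ALLREDUCE( f_l, f, SIZE( f ), MPI_REAL, MPI_SUM,               &
                          MPI_COMM_WORLD, ierr )

      CALL MPI_FINALIZE( ierr )
   END PROGRAM gather_by_allreduce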

Changed:


The calculation of the weighted average of p is now handled in the same way
regardless of the number of ghost layers required by the advection scheme. (pres)
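
As a rough sketch (weight and p_new are placeholder names, not the actual
variables in pres): the weighted update only touches interior points, so the
statement itself is independent of how many ghost layers the advection scheme
uses; the ghost points are refreshed by the regular exchange afterwards.

      DO  i = nxl, nxr
         DO  j = nys, nyn
            DO  k = nzb+1, nzt
               p(k,j,i) = p(k,j,i) + weight * p_new(k,j,i)
            ENDDO
         ENDDO
      ENDDO
      CALL exchange_horiz( p, nbgp )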

The multigrid and SOR methods now use p_loc for the iterative advancement of
the pressure; the temporary array p_sub has been removed. (init_3d_model,
modules, poismg, pres, sor)
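
In init_3d_model this amounts to something like the following; the bounds with
a single ghost layer and the solver test are shown for illustration only, the
names p_loc and psolver are taken from the code:

      IF ( psolver == 'multigrid'  .OR.  psolver == 'sor' )  THEN
!
!--      p_loc carries the iteratively advanced pressure and replaces the
!--      former temporary array p_sub
         ALLOCATE( p_loc(nzb:nzt+1,nys-1:nyn+1,nxl-1:nxr+1) )
      ENDIF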

The character switches bc_lr and bc_ns have been replaced by the logical flags
bc_lr_cyc, bc_lr_dirrad, bc_lr_raddir, bc_ns_cyc, bc_ns_dirrad, and
bc_ns_raddir as a speed optimization. (calc_spectra, check_parameters,
exchange_horiz, exchange_horiz_2d, header, init_3d_model, init_grid,
init_pegrid, modules, poismg, pres, sor, time_integration, timestep)
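
The flags are set once during initialization, so frequently called routines
test a precomputed LOGICAL instead of repeatedly comparing strings. A sketch
of the intent (the string values and surrounding statements are illustrative):

      bc_lr_cyc    = ( bc_lr == 'cyclic' )
      bc_lr_dirrad = ( bc_lr == 'dirichlet/radiation' )
      bc_lr_raddir = ( bc_lr == 'radiation/dirichlet' )
      bc_ns_cyc    = ( bc_ns == 'cyclic' )
!
!--   Hot code paths, e.g. in exchange_horiz or poismg, then reduce to
      IF ( .NOT. bc_lr_cyc )  THEN
         IF ( inflow_l .OR. outflow_l )  r(:,:,nxl_mg(l)-1) = r(:,:,nxl_mg(l))
      ENDIF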

grid_level is now used directly as the index into the MPI data type arrays.
(exchange_horiz, poismg)
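
The derived datatypes for the ghost point exchange are stored per grid level,
so the current level can serve directly as the array index; roughly (type_yz
stands for one of the datatype arrays, the call itself is illustrative):

      CALL MPI_SENDRECV( ar(nzb,nys,nxl),   1, type_yz(grid_level), pleft,  0, &
                         ar(nzb,nys,nxr+1), 1, type_yz(grid_level), pright, 0, &
                         comm2d, status, ierr )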

The initial assignment of zero to array p is now done for the iterative solvers only. (init_3d_model)

Errors:


The localsum calculation has been modified to allow a proper OpenMP reduction. (pres)
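
The usual pattern, shown as a self-contained sketch (not the actual loop in
pres): every thread accumulates into its private copy of localsum and OpenMP
combines the partial sums at the end of the loop.

   PROGRAM reduction_sketch
      IMPLICIT NONE
      INTEGER                    ::  i, j, k
      REAL                       ::  localsum
      REAL, DIMENSION(32,32,32)  ::  d

      d = 1.0
      localsum = 0.0
!$OMP PARALLEL DO PRIVATE( j, k ) REDUCTION( +: localsum )
      DO  i = 1, 32
         DO  j = 1, 32
            DO  k = 1, 32
               localsum = localsum + d(k,j,i) * d(k,j,i)
            ENDDO
         ENDDO
      ENDDO
!$OMP END PARALLEL DO
      PRINT*, 'localsum = ', localsum
   END PROGRAM reduction_sketch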

Bugfix: the bottom (nzb) and top (nzt+1) boundary conditions are now set in
routines resid and restrict. They were missing before, which may have led to
unpredictable results. (poismg)

File:
1 edited

Legend:

Lines prefixed with '-' were removed in r707, lines prefixed with '+' were
added; unprefixed lines are unchanged context.
  • palm/trunk/SOURCE/poismg.f90

--- palm/trunk/SOURCE/poismg.f90 (r668)
+++ palm/trunk/SOURCE/poismg.f90 (r707)
@@ -3 +3 @@
 !------------------------------------------------------------------------------!
 ! Attention: Loop unrolling and cache optimization in SOR-Red/Black method
-!            still does not bring the expected speedup on ibm! Further work
-!            is required.
+!            still does not give the expected speedup! Further work required.
 !
 ! Current revisions:
 ! -----------------
+! p_loc is used instead of p in the main routine (poismg).
+! On coarse grid levels, gathered data are identically processed on all PEs
+! (before, on PE0 only), so that the subsequent scattering of data is not
+! neccessary any more.
+! bc_lr/ns replaced by bc_lr/ns_cyc/dirrad/raddir
+! Bugfix: bottom (nzb) and top (nzt+1) boundary conditions set in routines
+! resid and restrict. They were missed before which may have led to
+! unpredictable results.
 !
 ! Former revisions:
     
@@ -76 +83 @@
     ALLOCATE ( p3(nzb:nzt+1,nys-1:nyn+1,nxl-1:nxr+1) )

-
-!
-!-- Some boundaries have to be added to divergence array
+!
+!-- Ghost boundaries have to be added to divergence array.
+!-- Exchange routine needs to know the grid level!
+    grid_level = maximum_grid_level
     CALL exchange_horiz( d, 1)
     d(nzb,:,:) = d(nzb+1,:,:)
     
@@ -88 +96 @@
 !-- If the number of cycles is preset by the user, this number will be
 !-- carried out regardless of the accuracy.
-    grid_level_count =   0
-    mgcycles         =   0
+    grid_level_count =  0
+    mgcycles         =  0
     IF ( mg_cycles == -1 )  THEN
        maximum_mgcycles = 0
     
@@ -101 +109 @@
                mgcycles < maximum_mgcycles )

-       CALL next_mg_level( d, p, p3, r)
+       CALL next_mg_level( d, p_loc, p3, r)

 !
     
@@ -107 +115 @@
 !--    cycles to be performed
        IF ( maximum_mgcycles == 0 )  THEN
-          CALL resid( d, p, r )
+          CALL resid( d, p_loc, r )
           maxerror = SUM( r(nzb+1:nzt,nys:nyn,nxl:nxr)**2 )
 #if defined( __parallel )
     
@@ -132 +140 @@

     DEALLOCATE( p3 )
+
+!
+!-- Unset the grid level. Variable is used to determine the MPI datatypes for
+!-- ghost point exchange
+    grid_level = 0

     CALL cpu_log( log_point_s(29), 'poismg', 'stop' )
     
@@ -223 +236 @@
     CALL exchange_horiz( r, 1)

-    IF ( bc_lr /= 'cyclic' )  THEN
+    IF ( .NOT. bc_lr_cyc )  THEN
        IF ( inflow_l .OR. outflow_l )  r(:,:,nxl_mg(l)-1) = r(:,:,nxl_mg(l))
        IF ( inflow_r .OR. outflow_r )  r(:,:,nxr_mg(l)+1) = r(:,:,nxr_mg(l))
     ENDIF

-    IF ( bc_ns /= 'cyclic' )  THEN
+    IF ( .NOT. bc_ns_cyc )  THEN
        IF ( inflow_n .OR. outflow_n )  r(:,nyn_mg(l)+1,:) = r(:,nyn_mg(l),:)
        IF ( inflow_s .OR. outflow_s )  r(:,nys_mg(l)-1,:) = r(:,nys_mg(l),:)
     
@@ -234 +247 @@

 !
-!-- Top boundary condition
-!-- A Neumann boundary condition for r is implicitly set in routine restrict
+!-- Boundary conditions at bottom and top of the domain.
+!-- These points are not handled by the above loop. Points may be within
+!-- buildings, but that doesn't matter.
+    IF ( ibc_p_b == 1 )  THEN
+       r(nzb,:,: ) = r(nzb+1,:,:)
+    ELSE
+       r(nzb,:,: ) = 0.0
+    ENDIF
+
     IF ( ibc_p_t == 1 )  THEN
        r(nzt_mg(l)+1,:,: ) = r(nzt_mg(l),:,:)
     
@@ -396 +416 @@
     CALL exchange_horiz( f_mg, 1)

-    IF ( bc_lr /= 'cyclic' )  THEN
+    IF ( .NOT. bc_lr_cyc )  THEN
        IF (inflow_l .OR. outflow_l)  f_mg(:,:,nxl_mg(l)-1) = f_mg(:,:,nxl_mg(l))
        IF (inflow_r .OR. outflow_r)  f_mg(:,:,nxr_mg(l)+1) = f_mg(:,:,nxr_mg(l))
     ENDIF

-    IF ( bc_ns /= 'cyclic' )  THEN
+    IF ( .NOT. bc_ns_cyc )  THEN
        IF (inflow_n .OR. outflow_n)  f_mg(:,nyn_mg(l)+1,:) = f_mg(:,nyn_mg(l),:)
        IF (inflow_s .OR. outflow_s)  f_mg(:,nys_mg(l)-1,:) = f_mg(:,nys_mg(l),:)
     
@@ -407 +427 @@

 !
-!-- Bottom and top boundary conditions
-!    IF ( ibc_p_b == 1 )  THEN
-!       f_mg(nzb,:,: ) = f_mg(nzb+1,:,:)
-!    ELSE
-!       f_mg(nzb,:,: ) = 0.0
-!    ENDIF
-!
-!    IF ( ibc_p_t == 1 )  THEN
-!       f_mg(nzt_mg(l)+1,:,: ) = f_mg(nzt_mg(l),:,:)
-!    ELSE
-!       f_mg(nzt_mg(l)+1,:,: ) = 0.0
-!    ENDIF
+!-- Boundary conditions at bottom and top of the domain.
+!-- These points are not handled by the above loop. Points may be within
+!-- buildings, but that doesn't matter.
+    IF ( ibc_p_b == 1 )  THEN
+       f_mg(nzb,:,: ) = f_mg(nzb+1,:,:)
+    ELSE
+       f_mg(nzb,:,: ) = 0.0
+    ENDIF
+
+    IF ( ibc_p_t == 1 )  THEN
+       f_mg(nzt_mg(l)+1,:,: ) = f_mg(nzt_mg(l),:,:)
+    ELSE
+       f_mg(nzt_mg(l)+1,:,: ) = 0.0
+    ENDIF


     
@@ -494 +516 @@
     CALL exchange_horiz( temp, 1)

-    IF ( bc_lr /= 'cyclic' )  THEN
+    IF ( .NOT. bc_lr_cyc )  THEN
        IF (inflow_l .OR. outflow_l)  temp(:,:,nxl_mg(l)-1) = temp(:,:,nxl_mg(l))
        IF (inflow_r .OR. outflow_r)  temp(:,:,nxr_mg(l)+1) = temp(:,:,nxr_mg(l))
     ENDIF

-    IF ( bc_ns /= 'cyclic' )  THEN
+    IF ( .NOT. bc_ns_cyc )  THEN
        IF (inflow_n .OR. outflow_n)  temp(:,nyn_mg(l)+1,:) = temp(:,nyn_mg(l),:)
        IF (inflow_s .OR. outflow_s)  temp(:,nys_mg(l)-1,:) = temp(:,nys_mg(l),:)
     
@@ -864 +886 @@
           CALL exchange_horiz( p_mg, 1 )

-          IF ( bc_lr /= 'cyclic' )  THEN
+          IF ( .NOT. bc_lr_cyc )  THEN
             IF ( inflow_l .OR. outflow_l )  THEN
                p_mg(:,:,nxl_mg(l)-1) = p_mg(:,:,nxl_mg(l))
     
@@ -873 +895 @@
          ENDIF

-          IF ( bc_ns /= 'cyclic' )  THEN
+          IF ( .NOT. bc_ns_cyc )  THEN
             IF ( inflow_n .OR. outflow_n )  THEN
                p_mg(:,nyn_mg(l)+1,:) = p_mg(:,nyn_mg(l),:)
     
@@ -953 +975 @@
     IMPLICIT NONE

-    INTEGER ::  n, nwords, sender
+    INTEGER ::  i, il, ir, j, jn, js, k, n, nwords, sender

     REAL, DIMENSION(nzb:nzt_mg(grid_level)+1,                            &
     
@@ -963 +985 @@
                     mg_loc_ind(1,myid)-1:mg_loc_ind(2,myid)+1) ::  f2_sub

-!
-!-- Find out the number of array elements of the subdomain array
-    nwords = SIZE( f2_sub )
+    REAL, DIMENSION(:,:,:), ALLOCATABLE ::  f2_l
+
+    ALLOCATE( f2_l(nzb:nzt_mg(grid_level)+1,                            &
+                    nys_mg(grid_level)-1:nyn_mg(grid_level)+1,          &
+                    nxl_mg(grid_level)-1:nxr_mg(grid_level)+1) )

 #if defined( __parallel )
     CALL cpu_log( log_point_s(34), 'mg_gather', 'start' )

-    IF ( myid == 0 )  THEN
-!
-!--    Store the local subdomain array on the total array
-       f2(:,mg_loc_ind(3,0)-1:mg_loc_ind(4,0)+1, &
-            mg_loc_ind(1,0)-1:mg_loc_ind(2,0)+1) = f2_sub
-
-!
-!--    Receive the subdomain arrays from all other PEs and store them on the
-!--    total array
-       DO  n = 1, numprocs-1
-!
-!--       Receive the arrays in arbitrary order from the PEs.
-          CALL MPI_RECV( f2_sub(nzb,mg_loc_ind(3,0)-1,mg_loc_ind(1,0)-1),     &
-                         nwords, MPI_REAL, MPI_ANY_SOURCE, 1, comm2d, status, &
-                         ierr )
-          sender = status(MPI_SOURCE)
-          f2(:,mg_loc_ind(3,sender)-1:mg_loc_ind(4,sender)+1, &
-               mg_loc_ind(1,sender)-1:mg_loc_ind(2,sender)+1) = f2_sub
+    f2_l = 0.0
+
+!
+!-- Store the local subdomain array on the total array
+    js = mg_loc_ind(3,myid)
+    IF ( south_border_pe )  js = js - 1
+    jn = mg_loc_ind(4,myid)
+    IF ( north_border_pe )  jn = jn + 1
+    il = mg_loc_ind(1,myid)
+    IF ( left_border_pe )   il = il - 1
+    ir = mg_loc_ind(2,myid)
+    IF ( right_border_pe )  ir = ir + 1
+    DO  i = il, ir
+       DO  j = js, jn
+          DO  k = nzb, nzt_mg(grid_level)+1
+             f2_l(k,j,i) = f2_sub(k,j,i)
+          ENDDO
        ENDDO
-
-    ELSE
-!
-!--    Send subdomain array to PE0
-       CALL MPI_SEND( f2_sub(nzb,mg_loc_ind(3,myid)-1,mg_loc_ind(1,myid)-1), &
-                      nwords, MPI_REAL, 0, 1, comm2d, ierr )
-    ENDIF
+    ENDDO
+
+!
+!-- Find out the number of array elements of the total array
+    nwords = SIZE( f2 )
+
+!
+!-- Gather subdomain data from all PEs
+    IF ( collective_wait )  CALL MPI_BARRIER( comm2d, ierr )
+    CALL MPI_ALLREDUCE( f2_l(nzb,nys_mg(grid_level)-1,nxl_mg(grid_level)-1), &
+                        f2(nzb,nys_mg(grid_level)-1,nxl_mg(grid_level)-1),   &
+                        nwords, MPI_REAL, MPI_SUM, comm2d, ierr )
+
+    DEALLOCATE( f2_l )

     CALL cpu_log( log_point_s(34), 'mg_gather', 'stop' )
     
@@ -1034 +1064 @@
     CALL cpu_log( log_point_s(35), 'mg_scatter', 'start' )

-    IF ( myid == 0 )  THEN
-!
-!--    Scatter the subdomain arrays to the other PEs by blocking
-!--    communication
-       DO  n = 1, numprocs-1
-
-          p2_sub = p2(:,mg_loc_ind(3,n)-1:mg_loc_ind(4,n)+1, &
-                        mg_loc_ind(1,n)-1:mg_loc_ind(2,n)+1)
-
-          CALL MPI_SEND( p2_sub(nzb,mg_loc_ind(3,0)-1,mg_loc_ind(1,0)-1), &
-                         nwords, MPI_REAL, n, 1, comm2d, ierr )
-
-       ENDDO
-
-!
-!--    Store data from the total array to the local subdomain array
-       p2_sub = p2(:,mg_loc_ind(3,0)-1:mg_loc_ind(4,0)+1, &
-                     mg_loc_ind(1,0)-1:mg_loc_ind(2,0)+1)
-
-    ELSE
-!
-!--    Receive subdomain array from PE0
-       CALL MPI_RECV( p2_sub(nzb,mg_loc_ind(3,myid)-1,mg_loc_ind(1,myid)-1), &
-                      nwords, MPI_REAL, 0, 1, comm2d, status, ierr )
-
-    ENDIF
+    p2_sub = p2(:,mg_loc_ind(3,myid)-1:mg_loc_ind(4,myid)+1, &
+                  mg_loc_ind(1,myid)-1:mg_loc_ind(2,myid)+1)

     CALL cpu_log( log_point_s(35), 'mg_scatter', 'stop' )
     
@@ -1092 +1098 @@
                 nzt_mg_save

-    LOGICAL ::  restore_boundary_lr_on_pe0, restore_boundary_ns_on_pe0
-
     REAL, DIMENSION(nzb:nzt_mg(grid_level)+1,                                  &
                  nys_mg(grid_level)-1:nyn_mg(grid_level)+1,                    &
     
@@ -1143 +1147 @@

        IF ( grid_level == mg_switch_to_pe0_level )  THEN
-!          print*, 'myid=',myid, ' restrict and switch to PE0. level=', grid_level
 !
 !--       From this level on, calculations are done on PE0 only.
     
@@ -1196 +1199 @@
 !
 !--       In case of non-cyclic lateral boundary conditions, both in- and
-!--       outflow conditions have to be used on PE0 after the switch, because
-!--       it then contains the total domain. Due to the virtual processor
-!--       grid, before the switch, PE0 can have in-/outflow at the left
-!--       and south wall only (or on opposite walls in case of a 1d
-!--       decomposition).
-          restore_boundary_lr_on_pe0 = .FALSE.
-          restore_boundary_ns_on_pe0 = .FALSE.
-          IF ( myid == 0 )  THEN
-             IF ( inflow_l  .AND.  .NOT. outflow_r )  THEN
-                outflow_r = .TRUE.
-                restore_boundary_lr_on_pe0 = .TRUE.
-             ENDIF
-             IF ( outflow_l  .AND.  .NOT. inflow_r )  THEN
-                inflow_r  = .TRUE.
-                restore_boundary_lr_on_pe0 = .TRUE.
-             ENDIF
-             IF ( inflow_s  .AND.  .NOT. outflow_n )  THEN
-                outflow_n = .TRUE.
-                restore_boundary_ns_on_pe0 = .TRUE.
-             ENDIF
-             IF ( outflow_s  .AND.  .NOT. inflow_n )  THEN
-                inflow_n  = .TRUE.
-                restore_boundary_ns_on_pe0 = .TRUE.
-             ENDIF
+!--       outflow conditions have to be used on all PEs after the switch,
+!--       because then they have the total domain.
+          IF ( bc_lr_dirrad )  THEN
+             inflow_l  = .TRUE.
+             inflow_r  = .FALSE.
+             outflow_l = .FALSE.
+             outflow_r = .TRUE.
+          ELSEIF ( bc_lr_raddir )  THEN
+             inflow_l  = .FALSE.
+             inflow_r  = .TRUE.
+             outflow_l = .TRUE.
+             outflow_r = .FALSE.
          ENDIF

+          IF ( bc_ns_dirrad )  THEN
+             inflow_n  = .TRUE.
+             inflow_s  = .FALSE.
+             outflow_n = .FALSE.
+             outflow_s = .TRUE.
+          ELSEIF ( bc_ns_raddir )  THEN
+             inflow_n  = .FALSE.
+             inflow_s  = .TRUE.
+             outflow_n = .TRUE.
+             outflow_s = .FALSE.
+          ENDIF
+
          DEALLOCATE( f2_sub )

     
@@ -1229 +1232 @@

        ENDIF
+
        p2 = 0.0

 !
 !--    Repeat the same procedure till the coarsest grid is reached
-       IF ( myid == 0  .OR.  grid_level > mg_switch_to_pe0_level )  THEN
-          CALL next_mg_level( f2, p2, p3, r )
-       ENDIF
+       CALL next_mg_level( f2, p2, p3, r )

     ENDIF
     
@@ -1242 +1244 @@
 !-- Now follows the prolongation
     IF ( grid_level >= 2 )  THEN
-
-!
-!--    Grid level has to be incremented on the PEs where next_mg_level
-!--    has not been called before (normally it is incremented at the end
-!--    of next_mg_level)
-       IF ( myid /= 0  .AND.  grid_level == mg_switch_to_pe0_level )  THEN
-          grid_level = grid_level + 1
-          nxl = nxl_mg(grid_level)
-          nxr = nxr_mg(grid_level)
-          nys = nys_mg(grid_level)
-          nyn = nyn_mg(grid_level)
-          nzt = nzt_mg(grid_level)
-       ENDIF

 !
     
@@ -1290 +1279 @@

 !
-!--       In case of non-cyclic lateral boundary conditions, restore the
-!--       in-/outflow conditions on PE0
-          IF ( myid == 0 )  THEN
-             IF ( restore_boundary_lr_on_pe0 )  THEN
-                IF ( inflow_l  )  outflow_r = .FALSE.
-                IF ( outflow_l )  inflow_r  = .FALSE.
+!--       For non-cyclic lateral boundary conditions, restore the
+!--       in-/outflow conditions
+          inflow_l  = .FALSE.;  inflow_r  = .FALSE.
+          inflow_n  = .FALSE.;  inflow_s  = .FALSE.
+          outflow_l = .FALSE.;  outflow_r = .FALSE.
+          outflow_n = .FALSE.;  outflow_s = .FALSE.
+
+          IF ( pleft == MPI_PROC_NULL )  THEN
+             IF ( bc_lr_dirrad )  THEN
+                inflow_l  = .TRUE.
+             ELSEIF ( bc_lr_raddir )  THEN
+                outflow_l = .TRUE.
              ENDIF
-             IF ( restore_boundary_ns_on_pe0 )  THEN
-                IF ( inflow_s  )  outflow_n = .FALSE.
-                IF ( outflow_s )  inflow_n  = .FALSE.
+          ENDIF
+
+          IF ( pright == MPI_PROC_NULL )  THEN
+             IF ( bc_lr_dirrad )  THEN
+                outflow_r = .TRUE.
+             ELSEIF ( bc_lr_raddir )  THEN
+                inflow_r  = .TRUE.
+             ENDIF
+          ENDIF
+
+          IF ( psouth == MPI_PROC_NULL )  THEN
+             IF ( bc_ns_dirrad )  THEN
+                outflow_s = .TRUE.
+             ELSEIF ( bc_ns_raddir )  THEN
+                inflow_s  = .TRUE.
+             ENDIF
+          ENDIF
+
+          IF ( pnorth == MPI_PROC_NULL )  THEN
+             IF ( bc_ns_dirrad )  THEN
+                inflow_n  = .TRUE.
+             ELSEIF ( bc_ns_raddir )  THEN
+                outflow_n = .TRUE.
              ENDIF
          ENDIF