Changeset 622 for palm/trunk/SOURCE


Timestamp: Dec 10, 2010 8:08:13 AM
Author: raasch
Message:

New:
---

Optional barriers included in order to speed up the collective operations
MPI_ALLTOALL and MPI_ALLREDUCE. This feature is controlled by the new initial
parameter collective_wait. The default is .FALSE., but .TRUE. on SGI-type
systems. (advec_particles, advec_s_bc, buoyancy, check_for_restart,
cpu_statistics, data_output_2d, data_output_ptseries, flow_statistics,
global_min_max, inflow_turbulence, init_3d_model, init_particles, init_pegrid,
init_slope, parin, pres, poismg, set_particle_attributes, timestep,
read_var_list, user_statistics, write_compressed, write_var_list)
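
The pattern applied in the files listed above is sketched below. This is a
self-contained illustration only, not an excerpt from PALM; the program name
and the dummy local value are invented, while collective_wait, comm2d and
ierr mirror the names used in the diffs further down.

    ! Minimal sketch of the collective_wait pattern: an optional MPI_BARRIER
    ! placed directly in front of a collective call. Synchronizing all PEs
    ! first can speed up MPI_ALLREDUCE / MPI_ALLTOALL on some systems
    ! (e.g. SGI-type machines).
    PROGRAM collective_wait_sketch

       USE MPI

       IMPLICIT NONE

       INTEGER ::  comm2d, ierr
       LOGICAL ::  collective_wait = .TRUE.
       REAL    ::  total, total_l

       CALL MPI_INIT( ierr )
       comm2d  = MPI_COMM_WORLD   ! PALM uses its 2d processor-grid communicator here
       total_l = 1.0              ! some local partial result

    !
    !--    Optional barrier in front of the collective operation
       IF ( collective_wait )  CALL MPI_BARRIER( comm2d, ierr )
       CALL MPI_ALLREDUCE( total_l, total, 1, MPI_REAL, MPI_SUM, comm2d, ierr )

       CALL MPI_FINALIZE( ierr )

    END PROGRAM collective_wait_sketch

With collective_wait = .FALSE. the barrier is simply skipped and only the
collective call remains, which is exactly what the diffs below implement.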

Adjustments for Kyushu Univ. (lcrte, ibmku). For hybrid (MPI/OpenMP) runs,
the number of OpenMP threads per MPI task can now be given as an argument
to the mrun option -O. (mbuild, mrun, subjob)

Changed:


Initialization of the module command changed for SGI-ICE/lcsgi (mbuild, subjob)

Errors:


Location: palm/trunk/SOURCE
Files: 26 edited

Legend:
  + Added
  - Removed
  (unmarked) Unmodified
  • palm/trunk/SOURCE/advec_particles.f90

    r559 → r622

    ! Current revisions:
    ! -----------------
  + ! optional barriers included in order to speed up collective operations
    ! TEST: PRINT statements on unit 9 (commented out)
    !
    ...
    !
    !--       Compute total sum from local sums
  +           IF ( collective_wait )  CALL MPI_BARRIER( comm2d, ierr )
              CALL MPI_ALLREDUCE( sums_l(nzb,1,0), sums(nzb,1), nzt+2-nzb, &
                                  MPI_REAL, MPI_SUM, comm2d, ierr )
  +           IF ( collective_wait )  CALL MPI_BARRIER( comm2d, ierr )
              CALL MPI_ALLREDUCE( sums_l(nzb,2,0), sums(nzb,2), nzt+2-nzb, &
                                  MPI_REAL, MPI_SUM, comm2d, ierr )
    ...
    !
    !--       Compute total sum from local sums
  +           IF ( collective_wait )  CALL MPI_BARRIER( comm2d, ierr )
              CALL MPI_ALLREDUCE( sums_l(nzb,8,0), sums(nzb,8), nzt+2-nzb, &
                                  MPI_REAL, MPI_SUM, comm2d, ierr )
  +           IF ( collective_wait )  CALL MPI_BARRIER( comm2d, ierr )
              CALL MPI_ALLREDUCE( sums_l(nzb,30,0), sums(nzb,30), nzt+2-nzb, &
                                  MPI_REAL, MPI_SUM, comm2d, ierr )
  +           IF ( collective_wait )  CALL MPI_BARRIER( comm2d, ierr )
              CALL MPI_ALLREDUCE( sums_l(nzb,31,0), sums(nzb,31), nzt+2-nzb, &
                                  MPI_REAL, MPI_SUM, comm2d, ierr )
  +           IF ( collective_wait )  CALL MPI_BARRIER( comm2d, ierr )
              CALL MPI_ALLREDUCE( sums_l(nzb,32,0), sums(nzb,32), nzt+2-nzb, &
                                  MPI_REAL, MPI_SUM, comm2d, ierr )
    ...
    !--    and set the switch corespondingly
    #if defined( __parallel )
  +        IF ( collective_wait )  CALL MPI_BARRIER( comm2d, ierr )
           CALL MPI_ALLREDUCE( dt_3d_reached_l, dt_3d_reached, 1, MPI_LOGICAL, &
                               MPI_LAND, comm2d, ierr )
  • palm/trunk/SOURCE/advec_s_bc.f90

    r392 → r622

    ! Current revisions:
    ! -----------------
  - !
  + ! optional barriers included in order to speed up collective operations
    !
    ! Former revisions:
    ...
        ENDDO
    #if defined( __parallel )
  +     IF ( collective_wait )  CALL MPI_BARRIER( comm2d, ierr )
        CALL MPI_ALLREDUCE( fmax_l, fmax, 2, MPI_REAL, MPI_MAX, comm2d, ierr )
    #else
    ...
        ENDDO
    #if defined( __parallel )
  +     IF ( collective_wait )  CALL MPI_BARRIER( comm2d, ierr )
        CALL MPI_ALLREDUCE( fmax_l, fmax, 2, MPI_REAL, MPI_MAX, comm2d, ierr )
    #else
    ...
        ENDDO
    #if defined( __parallel )
  +     IF ( collective_wait )  CALL MPI_BARRIER( comm2d, ierr )
        CALL MPI_ALLREDUCE( fmax_l, fmax, 2, MPI_REAL, MPI_MAX, comm2d, ierr )
    #else
  • palm/trunk/SOURCE/buoyancy.f90

    r516 → r622

    ! Currrent revisions:
    ! -----------------
  - !
  + ! optional barriers included in order to speed up collective operations
    !
    ! Former revisions:
    ...
    #if defined( __parallel )

  +           IF ( collective_wait )  CALL MPI_BARRIER( comm2d, ierr )
              CALL MPI_ALLREDUCE( sums_l(nzb,pr,0), sums(nzb,pr), nzt+2-nzb, &
                                  MPI_REAL, MPI_SUM, comm2d, ierr )
  • palm/trunk/SOURCE/check_for_restart.f90

    r392 → r622

    ! Current revisions:
    ! -----------------
  - !
  + ! optional barriers included in order to speed up collective operations
    !
    ! Former revisions:
    ...
    !-- Make a logical OR for all processes. Stop the model run if at least
    !-- one processor has reached the time limit.
  +     IF ( collective_wait )  CALL MPI_BARRIER( comm2d, ierr )
        CALL MPI_ALLREDUCE( terminate_run_l, terminate_run, 1, MPI_LOGICAL, &
                            MPI_LOR, comm2d, ierr )
  • palm/trunk/SOURCE/cpu_statistics.f90

    r484 → r622

    ! Current revisions:
    ! -----------------
  - !
  + ! output of handling of collective operations
    !
    ! Former revisions:
    ...

    !
  + !--    Output handling of collective operations
  +        IF ( collective_wait )  THEN
  +           WRITE ( 18, 103 )
  +        ELSE
  +           WRITE ( 18, 104 )
  +        ENDIF
  +
  + !
    !--    Empty lines in order to create a gap to the results of the model
    !--    continuation runs
  -        WRITE ( 18, 103 )
  +        WRITE ( 18, 105 )

    ...

    102 FORMAT (A20,2X,F9.3,2X,F7.2,1X,I7,3(1X,F9.3))
  - 103 FORMAT (//)
  + 103 FORMAT (/'Barriers are set in front of collective operations')
  + 104 FORMAT (/'No barriers are set in front of collective operations')
  + 105 FORMAT (//)

     END SUBROUTINE cpu_statistics
  • palm/trunk/SOURCE/data_output_2d.f90

    r559 → r622

    ! Current revisions:
    ! -----------------
  - !
  + ! optional barriers included in order to speed up collective operations
    !
    ! Former revisions:
    ...
    !
    !--                   Now do the averaging over all PEs along y
  +                       IF ( collective_wait )  CALL MPI_BARRIER( comm2d, ierr )
                          CALL MPI_ALLREDUCE( local_2d_l(nxl-1,nzb),              &
                                              local_2d(nxl-1,nzb), ngp, MPI_REAL, &
    ...
    !--                      Distribute data over all PEs along y
                             ngp = ( nxr-nxl+3 ) * ( nzt-nzb+2 )
  +                          IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
                             CALL MPI_ALLREDUCE( local_2d_l(nxl-1,nzb),            &
                                                 local_2d(nxl-1,nzb), ngp,         &
    ...
    !
    !--                   Now do the averaging over all PEs along x
  +                       IF ( collective_wait )  CALL MPI_BARRIER( comm2d, ierr )
                          CALL MPI_ALLREDUCE( local_2d_l(nys-1,nzb),              &
                                              local_2d(nys-1,nzb), ngp, MPI_REAL, &
    ...
    !--                      Distribute data over all PEs along x
                             ngp = ( nyn-nys+3 ) * ( nzt-nzb+2 )
  +                          IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
                             CALL MPI_ALLREDUCE( local_2d_l(nys-1,nzb),            &
                                                 local_2d(nys-1,nzb), ngp,         &
  • palm/trunk/SOURCE/data_output_ptseries.f90

    r392 → r622

    ! Current revisions:
    ! -----------------
  - !
  + ! optional barriers included in order to speed up collective operations
    !
    ! Former revisions:
    ...
        inum = number_of_particle_groups + 1

  +     IF ( collective_wait )  CALL MPI_BARRIER( comm2d, ierr )
        CALL MPI_ALLREDUCE( pts_value_l(0,1), pts_value(0,1), 14*inum, MPI_REAL, &
                            MPI_SUM, comm2d, ierr )
  +     IF ( collective_wait )  CALL MPI_BARRIER( comm2d, ierr )
        CALL MPI_ALLREDUCE( pts_value_l(0,15), pts_value(0,15), inum, MPI_REAL, &
                            MPI_MAX, comm2d, ierr )
  +     IF ( collective_wait )  CALL MPI_BARRIER( comm2d, ierr )
        CALL MPI_ALLREDUCE( pts_value_l(0,16), pts_value(0,16), inum, MPI_REAL, &
                            MPI_MIN, comm2d, ierr )
    ...
        inum = number_of_particle_groups + 1

  +     IF ( collective_wait )  CALL MPI_BARRIER( comm2d, ierr )
        CALL MPI_ALLREDUCE( pts_value_l(0,17), pts_value(0,17), inum*10, MPI_REAL, &
                            MPI_SUM, comm2d, ierr )
  • palm/trunk/SOURCE/flow_statistics.f90

    r550 → r622

    ! Current revisions:
    ! -----------------
  - !
  + ! optional barriers included in order to speed up collective operations
    !
    ! Former revisions:
    ...
    !
    !--    Compute total sum from local sums
  +        IF ( collective_wait )  CALL MPI_BARRIER( comm2d, ierr )
           CALL MPI_ALLREDUCE( sums_l(nzb,1,0), sums(nzb,1), nzt+2-nzb, MPI_REAL, &
                               MPI_SUM, comm2d, ierr )
  +        IF ( collective_wait )  CALL MPI_BARRIER( comm2d, ierr )
           CALL MPI_ALLREDUCE( sums_l(nzb,2,0), sums(nzb,2), nzt+2-nzb, MPI_REAL, &
                               MPI_SUM, comm2d, ierr )
  +        IF ( collective_wait )  CALL MPI_BARRIER( comm2d, ierr )
           CALL MPI_ALLREDUCE( sums_l(nzb,4,0), sums(nzb,4), nzt+2-nzb, MPI_REAL, &
                               MPI_SUM, comm2d, ierr )
           IF ( ocean )  THEN
  +           IF ( collective_wait )  CALL MPI_BARRIER( comm2d, ierr )
              CALL MPI_ALLREDUCE( sums_l(nzb,23,0), sums(nzb,23), nzt+2-nzb, &
                                  MPI_REAL, MPI_SUM, comm2d, ierr )
           ENDIF
           IF ( humidity ) THEN
  +           IF ( collective_wait )  CALL MPI_BARRIER( comm2d, ierr )
              CALL MPI_ALLREDUCE( sums_l(nzb,44,0), sums(nzb,44), nzt+2-nzb, &
                                  MPI_REAL, MPI_SUM, comm2d, ierr )
  +           IF ( collective_wait )  CALL MPI_BARRIER( comm2d, ierr )
              CALL MPI_ALLREDUCE( sums_l(nzb,41,0), sums(nzb,41), nzt+2-nzb, &
                                  MPI_REAL, MPI_SUM, comm2d, ierr )
              IF ( cloud_physics ) THEN
  +              IF ( collective_wait )  CALL MPI_BARRIER( comm2d, ierr )
                 CALL MPI_ALLREDUCE( sums_l(nzb,42,0), sums(nzb,42), nzt+2-nzb, &
                                     MPI_REAL, MPI_SUM, comm2d, ierr )
  +              IF ( collective_wait )  CALL MPI_BARRIER( comm2d, ierr )
                 CALL MPI_ALLREDUCE( sums_l(nzb,43,0), sums(nzb,43), nzt+2-nzb, &
                                     MPI_REAL, MPI_SUM, comm2d, ierr )
    ...

           IF ( passive_scalar )  THEN
  +           IF ( collective_wait )  CALL MPI_BARRIER( comm2d, ierr )
              CALL MPI_ALLREDUCE( sums_l(nzb,41,0), sums(nzb,41), nzt+2-nzb, &
                                  MPI_REAL, MPI_SUM, comm2d, ierr )
    ...
    !
    !--    Compute total sum from local sums
  +        IF ( collective_wait )  CALL MPI_BARRIER( comm2d, ierr )
           CALL MPI_ALLREDUCE( sums_l(nzb,1,0), sums(nzb,1), ngp_sums, MPI_REAL, &
                               MPI_SUM, comm2d, ierr )
  • palm/trunk/SOURCE/global_min_max.f90

    r484 → r622

    ! Current revisions:
    ! -----------------
  - !
  + ! optional barriers included in order to speed up collective operations
    !
    ! Former revisions:
    ...
    #if defined( __parallel )
           fmin_l(2)  = myid
  +        IF ( collective_wait )  CALL MPI_BARRIER( comm2d, ierr )
           CALL MPI_ALLREDUCE( fmin_l, fmin, 1, MPI_2REAL, MPI_MINLOC, comm2d, ierr )

    ...
    #if defined( __parallel )
           fmax_l(2)  = myid
  +        IF ( collective_wait )  CALL MPI_BARRIER( comm2d, ierr )
           CALL MPI_ALLREDUCE( fmax_l, fmax, 1, MPI_2REAL, MPI_MAXLOC, comm2d, ierr )

    ...
    #if defined( __parallel )
           fmax_l(2)  = myid
  +        IF ( collective_wait )  CALL MPI_BARRIER( comm2d, ierr )
           CALL MPI_ALLREDUCE( fmax_l, fmax, 1, MPI_2REAL, MPI_MAXLOC, comm2d, &
                               ierr )
  • palm/trunk/SOURCE/inflow_turbulence.f90

    r484 → r622

    ! Current revisions:
    ! -----------------
  - !
  + ! optional barriers included in order to speed up collective operations
    !
    ! Former revisions:
    ...
    !
    !-- Now, averaging over all PEs
  +     IF ( collective_wait )  CALL MPI_BARRIER( comm2d, ierr )
        CALL MPI_ALLREDUCE( avpr_l(nzb,1), avpr(nzb,1), ngp_pr, MPI_REAL, MPI_SUM, &
                            comm2d, ierr )
    ...

    #if defined( __parallel )
  + !       IF ( collective_wait )  CALL MPI_BARRIER( comm2d, ierr )
    !       CALL MPI_ALLREDUCE( volume_flow_l(1), volume_flow(1), 1, MPI_REAL, &
    !                           MPI_SUM, comm1dy, ierr )
  • palm/trunk/SOURCE/init_3d_model.f90

    r561 → r622

    ! Current revisions:
    ! -----------------
  - !
  + ! optional barriers included in order to speed up collective operations
    !
    ! Former revisions:
    ...

    #if defined( __parallel )
  +           IF ( collective_wait )  CALL MPI_BARRIER( comm2d, ierr )
              CALL MPI_ALLREDUCE( volume_flow_initial_l(1), volume_flow_initial(1),&
                                  2, MPI_REAL, MPI_SUM, comm2d, ierr )
  +           IF ( collective_wait )  CALL MPI_BARRIER( comm2d, ierr )
              CALL MPI_ALLREDUCE( volume_flow_area_l(1), volume_flow_area(1),      &
                                  2, MPI_REAL, MPI_SUM, comm2d, ierr )
    ...

    #if defined( __parallel )
  +           IF ( collective_wait )  CALL MPI_BARRIER( comm2d, ierr )
              CALL MPI_ALLREDUCE( volume_flow_initial_l(1), volume_flow_initial(1),&
                                  2, MPI_REAL, MPI_SUM, comm2d, ierr )
  +           IF ( collective_wait )  CALL MPI_BARRIER( comm2d, ierr )
              CALL MPI_ALLREDUCE( volume_flow_area_l(1), volume_flow_area(1),      &
                                  2, MPI_REAL, MPI_SUM, comm2d, ierr )
    ...
        sr = statistic_regions + 1
    #if defined( __parallel )
  +     IF ( collective_wait )  CALL MPI_BARRIER( comm2d, ierr )
        CALL MPI_ALLREDUCE( ngp_2dh_l(0), ngp_2dh(0), sr, MPI_INTEGER, MPI_SUM,   &
                            comm2d, ierr )
  +     IF ( collective_wait )  CALL MPI_BARRIER( comm2d, ierr )
        CALL MPI_ALLREDUCE( ngp_2dh_outer_l(0,0), ngp_2dh_outer(0,0), (nz+2)*sr,  &
                            MPI_INTEGER, MPI_SUM, comm2d, ierr )
  +     IF ( collective_wait )  CALL MPI_BARRIER( comm2d, ierr )
        CALL MPI_ALLREDUCE( ngp_2dh_s_inner_l(0,0), ngp_2dh_s_inner(0,0),         &
                            (nz+2)*sr, MPI_INTEGER, MPI_SUM, comm2d, ierr )
  +     IF ( collective_wait )  CALL MPI_BARRIER( comm2d, ierr )
        CALL MPI_ALLREDUCE( ngp_3d_inner_l(0), ngp_3d_inner_tmp(0), sr, MPI_REAL, &
                            MPI_SUM, comm2d, ierr )
  • palm/trunk/SOURCE/init_particles.f90

    r392 → r622

    ! Current revisions:
    ! -----------------
  - !
  + ! optional barriers included in order to speed up collective operations
    !
    ! Former revisions:
    ...
    !--    Calculate the number of particles and tails of the total domain
    #if defined( __parallel )
  +        IF ( collective_wait )  CALL MPI_BARRIER( comm2d, ierr )
           CALL MPI_ALLREDUCE( number_of_particles, total_number_of_particles, 1, &
                               MPI_INTEGER, MPI_SUM, comm2d, ierr )
  +        IF ( collective_wait )  CALL MPI_BARRIER( comm2d, ierr )
           CALL MPI_ALLREDUCE( number_of_tails, total_number_of_tails, 1, &
                               MPI_INTEGER, MPI_SUM, comm2d, ierr )
    ...

    #if defined( __parallel )
  +           IF ( collective_wait )  CALL MPI_BARRIER( comm2d, ierr )
              CALL MPI_ALLREDUCE( uniform_particles_l, uniform_particles, 1, &
                                  MPI_LOGICAL, MPI_LAND, comm2d, ierr )
  • palm/trunk/SOURCE/init_pegrid.f90

    r482 → r622

    ! Current revisions:
    ! -----------------
  - !
  + ! optional barriers included in order to speed up collective operations
    ! ATTENTION: nnz_x undefined problem still has to be solved!!!!!!!!
    ! TEST OUTPUT (TO BE REMOVED) logging mpi2 ierr values
    ...
           CALL message( 'init_pegrid', 'PA0223', 1, 2, 0, 6, 0 )
        ENDIF
  +
  + !
  + !-- For communication speedup, set barriers in front of collective
  + !-- communications by default on SGI-type systems
  +     IF ( host(3:5) == 'sgi' )  collective_wait = .TRUE.

    !
    ...
           id_inflow_l = 0
        ENDIF
  +     IF ( collective_wait )  CALL MPI_BARRIER( comm2d, ierr )
        CALL MPI_ALLREDUCE( id_inflow_l, id_inflow, 1, MPI_INTEGER, MPI_SUM, &
                            comm1dx, ierr )
    ...
    !-- Broadcast the id of the recycling plane
    !-- WARNING: needs to be adjusted in case of inflows other than from left side!
  -     IF ( ( recycling_width / dx ) >= nxl  .AND.  ( recycling_width / dx ) <= nxr ) &
  -     THEN
  +     IF ( ( recycling_width / dx ) >= nxl  .AND. &
  +          ( recycling_width / dx ) <= nxr )  THEN
           id_recycling_l = myidx
        ELSE
           id_recycling_l = 0
        ENDIF
  +     IF ( collective_wait )  CALL MPI_BARRIER( comm2d, ierr )
        CALL MPI_ALLREDUCE( id_recycling_l, id_recycling, 1, MPI_INTEGER, MPI_SUM, &
                            comm1dx, ierr )
  • palm/trunk/SOURCE/init_slope.f90

    r484 → r622

    ! Current revisions:
    ! -----------------
  - !
  + ! optional barriers included in order to speed up collective operations
    !
    ! Former revisions:
    ...
                 ENDDO
              ENDDO
  -        ENDDO
  +        ENDDO

    #if defined( __parallel )
  -        CALL MPI_ALLREDUCE( pt_init_local, pt_init, nzt+2-nzb, MPI_REAL, &
  -                             MPI_SUM, comm2d, ierr )
  +        IF ( collective_wait )  CALL MPI_BARRIER( comm2d, ierr )
  +        CALL MPI_ALLREDUCE( pt_init_local, pt_init, nzt+2-nzb, MPI_REAL, &
  +                             MPI_SUM, comm2d, ierr )
    #else
  -        pt_init = pt_init_local
  +        pt_init = pt_init_local
    #endif

  -        pt_init = pt_init / ngp_2dh(0)
  -        DEALLOCATE( pt_init_local )
  +        pt_init = pt_init / ngp_2dh(0)
  +        DEALLOCATE( pt_init_local )

  -     ENDIF
  +     ENDIF

     END SUBROUTINE init_slope
  • palm/trunk/SOURCE/modules.f90

    r601 → r622

    ! Current revisions:
    ! -----------------
  - !
  + ! +collective_wait in pegrid
    !
    ! Former revisions:
    ...
        INTEGER, DIMENSION(:), ALLOCATABLE ::  ngp_yz, type_xz

  -     LOGICAL ::  reorder = .TRUE.
  +     LOGICAL ::  collective_wait = .FALSE., reorder = .TRUE.
        LOGICAL, DIMENSION(2) ::  cyclic = (/ .TRUE. , .TRUE. /), &
                                  remain_dims
  • palm/trunk/SOURCE/parin.f90

    r601 → r622

    ! Current revisions:
    ! -----------------
  - !
  + ! +collective_wait in inipar
    !
    ! Former revisions:
    ...
                 canyon_width_x, canyon_width_y, canyon_wall_left, &
                 canyon_wall_south, cfl_factor, cloud_droplets, cloud_physics, &
  -              conserve_volume_flow, conserve_volume_flow_mode, &
  +              collective_wait, conserve_volume_flow, conserve_volume_flow_mode, &
                 coupling_start_time, cthf, cut_spline_overshoot, &
                 cycle_mg, damp_level_1d, dissipation_1d, dp_external, dp_level_b, &
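
    Since collective_wait is now part of the &inipar namelist (see the parin.f90
    diff above) and is also written to and read from the restart files
    (write_var_list.f90, read_var_list.f90 below), the host-dependent default set
    in init_pegrid can be overridden from the parameter file. A minimal,
    hypothetical fragment of such a file:

        ! hypothetical parameter-file fragment; a real run additionally needs the
        ! usual grid and run-control parameters in &inipar
        &inipar  collective_wait = .TRUE.  /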
  • palm/trunk/SOURCE/poisfft.f90

    r484 → r622

    ! Current revisions:
    ! -----------------
  - !
  + ! optional barriers included in order to speed up collective operations
    !
    ! Former revisions:
    ...
    !--    Transpose array
           CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
  +        IF ( collective_wait )  CALL MPI_BARRIER( comm2d, ierr )
           CALL MPI_ALLTOALL( work(nxl,1,0),      sendrecvcount_xy, MPI_REAL, &
                              f_out(1,1,nys_x,1), sendrecvcount_xy, MPI_REAL, &
    ...
    !--    Transpose array
           CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
  +        IF ( collective_wait )  CALL MPI_BARRIER( comm2d, ierr )
           CALL MPI_ALLTOALL( f_in(1,1,nys_x,1), sendrecvcount_xy, MPI_REAL, &
                              work(nxl,1,0),     sendrecvcount_xy, MPI_REAL, &
    ...
    !--    Transpose array
           CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
  +        IF ( collective_wait )  CALL MPI_BARRIER( comm2d, ierr )
           CALL MPI_ALLTOALL( work(nys,1,0),      sendrecvcount_xy, MPI_REAL, &
                              f_out(1,1,nxl_y,1), sendrecvcount_xy, MPI_REAL, &
    ...
    !--    Transpose array
           CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
  +        IF ( collective_wait )  CALL MPI_BARRIER( comm2d, ierr )
           CALL MPI_ALLTOALL( f_in(1,1,nxl_y,1), sendrecvcount_xy, MPI_REAL, &
                              work(nys,1,0),     sendrecvcount_xy, MPI_REAL, &
  • palm/trunk/SOURCE/poismg.f90

    r392 → r622

    ! Current revisions:
    ! -----------------
  - !
  + ! optional barriers included in order to speed up collective operations
    !
    ! Former revisions:
    ...
              maxerror = SUM( r(nzb+1:nzt,nys:nyn,nxl:nxr)**2 )
    #if defined( __parallel )
  +           IF ( collective_wait )  CALL MPI_BARRIER( comm2d, ierr )
              CALL MPI_ALLREDUCE( maxerror, residual_norm, 1, MPI_REAL, MPI_SUM, &
                                  comm2d, ierr)
  • palm/trunk/SOURCE/pres.f90

    r484 → r622

    ! Current revisions:
    ! -----------------
  - !
  + ! optional barriers included in order to speed up collective operations
    !
    ! Former revisions:
    ...

    #if defined( __parallel )
  +        IF ( collective_wait )  CALL MPI_BARRIER( comm2d, ierr )
           CALL MPI_ALLREDUCE( volume_flow_l(1), volume_flow(1), 1, MPI_REAL, &
                               MPI_SUM, comm1dy, ierr )
    ...

    #if defined( __parallel )
  +        IF ( collective_wait )  CALL MPI_BARRIER( comm2d, ierr )
           CALL MPI_ALLREDUCE( volume_flow_l(2), volume_flow(2), 1, MPI_REAL, &
                               MPI_SUM, comm1dx, ierr )
    ...
              ENDDO
    #if defined( __parallel )
  +           IF ( collective_wait )  CALL MPI_BARRIER( comm2d, ierr )
              CALL MPI_ALLREDUCE( w_l_l(1), w_l(1), nzt, MPI_REAL, MPI_SUM, comm2d, &
                                  ierr )
    ...

    #if defined( __parallel )
  +        IF ( collective_wait )  CALL MPI_BARRIER( comm2d, ierr )
           CALL MPI_ALLREDUCE( volume_flow_l(1), volume_flow(1), 2, MPI_REAL, &
                               MPI_SUM, comm2d, ierr )
  • palm/trunk/SOURCE/read_var_list.f90

    r601 → r622

    !------------------------------------------------------------------------------!
    ! Current revisions:
  - ! -----------------_
  - !
  + ! ------------------
  + ! +collective_wait
    !
    ! Former revisions:
    ...
              CASE ( 'cloud_physics' )
                 READ ( 13 )  cloud_physics
  +           CASE ( 'collective_wait' )
  +              READ ( 13 )  collective_wait
              CASE ( 'conserve_volume_flow' )
                 READ ( 13 )  conserve_volume_flow
  • palm/trunk/SOURCE/set_particle_attributes.f90

    r484 → r622

    ! Current revisions:
    ! -----------------
  - !
  + ! optional barriers included in order to speed up collective operations
    !
    ! Former revisions:
    ...
    #if defined( __parallel )

  +        IF ( collective_wait )  CALL MPI_BARRIER( comm2d, ierr )
           CALL MPI_ALLREDUCE( sums_l(nzb,4,0), sums(nzb,4), nzt+2-nzb, &
                               MPI_REAL, MPI_SUM, comm2d, ierr )
  • palm/trunk/SOURCE/timestep.f90

    r392 → r622

    ! Current revisions:
    ! -----------------
  - !
  + ! optional barriers included in order to speed up collective operations
    !
    ! Former revisions:
    ...
              uv_gtrans_l = uv_gtrans_l / REAL( (nxr-nxl+1)*(nyn-nys+1)*(nzt-nzb) )
    #if defined( __parallel )
  +           IF ( collective_wait )  CALL MPI_BARRIER( comm2d, ierr )
              CALL MPI_ALLREDUCE( uv_gtrans_l, uv_gtrans, 2, MPI_REAL, MPI_SUM, &
                                  comm2d, ierr )
    ...
    !$OMP END PARALLEL
    #if defined( __parallel )
  +        IF ( collective_wait )  CALL MPI_BARRIER( comm2d, ierr )
           CALL MPI_ALLREDUCE( dt_diff_l, dt_diff, 1, MPI_REAL, MPI_MIN, comm2d, &
                               ierr )
    ...
    !--       Determine the global minumum
    #if defined( __parallel )
  +           IF ( collective_wait )  CALL MPI_BARRIER( comm2d, ierr )
              CALL MPI_ALLREDUCE( dt_plant_canopy_l, dt_plant_canopy, 1, MPI_REAL,  &
                                  MPI_MIN, comm2d, ierr )
  • palm/trunk/SOURCE/transpose.f90

    r484 → r622

    ! Current revisions:
    ! -----------------
  - !
  + ! optional barriers included in order to speed up collective operations
    !
    ! Former revisions:
    ...
    !-- Transpose array
        CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
  +     IF ( collective_wait )  CALL MPI_BARRIER( comm2d, ierr )
        CALL MPI_ALLTOALL( f_inv(nys_x,nzb_x,0), sendrecvcount_xy, MPI_REAL, &
                           work(1),              sendrecvcount_xy, MPI_REAL, &
    ...
    !--    Transpose array
           CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
  +        IF ( collective_wait )  CALL MPI_BARRIER( comm2d, ierr )
           CALL MPI_ALLTOALL( work(1),          sendrecvcount_zx, MPI_REAL, &
                              f_inv(nys,nxl,1), sendrecvcount_zx, MPI_REAL, &
    ...
    !-- Transpose array
        CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
  +     IF ( collective_wait )  CALL MPI_BARRIER( comm2d, ierr )
        CALL MPI_ALLTOALL( work(1),              sendrecvcount_xy, MPI_REAL, &
                           f_inv(nys_x,nzb_x,0), sendrecvcount_xy, MPI_REAL, &
    ...
    !-- Transpose array
        CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
  +     IF ( collective_wait )  CALL MPI_BARRIER( comm2d, ierr )
        CALL MPI_ALLTOALL( f_inv(nxl,1,nys), sendrecvcount_xy, MPI_REAL, &
                           work(1),          sendrecvcount_xy, MPI_REAL, &
    ...
    !-- Transpose array
        CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
  +     IF ( collective_wait )  CALL MPI_BARRIER( comm2d, ierr )
        CALL MPI_ALLTOALL( f_inv(nxl_y,nzb_y,0), sendrecvcount_yz, MPI_REAL, &
                           work(1),              sendrecvcount_yz, MPI_REAL, &
    ...
    !-- Transpose array
        CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
  +     IF ( collective_wait )  CALL MPI_BARRIER( comm2d, ierr )
        CALL MPI_ALLTOALL( f_inv(nys,nxl,1), sendrecvcount_zx, MPI_REAL, &
                           work(1),          sendrecvcount_zx, MPI_REAL, &
    ...
    !--    Transpose array
           CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
  +        IF ( collective_wait )  CALL MPI_BARRIER( comm2d, ierr )
           CALL MPI_ALLTOALL( work(1),              sendrecvcount_yz, MPI_REAL, &
                              f_inv(nxl_y,nzb_y,0), sendrecvcount_yz, MPI_REAL, &
    ...
    !-- Transpose array
        CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
  +     IF ( collective_wait )  CALL MPI_BARRIER( comm2d, ierr )
        CALL MPI_ALLTOALL( f_inv(nys,nxl,1), sendrecvcount_zyd, MPI_REAL, &
                           work(1),          sendrecvcount_zyd, MPI_REAL, &
  • palm/trunk/SOURCE/user_statistics.f90

    r556 → r622

    ! Current revisions:
    ! -----------------
  - !
  + ! optional barriers included in order to speed up collective operations
    !
    ! Former revisions:
    ...
    !--           assign ts_value(dots_num_palm+1:,sr) = ts_value_l directly.
    !#if defined( __parallel )
  + !       IF ( collective_wait )  CALL MPI_BARRIER( comm2d, ierr )
    !       CALL MPI_ALLREDUCE( ts_value_l(dots_num_palm+1),                       &
    !                           ts_value(dots_num_palm+1,sr),                      &
  • palm/trunk/SOURCE/write_compressed.f90

    r484 → r622

    ! Current revisions:
    ! -----------------
  - !
  + ! optional barriers included in order to speed up collective operations
    !
    ! Former revisions:
    ...

    #if defined( __parallel )
  +     IF ( collective_wait )  CALL MPI_BARRIER( comm2d, ierr )
        CALL MPI_ALLREDUCE( ifieldmax_l, ifieldmax, 1, MPI_INTEGER, MPI_MAX, &
                            comm2d, ierr )
  +     IF ( collective_wait )  CALL MPI_BARRIER( comm2d, ierr )
        CALL MPI_ALLREDUCE( ifieldmin_l, ifieldmin, 1, MPI_INTEGER, MPI_MIN, &
                            comm2d, ierr )
  • palm/trunk/SOURCE/write_var_list.f90

    r601 → r622

    ! Current revisions:
    ! -----------------
  - !
  + ! +collective_wait
    !
    ! Former revisions:
    ...
        WRITE ( 14 )  'cloud_physics                 '
        WRITE ( 14 )  cloud_physics
  +     WRITE ( 14 )  'collective_wait               '
  +     WRITE ( 14 )  collective_wait
        WRITE ( 14 )  'conserve_volume_flow          '
        WRITE ( 14 )  conserve_volume_flow