Home

Context Navigation

← Previous Change
Next Change →

synthetic_turbulence_generator_mod.f90

Timestamp:

Mar 3, 2020 8:49:28 PM (5 years ago)

Author:

suehring

Message:

Synthetic turbulence: performance optimizations - random numbers only defined and computed locally, option to compute velocity seeds locally without need of global communication; parallel random number generator: new routine to initialize 1D random number arrays; virtual measurements: CPU-log points added

File:

: 1 edited

palm/trunk/SOURCE/synthetic_turbulence_generator_mod.f90 (modified) (56 diffs)

Legend:

: Unmodified
: Added
: Removed

palm/trunk/SOURCE/synthetic_turbulence_generator_mod.f90

-                      r4346
+                      r4438
 ! -----------------
 ! $Id$
+! Performance optimizations in velocity-seed calculation:
+!  - random number array is only defined and computed locally (except for
+!    normalization to zero mean and unit variance)
+!  - parallel random number generator is applied independently of the 2D
+!    random numbers in other routines
+!  - option to decide whether velocity seeds are computed locally without any
+!    further communication or are computed by all processes along the
+!    communicator
+!
+! 4346 2019-12-18 11:55:56Z motisi
 ! Introduction of wall_flags_total_0, which currently sets bits based on static
 ! topography information used in wall_flags_static_0
 …
                time_since_reference_point,                                     &
                turbulent_inflow
+    USE cpulog,                                                                &
+        ONLY:  cpu_log,                                                        &
+               log_point_s
     USE grid_variables,                                                        &
         ONLY:  ddx,                                                            &
 …
                nxl,                                                            &
                nxlu,                                                           &
-               nxlg,                                                           &
                nxr,                                                            &
-               nxrg,                                                           &
                ny,                                                             &
                nys,                                                            &
                nysv,                                                           &
                nyn,                                                            &
-               nyng,                                                           &
-               nysg,                                                           &
                wall_flags_total_0
 …
                myidy,                                                          &
                pdims
     USE pmc_interface,                                                         &
         ONLY : rans_mode_parent
+   USE random_generator_parallel,                                              &
+        ONLY:  random_dummy,                                                   &
+    USE random_generator_parallel,                                             &
+        ONLY:  init_parallel_random_generator,                                 &
+               random_dummy,                                                   &
                random_number_parallel,                                         &
+               random_seed_parallel,                                           &
+               seq_random_array
+               random_seed_parallel
     USE transpose_indices,                                                     &
 …
               nzt_x
+    USE surface_mod,                                                           &
+        ONLY:  surf_def_h,                                                     &
+               surf_lsm_h,                                                     &
+               surf_usm_h
     IMPLICIT NONE
 …
     LOGICAL ::  parametrize_inflow_turbulence = .FALSE. !< flag indicating that inflow turbulence is either read from file (.FALSE.) or if it parametrized
     LOGICAL ::  use_syn_turb_gen = .FALSE.              !< switch to use synthetic turbulence generator
+    LOGICAL ::  compute_velocity_seeds_local = .TRUE.   !< switch to decide whether velocity seeds are computed locally or if computation
+                                                        !< is distributed over several processes
     INTEGER(iwp) ::  id_stg_left        !< left lateral boundary core id in case of turbulence generator
 …
     INTEGER(iwp) ::  id_stg_right       !< right lateral boundary core id in case of turbulence generator
     INTEGER(iwp) ::  id_stg_south       !< south lateral boundary core id in case of turbulence generator
+    INTEGER(iwp) ::  merg               !< maximum length scale (in gp)
+    INTEGER(iwp) ::  mergp              !< merg + nbgp
+    INTEGER(iwp) ::  mergp              !< maximum length scale (in gp)
     INTEGER(iwp) ::  nzb_x_stg          !< lower bound of z coordinate (required for transposing z on PEs along x)
     INTEGER(iwp) ::  nzt_x_stg          !< upper bound of z coordinate (required for transposing z on PEs along x)
 …
     INTEGER(iwp), DIMENSION(:), ALLOCATABLE ::  nwz            !< length scale of w in z direction (in gp)
+    INTEGER(isp), DIMENSION(:), ALLOCATABLE   ::  id_rand_xz     !< initial random IDs at xz inflow boundary
+    INTEGER(isp), DIMENSION(:), ALLOCATABLE   ::  id_rand_yz     !< initial random IDs at yz inflow boundary
+    INTEGER(isp), DIMENSION(:,:), ALLOCATABLE ::  seq_rand_xz    !< initial random seeds at xz inflow boundary
+    INTEGER(isp), DIMENSION(:,:), ALLOCATABLE ::  seq_rand_yz    !< initial random seeds at yz inflow boundary
     REAL(wp) ::  blend                    !< value to create gradually and smooth blending of Reynolds stress and length
                                           !< scales above the boundary layer
 …
     REAL(wp) ::  length_scale             !< length scale, default is 8 x minimum grid spacing
     REAL(wp) ::  dt_stg_adjust = 300.0_wp !< time interval for adjusting turbulence statistics
     REAL(wp) ::  dt_stg_call = 5.0_wp     !< time interval for calling synthetic turbulence generator
+    REAL(wp) ::  dt_stg_call = 0.0_wp     !< time interval for calling synthetic turbulence generator
     REAL(wp) ::  scale_l                  !< scaling parameter used for turbulence parametrization - Obukhov length
     REAL(wp) ::  scale_us                 !< scaling parameter used for turbulence parametrization - friction velocity
 …
  SUBROUTINE stg_check_parameters
-    IMPLICIT NONE
     IF ( .NOT. use_syn_turb_gen  .AND.  .NOT. rans_mode  .AND.                 &
           nesting_offline )  THEN
 …
  SUBROUTINE stg_header ( io )
-    IMPLICIT NONE
     INTEGER(iwp), INTENT(IN) ::  io   !< Unit of the output file
 …
 !------------------------------------------------------------------------------!
  SUBROUTINE stg_init
-    IMPLICIT NONE
     LOGICAL ::  file_stg_exist = .FALSE. !< flag indicating whether parameter file for Reynolds stress and length scales exist
 …
     CALL MPI_BARRIER( comm2d, ierr )
 #endif
+!
+!-- Create mpi-datatypes for exchange in case of non-local but distributed
+!-- computation of the velocity seeds. This option is useful in
+!-- case large turbulent length scales are present, where the computational
+!-- effort becomes large and needs to be parallelized. For parametrized
+!-- turbulence the length scales are small and computing the velocity seeds
+!-- locally is faster (no overhead by communication).
+    IF ( .NOT. compute_velocity_seeds_local )  THEN
 #if defined( __parallel )
+!
+!-- Determine processor decomposition of z-axis along x- and y-direction
+    nnz = nz / pdims(1)
+    nzb_x_stg = 1 + myidx * INT( nnz )
+    nzt_x_stg = ( myidx + 1 ) * INT( nnz )
+    IF ( MOD( nz , pdims(1) ) /= 0  .AND.  myidx == id_stg_right )             &
+       nzt_x_stg = nzt_x_stg + myidx * ( nnz - INT( nnz ) )
+    IF ( nesting_offline   .OR.  ( child_domain  .AND.  rans_mode_parent       &
+                            .AND.  .NOT.  rans_mode ) )  THEN
+       nnz = nz / pdims(2)
+       nzb_y_stg = 1 + myidy * INT( nnz )
+       nzt_y_stg = ( myidy + 1 ) * INT( nnz )
+       IF ( MOD( nz , pdims(2) ) /= 0  .AND.  myidy == id_stg_north )          &
+          nzt_y_stg = nzt_y_stg + myidy * ( nnz - INT( nnz ) )
+    ENDIF
+!
+!-- Define MPI type used in stg_generate_seed_yz to gather vertically split
+!-- velocity seeds
+    CALL MPI_TYPE_SIZE( MPI_REAL, realsize, ierr )
+    extent = 1 * realsize
+!
+!-- Set-up MPI datatype to involve all cores for turbulence generation at yz
+!-- layer
+!-- stg_type_yz: yz-slice with vertical bounds nzb:nzt+1
+    CALL MPI_TYPE_CREATE_SUBARRAY( 2, [nzt-nzb+2,nyng-nysg+1],                 &
+            [1,nyng-nysg+1], [0,0], MPI_ORDER_FORTRAN, MPI_REAL, newtype, ierr )
+    CALL MPI_TYPE_CREATE_RESIZED( newtype, tob, extent, stg_type_yz, ierr )
+    CALL MPI_TYPE_COMMIT( stg_type_yz, ierr )
+    CALL MPI_TYPE_FREE( newtype, ierr )
+    ! stg_type_yz_small: yz-slice with vertical bounds nzb_x_stg:nzt_x_stg+1
+    CALL MPI_TYPE_CREATE_SUBARRAY( 2, [nzt_x_stg-nzb_x_stg+2,nyng-nysg+1],     &
+            [1,nyng-nysg+1], [0,0], MPI_ORDER_FORTRAN, MPI_REAL, newtype, ierr )
+    CALL MPI_TYPE_CREATE_RESIZED( newtype, tob, extent, stg_type_yz_small, ierr )
+    CALL MPI_TYPE_COMMIT( stg_type_yz_small, ierr )
+    CALL MPI_TYPE_FREE( newtype, ierr )
+    ! receive count and displacement for MPI_GATHERV in stg_generate_seed_yz
+    ALLOCATE( recv_count_yz(pdims(1)), displs_yz(pdims(1)) )
+    recv_count_yz           = nzt_x_stg-nzb_x_stg + 1
+    recv_count_yz(pdims(1)) = recv_count_yz(pdims(1)) + 1
+    DO  j = 1, pdims(1)
+       displs_yz(j) = 0 + (nzt_x_stg-nzb_x_stg+1) * (j-1)
+    ENDDO
+!
+!-- Set-up MPI datatype to involve all cores for turbulence generation at xz
+!-- layer
+!-- stg_type_xz: xz-slice with vertical bounds nzb:nzt+1
+    IF ( nesting_offline  .OR.  ( child_domain .AND.  rans_mode_parent         &
+                           .AND.  .NOT.  rans_mode ) )  THEN
+       CALL MPI_TYPE_CREATE_SUBARRAY( 2, [nzt-nzb+2,nxrg-nxlg+1],              &
+               [1,nxrg-nxlg+1], [0,0], MPI_ORDER_FORTRAN, MPI_REAL, newtype, ierr )
+       CALL MPI_TYPE_CREATE_RESIZED( newtype, tob, extent, stg_type_xz, ierr )
+       CALL MPI_TYPE_COMMIT( stg_type_xz, ierr )
+!
+!--    Determine processor decomposition of z-axis along x- and y-direction
+       nnz = nz / pdims(1)
+       nzb_x_stg = 1 + myidx * INT( nnz )
+       nzt_x_stg = ( myidx + 1 ) * INT( nnz )
+       IF ( MOD( nz , pdims(1) ) /= 0  .AND.  myidx == id_stg_right )          &
+          nzt_x_stg = nzt_x_stg + myidx * ( nnz - INT( nnz ) )
+       IF ( nesting_offline   .OR.  ( child_domain  .AND.  rans_mode_parent    &
+                               .AND.  .NOT.  rans_mode ) )  THEN
+          nnz = nz / pdims(2)
+          nzb_y_stg = 1 + myidy * INT( nnz )
+          nzt_y_stg = ( myidy + 1 ) * INT( nnz )
+          IF ( MOD( nz , pdims(2) ) /= 0  .AND.  myidy == id_stg_north )       &
+             nzt_y_stg = nzt_y_stg + myidy * ( nnz - INT( nnz ) )
+       ENDIF
+!
+!--    Define MPI type used in stg_generate_seed_yz to gather vertically split
+!--    velocity seeds
+       CALL MPI_TYPE_SIZE( MPI_REAL, realsize, ierr )
+       extent = 1 * realsize
+!
+!--    Set-up MPI datatype to involve all cores for turbulence generation at yz
+!--    layer
+!--    stg_type_yz: yz-slice with vertical bounds nzb:nzt+1
+       CALL MPI_TYPE_CREATE_SUBARRAY( 2, [nzt-nzb+2,nyn-nys+1],                &
+               [1,nyn-nys+1], [0,0], MPI_ORDER_FORTRAN, MPI_REAL, newtype, ierr )
+       CALL MPI_TYPE_CREATE_RESIZED( newtype, tob, extent, stg_type_yz, ierr )
+       CALL MPI_TYPE_COMMIT( stg_type_yz, ierr )
        CALL MPI_TYPE_FREE( newtype, ierr )
        ! stg_type_xz_small: xz-slice with vertical bounds nzb_y_stg:nzt_y_stg+1
        CALL MPI_TYPE_CREATE_SUBARRAY( 2, [nzt_y_stg-nzb_y_stg+2,nxrg-nxlg+1],  &
                [1,nxrg-nxlg+1], [0,0], MPI_ORDER_FORTRAN, MPI_REAL, newtype, ierr )
        CALL MPI_TYPE_CREATE_RESIZED( newtype, tob, extent, stg_type_xz_small, ierr )
        CALL MPI_TYPE_COMMIT( stg_type_xz_small, ierr )
+       ! stg_type_yz_small: yz-slice with vertical bounds nzb_x_stg:nzt_x_stg+1
+       CALL MPI_TYPE_CREATE_SUBARRAY( 2, [nzt_x_stg-nzb_x_stg+2,nyn-nys+1],    &
+               [1,nyn-nys+1], [0,0], MPI_ORDER_FORTRAN, MPI_REAL, newtype, ierr )
+       CALL MPI_TYPE_CREATE_RESIZED( newtype, tob, extent, stg_type_yz_small, ierr )
+       CALL MPI_TYPE_COMMIT( stg_type_yz_small, ierr )
        CALL MPI_TYPE_FREE( newtype, ierr )
        ! receive count and displacement for MPI_GATHERV in stg_generate_seed_yz
        ALLOCATE( recv_count_xz(pdims(2)), displs_xz(pdims(2)) )
        recv_count_xz           = nzt_y_stg-nzb_y_stg + 1
        recv_count_xz(pdims(2)) = recv_count_xz(pdims(2)) + 1
        DO  j = 1, pdims(2)
           displs_xz(j) = 0 + (nzt_y_stg-nzb_y_stg+1) * (j-1)
+       ALLOCATE( recv_count_yz(pdims(1)), displs_yz(pdims(1)) )
+       recv_count_yz           = nzt_x_stg-nzb_x_stg + 1
+       recv_count_yz(pdims(1)) = recv_count_yz(pdims(1)) + 1
+       DO  j = 1, pdims(1)
+          displs_yz(j) = 0 + (nzt_x_stg-nzb_x_stg+1) * (j-1)
        ENDDO
+!
+!--    Set-up MPI datatype to involve all cores for turbulence generation at xz
+!--    layer
+!--    stg_type_xz: xz-slice with vertical bounds nzb:nzt+1
+       IF ( nesting_offline  .OR.  ( child_domain .AND.  rans_mode_parent      &
+                              .AND.  .NOT.  rans_mode ) )  THEN
+          CALL MPI_TYPE_CREATE_SUBARRAY( 2, [nzt-nzb+2,nxr-nxl+1],             &
+                  [1,nxr-nxl+1], [0,0], MPI_ORDER_FORTRAN, MPI_REAL, newtype, ierr )
+          CALL MPI_TYPE_CREATE_RESIZED( newtype, tob, extent, stg_type_xz, ierr )
+          CALL MPI_TYPE_COMMIT( stg_type_xz, ierr )
+          CALL MPI_TYPE_FREE( newtype, ierr )
+          ! stg_type_xz_small: xz-slice with vertical bounds nzb_y_stg:nzt_y_stg+1
+          CALL MPI_TYPE_CREATE_SUBARRAY( 2, [nzt_y_stg-nzb_y_stg+2,nxr-nxl+1], &
+                  [1,nxr-nxl+1], [0,0], MPI_ORDER_FORTRAN, MPI_REAL, newtype, ierr )
+          CALL MPI_TYPE_CREATE_RESIZED( newtype, tob, extent, stg_type_xz_small, ierr )
+          CALL MPI_TYPE_COMMIT( stg_type_xz_small, ierr )
+          CALL MPI_TYPE_FREE( newtype, ierr )
+          ! receive count and displacement for MPI_GATHERV in stg_generate_seed_yz
+          ALLOCATE( recv_count_xz(pdims(2)), displs_xz(pdims(2)) )
+          recv_count_xz           = nzt_y_stg-nzb_y_stg + 1
+          recv_count_xz(pdims(2)) = recv_count_xz(pdims(2)) + 1
+          DO  j = 1, pdims(2)
+             displs_xz(j) = 0 + (nzt_y_stg-nzb_y_stg+1) * (j-1)
+          ENDDO
+       ENDIF
     ENDIF
 …
                tu(nzb:nzt+1),  tv(nzb:nzt+1),  tw(nzb:nzt+1)   )
     ALLOCATE ( dist_xz(nzb:nzt+1,nxlg:nxrg,3) )
     ALLOCATE ( dist_yz(nzb:nzt+1,nysg:nyng,3) )
+    ALLOCATE ( dist_xz(nzb:nzt+1,nxl:nxr,3) )
+    ALLOCATE ( dist_yz(nzb:nzt+1,nys:nyn,3) )
     dist_xz = 0.0_wp
     dist_yz = 0.0_wp
 …
 !--    Define length scale for the imposed turbulence, which is defined as
 !--    8 times the minimum grid spacing
        length_scale = 8.0_wp * MIN( dx, dy, MINVAL( dzw ) )
+       length_scale = 30.0_wp * MIN( dx, dy, MINVAL( dzw ) ) !8.0_wp * MIN( dx, dy, MINVAL( dzw ) )
+!
 !--    Define constant to gradually decrease length scales and Reynolds stress
 …
+!
 !-- Define the size of the filter functions and allocate them.
     merg = 0
+    mergp = 0
     ! arrays must be large enough to cover the largest length scale
 …
                 ABS(nvx(k)), ABS(nvy(k)), ABS(nvz(k)), &
                 ABS(nwx(k)), ABS(nwy(k)), ABS(nwz(k))  )
        IF ( j > merg )  merg = j
+       IF ( j > mergp )  mergp = j
     ENDDO
+    merg  = 2 * merg
     mergp = merg + nbgp
     ALLOCATE ( bux(-merg:merg,nzb:nzt+1),                                      &
                buy(-merg:merg,nzb:nzt+1),                                      &
                buz(-merg:merg,nzb:nzt+1),                                      &
                bvx(-merg:merg,nzb:nzt+1),                                      &
                bvy(-merg:merg,nzb:nzt+1),                                      &
                bvz(-merg:merg,nzb:nzt+1),                                      &
                bwx(-merg:merg,nzb:nzt+1),                                      &
                bwy(-merg:merg,nzb:nzt+1),                                      &
                bwz(-merg:merg,nzb:nzt+1)  )
+!     mergp  = 2 * mergp
+!     mergp = mergp
+    ALLOCATE ( bux(-mergp:mergp,nzb:nzt+1),                                      &
+               buy(-mergp:mergp,nzb:nzt+1),                                      &
+               buz(-mergp:mergp,nzb:nzt+1),                                      &
+               bvx(-mergp:mergp,nzb:nzt+1),                                      &
+               bvy(-mergp:mergp,nzb:nzt+1),                                      &
+               bvz(-mergp:mergp,nzb:nzt+1),                                      &
+               bwx(-mergp:mergp,nzb:nzt+1),                                      &
+               bwy(-mergp:mergp,nzb:nzt+1),                                      &
+               bwz(-mergp:mergp,nzb:nzt+1)  )
+!
 !-- Allocate velocity seeds for turbulence at xz-layer
     ALLOCATE ( fu_xz( nzb:nzt+1,nxlg:nxrg), fuo_xz(nzb:nzt+1,nxlg:nxrg),       &
                fv_xz( nzb:nzt+1,nxlg:nxrg), fvo_xz(nzb:nzt+1,nxlg:nxrg),       &
                fw_xz( nzb:nzt+1,nxlg:nxrg), fwo_xz(nzb:nzt+1,nxlg:nxrg)  )
+    ALLOCATE ( fu_xz( nzb:nzt+1,nxl:nxr), fuo_xz(nzb:nzt+1,nxl:nxr),       &
+               fv_xz( nzb:nzt+1,nxl:nxr), fvo_xz(nzb:nzt+1,nxl:nxr),       &
+               fw_xz( nzb:nzt+1,nxl:nxr), fwo_xz(nzb:nzt+1,nxl:nxr)  )
+!
 !-- Allocate velocity seeds for turbulence at yz-layer
     ALLOCATE ( fu_yz( nzb:nzt+1,nysg:nyng), fuo_yz(nzb:nzt+1,nysg:nyng),       &
                fv_yz( nzb:nzt+1,nysg:nyng), fvo_yz(nzb:nzt+1,nysg:nyng),       &
                fw_yz( nzb:nzt+1,nysg:nyng), fwo_yz(nzb:nzt+1,nysg:nyng)  )
+    ALLOCATE ( fu_yz( nzb:nzt+1,nys:nyn), fuo_yz(nzb:nzt+1,nys:nyn),       &
+               fv_yz( nzb:nzt+1,nys:nyn), fvo_yz(nzb:nzt+1,nys:nyn),       &
+               fw_yz( nzb:nzt+1,nys:nyn), fwo_yz(nzb:nzt+1,nys:nyn)  )
     fu_xz  = 0.0_wp
 …
           IF ( myidx == id_stg_right )  i = nxr+1
           DO  j = nysg, nyng
+          DO  j = nys, nyn
              DO  k = nzb, nzt+1
                 IF  ( a11(k) > 10E-8_wp )  THEN
 …
           IF ( myidy == id_stg_north )  j = nyn+1
           DO  i = nxlg, nxrg
+          DO  i = nxl, nxr
              DO  k = nzb, nzt+1
+!
 …
 #endif
     ENDIF
+!
+!-- Initialize random number generator at xz- and yz-layers. Random numbers
+!-- are initialized at each core. In case there is only inflow from the left,
+!-- it is sufficient to generate only random numbers for the yz-layer, else
+!-- random numbers for the xz-layer are also required.
+    ALLOCATE ( id_rand_yz(-mergp+nys:nyn+mergp) )
+    ALLOCATE ( seq_rand_yz(5,-mergp+nys:nyn+mergp) )
+    id_rand_yz  = 0
+    seq_rand_yz = 0
+    CALL init_parallel_random_generator( ny, -mergp+nys, nyn+mergp,            &
+                                         id_rand_yz, seq_rand_yz )
+    IF ( nesting_offline  .OR.  ( child_domain .AND.  rans_mode_parent         &
+                           .AND.  .NOT.  rans_mode ) )  THEN
+       ALLOCATE ( id_rand_xz(-mergp+nxl:nxr+mergp) )
+       ALLOCATE ( seq_rand_xz(5,-mergp+nxl:nxr+mergp) )
+       id_rand_xz  = 0
+       seq_rand_xz = 0
+       CALL init_parallel_random_generator( nx, -mergp+nxl, nxr+mergp,         &
+                                            id_rand_xz, seq_rand_xz )
+    ENDIF
 …
  SUBROUTINE stg_filter_func( nxx, bxx )
-    IMPLICIT NONE
     INTEGER(iwp) :: k         !< loop index
     INTEGER(iwp) :: n_k       !< length scale nXX in height k
-    INTEGER(iwp) :: n_k2      !< n_k * 2
     INTEGER(iwp) :: nf        !< index for length scales
 …
     REAL(wp) :: qsi = 1.0_wp  !< minimization factor
     INTEGER(iwp), DIMENSION(:) :: nxx(nzb:nzt+1)           !< length scale (in gp)
     REAL(wp), DIMENSION(:,:) :: bxx(-merg:merg,nzb:nzt+1)  !< filter function
+    INTEGER(iwp), DIMENSION(nzb:nzt+1) ::  nxx         !< length scale (in gp)
+    REAL(wp), DIMENSION(-mergp:mergp,nzb:nzt+1) ::  bxx  !< filter function
 …
        n_k    = nxx(k)
        IF ( n_k /= 0 )  THEN
-          n_k2 = n_k * 2
+!
 !--       ( Eq.10 )^2
           DO  nf = -n_k2, n_k2
+          DO  nf = -n_k, n_k
              bdenom = bdenom + EXP( -qsi * pi * ABS(nf) / n_k )**2
           ENDDO
 …
 !--       ( Eq.9 )
           bdenom = SQRT( bdenom )
           DO  nf = -n_k2, n_k2
+          DO  nf = -n_k, n_k
              bxx(nf,k) = EXP( -qsi * pi * ABS(nf) / n_k ) / bdenom
           ENDDO
 …
  SUBROUTINE stg_parin
-    IMPLICIT NONE
     CHARACTER (LEN=80) ::  line   !< dummy string that contains the current line of the parameter file
+    NAMELIST /stg_par/  dt_stg_adjust, dt_stg_call, use_syn_turb_gen
+    NAMELIST /stg_par/  dt_stg_adjust,                                         &
+                        dt_stg_call,                                           &
+                        use_syn_turb_gen,                                      &
+                        compute_velocity_seeds_local
     line = ' '
+!
 !-- Try to find stg package
 …
  SUBROUTINE stg_rrd_global( found )
-    IMPLICIT NONE
     LOGICAL, INTENT(OUT)  ::  found !< flag indicating if variable was found
     found = .TRUE.
 …
  SUBROUTINE stg_wrd_global
-    IMPLICIT NONE
     CALL wrd_write_string( 'time_stg_adjust' )
     WRITE ( 14 )  time_stg_adjust
 …
 !------------------------------------------------------------------------------!
  SUBROUTINE stg_main
-    IMPLICIT NONE
     INTEGER(iwp) :: i           !< grid index in x-direction
 …
                   ( child_domain .AND.  rans_mode_parent                       &
                                  .AND.  .NOT.  rans_mode ) ) )  THEN
           CALL stg_generate_seed_yz( nuy, nuz, buy, buz, fu_yz, id_stg_left )
           CALL stg_generate_seed_yz( nvy, nvz, bvy, bvz, fv_yz, id_stg_left )
 …
        IF ( stg_call )  THEN
           DO  j = nysg, nyng
+          DO  j = nys, nyn
              DO  k = nzb, nzt + 1
+!
 …
           IF ( myidx == id_stg_left  )  i = nxl
           IF ( myidx == id_stg_right )  i = nxr+1
           DO  j = nysg, nyng
+          DO  j = nys, nyn
              DO  k = nzb+1, nzt + 1
+!
 …
           IF ( myidx == id_stg_left  )  i = nxl-1
           IF ( myidx == id_stg_right )  i = nxr+1
           DO  j = nysg, nyng
+          DO  j = nys, nyn
              DO  k = nzb+1, nzt + 1
+!
 …
           IF ( myidx == id_stg_right )  i = nxr+1
           dist_yz(:,:,1) = ( dist_yz(:,:,1) - mc_factor(1) )                   &
+          dist_yz(:,nys:nyn,1) = ( dist_yz(:,nys:nyn,1) - mc_factor(1) )                   &
                         * MERGE( 1.0_wp, 0.0_wp,                               &
                           BTEST( wall_flags_total_0(:,:,i), 1 ) )
+                          BTEST( wall_flags_total_0(:,nys:nyn,i), 1 ) )
 …
           IF ( myidx == id_stg_right )  i = nxr+1
           dist_yz(:,:,2) = ( dist_yz(:,:,2) - mc_factor(2) )                   &
+          dist_yz(:,nys:nyn,2) = ( dist_yz(:,nys:nyn,2) - mc_factor(2) )                   &
                         * MERGE( 1.0_wp, 0.0_wp,                               &
                           BTEST( wall_flags_total_0(:,:,i), 2 ) )
+                          BTEST( wall_flags_total_0(:,nys:nyn,i), 2 ) )
           dist_yz(:,:,3) = ( dist_yz(:,:,3) - mc_factor(3) )                   &
+          dist_yz(:,nys:nyn,3) = ( dist_yz(:,nys:nyn,3) - mc_factor(3) )                   &
                         * MERGE( 1.0_wp, 0.0_wp,                               &
                           BTEST( wall_flags_total_0(:,:,i), 3 ) )
+                          BTEST( wall_flags_total_0(:,nys:nyn,i), 3 ) )
+!
 !--       Add disturbances
 …
+!
 !--             Add disturbance at the inflow
                 DO  j = nysg, nyng
+                DO  j = nys, nyn
                    DO  k = nzb, nzt+1
                       u(k,j,-nbgp+1:0) = ( mean_inflow_profiles(k,1) +         &
 …
 !--    imposed
        IF ( stg_call )  THEN
           DO  i = nxlg, nxrg
+          DO  i = nxl, nxr
              DO  k = nzb, nzt + 1
+!
 …
           IF ( myidy == id_stg_south  ) j = nys
           IF ( myidy == id_stg_north )  j = nyn+1
           DO  i = nxlg, nxrg
+          DO  i = nxl, nxr
              DO  k = nzb+1, nzt + 1
+!
 …
           IF ( myidy == id_stg_south  ) j = nys-1
           IF ( myidy == id_stg_north )  j = nyn+1
           DO  i = nxlg, nxrg
+          DO  i = nxl, nxr
              DO  k = nzb+1, nzt + 1
+!
 …
           IF ( myidy == id_stg_north )  j = nyn+1
           dist_xz(:,:,2)   = ( dist_xz(:,:,2) - mc_factor(2) )                 &
+          dist_xz(:,nxl:nxr,2)   = ( dist_xz(:,nxl:nxr,2) - mc_factor(2) )                 &
                            * MERGE( 1.0_wp, 0.0_wp,                            &
                              BTEST( wall_flags_total_0(:,j,:), 2 ) )
+                             BTEST( wall_flags_total_0(:,j,nxl:nxr), 2 ) )
 …
           IF ( myidy == id_stg_north )  j = nyn+1
           dist_xz(:,:,1)   = ( dist_xz(:,:,1) - mc_factor(1) )                 &
+          dist_xz(:,nxl:nxr,1)   = ( dist_xz(:,nxl:nxr,1) - mc_factor(1) )                 &
                            * MERGE( 1.0_wp, 0.0_wp,                            &
                              BTEST( wall_flags_total_0(:,j,:), 1 ) )
+                             BTEST( wall_flags_total_0(:,j,nxl:nxr), 1 ) )
           dist_xz(:,:,3)   = ( dist_xz(:,:,3) - mc_factor(3) )                 &
+          dist_xz(:,nxl:nxr,3)   = ( dist_xz(:,nxl:nxr,3) - mc_factor(3) )                 &
                            * MERGE( 1.0_wp, 0.0_wp,                            &
                              BTEST( wall_flags_total_0(:,j,:), 3 ) )
+                             BTEST( wall_flags_total_0(:,j,nxl:nxr), 3 ) )
+!
 !--       Add disturbances
 …
     ENDIF
+!
+!-- Exchange ghost points.
+    CALL exchange_horiz( u, nbgp )
+    CALL exchange_horiz( v, nbgp )
+    CALL exchange_horiz( w, nbgp )
+!
 !-- Finally, set time counter for calling STG to zero
     IF ( stg_call )  time_stg_call = 0.0_wp
 …
 !------------------------------------------------------------------------------!
  SUBROUTINE stg_generate_seed_yz( n_y, n_z, b_y, b_z, f_n, id_left, id_right )
+ USE pegrid
+    IMPLICIT NONE
+    INTEGER(iwp)           :: i           !< grid index x-direction
     INTEGER(iwp)           :: id_left     !< core ids at respective boundaries
     INTEGER(iwp), OPTIONAL :: id_right    !< core ids at respective boundaries
 …
     INTEGER(iwp), DIMENSION(nzb:nzt+1) :: n_y    !< length scale in y-direction
     INTEGER(iwp), DIMENSION(nzb:nzt+1) :: n_z    !< length scale in z-direction
-    INTEGER(iwp), DIMENSION(nzb:nzt+1) :: n_y2   !< n_y*2
-    INTEGER(iwp), DIMENSION(nzb:nzt+1) :: n_z2   !< n_z*2
     REAL(wp) :: nyz_inv         !< inverse of number of grid points in yz-slice
 …
     REAL(wp) :: rand_sigma_inv  !< inverse of stdev of random number
     REAL(wp), DIMENSION(-merg:merg,nzb:nzt+1)    :: b_y     !< filter function in y-direction
     REAL(wp), DIMENSION(-merg:merg,nzb:nzt+1)    :: b_z     !< filter function in z-direction
     REAL(wp), DIMENSION(nzb_x_stg:nzt_x_stg+1,nysg:nyng) :: f_n_l   !<  local velocity seed
     REAL(wp), DIMENSION(nzb:nzt+1,nysg:nyng)             :: f_n     !<  velocity seed
+    REAL(wp), DIMENSION(-mergp:mergp,nzb:nzt+1)    :: b_y     !< filter function in y-direction
+    REAL(wp), DIMENSION(-mergp:mergp,nzb:nzt+1)    :: b_z     !< filter function in z-direction
+    REAL(wp), DIMENSION(nzb_x_stg:nzt_x_stg+1,nys:nyn) :: f_n_l   !<  local velocity seed
+    REAL(wp), DIMENSION(nzb:nzt+1,nys:nyn)             :: f_n     !<  velocity seed
     REAL(wp), DIMENSION(:,:), ALLOCATABLE ::  rand_it   !< global array of random numbers
-    REAL(wp), DIMENSION(:,:), ALLOCATABLE ::  rand_it_l !< local array of random numbers
+!
 !-- Generate random numbers using the parallel random generator.
 …
 !-- left boundary, while the right boundary uses the same random numbers
 !-- and thus also computes the same correlation matrix.
     ALLOCATE( rand_it(nzb-mergp:nzt+1+mergp,-mergp:ny+mergp) )
     rand_it   = 0.0_wp
+    ALLOCATE( rand_it(nzb-mergp:nzt+1+mergp,-mergp+nys:nyn+mergp) )
+    rand_it = 0.0_wp
     rand_av        = 0.0_wp
     rand_sigma_inv = 0.0_wp
+    nyz_inv        = 1.0_wp / REAL( ( nzt+1 - nzb+1 ) * ( ny+1 ), KIND=wp )
+!
+!-- Compute and normalize random numbers only on left-boundary ranks.
+    IF ( myidx == id_stg_left )  THEN
+!
+!--    Allocate array for local set of random numbers
+       ALLOCATE( rand_it_l(nzb:nzt+1,nys:nyn) )
+       rand_it_l = 0.0_wp
+       i = nxl
+       DO  j = nys, nyn
+!
+!--       Put the random seeds at grid point j,i
+          CALL random_seed_parallel( put=seq_random_array(:, j, i) )
+          DO  k = nzb, nzt+1
+             CALL random_number_parallel( random_dummy )
+             rand_it_l(k,j) = random_dummy
+             rand_av        = rand_av + rand_it_l(k,j)
+          ENDDO
+!
+!--       Get the new random seeds from last call at grid point j,i
+          CALL random_seed_parallel( get=seq_random_array(:, j, i) )
+    nyz_inv        = 1.0_wp / REAL( ( nzt + 1 + mergp - ( nzb - mergp ) + 1 )  &
+                                  * ( ny + mergp - ( 0 - mergp ) + 1 ),        &
+                                    KIND=wp )
+!
+!-- Compute and normalize random numbers.
+    DO  j = nys - mergp, nyn + mergp
+!
+!--    Put the random seeds at grid point j
+       CALL random_seed_parallel( put=seq_rand_yz(:,j) )
+       DO  k = nzb - mergp, nzt + 1 + mergp
+          CALL random_number_parallel( random_dummy )
+          rand_it(k,j) = random_dummy
        ENDDO
+!
+!--    Get the new random seeds from last call at grid point j
+       CALL random_seed_parallel( get=seq_rand_yz(:,j) )
+    ENDDO
+!
+!-- For normalization to zero mean, sum-up the global random numbers.
+!-- To normalize the global set of random numbers,
+!-- the inner ghost layers mergp must not be summed-up, else
+!-- the random numbers on the ghost layers will be weighted more strongly, as
+!-- they also occur on the inner subdomains.
+    DO  j = MERGE( nys, nys - mergp, nys /= 0 ),                              &
+            MERGE( nyn, nyn + mergp, nyn /= ny )
+       DO  k = nzb - mergp, nzt + 1 + mergp
+          rand_av = rand_av + rand_it(k,j)
+       ENDDO
+    ENDDO
 #if defined( __parallel )
+!
 !--    Sum-up the local averages of the random numbers
        CALL MPI_ALLREDUCE( MPI_IN_PLACE, rand_av, 1, MPI_REAL,                 &
                            MPI_SUM, comm1dy, ierr )
+!-- Sum-up the local averages of the random numbers
+    CALL MPI_ALLREDUCE( MPI_IN_PLACE, rand_av, 1, MPI_REAL,                    &
+                        MPI_SUM, comm1dy, ierr )
 #endif
+       rand_av = rand_av * nyz_inv
+!
+!--    Obtain zero mean
+       rand_it_l = rand_it_l - rand_av
+!
+!--    Now, compute the variance
+    rand_av = rand_av * nyz_inv
+!
+!-- Obtain zero mean
+    rand_it= rand_it - rand_av
+!
+!-- Now, compute the variance
+    DO  j = MERGE( nys, nys - mergp, nys /= 0 ),                               &
+            MERGE( nyn, nyn + mergp, nyn /= ny )
+       DO  k = nzb - mergp, nzt + 1 + mergp
+          rand_sigma_inv = rand_sigma_inv + rand_it(k,j)**2
+       ENDDO
+    ENDDO
+#if defined( __parallel )
+!
+!-- Sum-up the local quadratic averages of the random numbers
+    CALL MPI_ALLREDUCE( MPI_IN_PLACE, rand_sigma_inv, 1, MPI_REAL,          &
+                        MPI_SUM, comm1dy, ierr )
+#endif
+!
+!-- Compute standard deviation
+    IF ( rand_sigma_inv /= 0.0_wp )  THEN
+       rand_sigma_inv = 1.0_wp / SQRT( rand_sigma_inv * nyz_inv )
+    ELSE
+       rand_sigma_inv = 1.0_wp
+    ENDIF
+!
+!-- Normalize with standard deviation to obtain unit variance
+    rand_it = rand_it * rand_sigma_inv
+    CALL cpu_log( log_point_s(31), 'STG f_n factors', 'start' )
+!
+!-- Generate velocity seed following Eq.6 of Xie and Castro (2008). There
+!-- are two options. In the first one, the computation of the seeds is
+!-- distributed to all processes along the communicator comm1dy and
+!-- gathered on the leftmost and, if necessary, on the rightmost process.
+!-- For huge length scales the computational effort can become quite huge
+!-- (it scales with the turbulent length scales), so that gain by parallelization
+!-- exceeds the costs by the subsequent communication.
+!-- In the second option, which performs better when the turbulent length scales
+!-- are parametrized and thus the loops are smaller, the seeds are computed
+!-- locally and no communication is necessary.
+    IF ( compute_velocity_seeds_local )  THEN
+       f_n  = 0.0_wp
        DO  j = nys, nyn
           DO  k = nzb, nzt+1
+             rand_sigma_inv = rand_sigma_inv + rand_it_l(k,j)**2
+          ENDDO
+       ENDDO
+#if defined( __parallel )
+!
+!--    Sum-up the local quadratic averages of the random numbers
+       CALL MPI_ALLREDUCE( MPI_IN_PLACE, rand_sigma_inv, 1, MPI_REAL,          &
+                           MPI_SUM, comm1dy, ierr )
+#endif
+!
+!--    Compute standard deviation
+       IF ( rand_sigma_inv /= 0.0_wp )  THEN
+          rand_sigma_inv = 1.0_wp / SQRT( rand_sigma_inv * nyz_inv )
+       ELSE
+          rand_sigma_inv = 1.0_wp
+       ENDIF
+!
+!--    Normalize with standard deviation to obtain unit variance
+       rand_it_l = rand_it_l * rand_sigma_inv
+!
+!--    Copy local random numbers on the global array
+       rand_it(nzb:nzt+1,nys:nyn) = rand_it_l(nzb:nzt+1,nys:nyn)
+!
+!--    Deallocate local array
+       DEALLOCATE( rand_it_l )
+!
+!--    Now, distribute the final set of random numbers to all mpi ranks located
+!--    on the left boundary. Here, an allreduce with sum reduction is sufficient,
+!--    or, in the non-parallel case, nothing need to be done at all.
+#if defined( __parallel )
+       CALL MPI_ALLREDUCE( MPI_IN_PLACE, rand_it, SIZE( rand_it ), MPI_REAL,   &
+                           MPI_SUM, comm1dy, ierr )
+#endif
+    ENDIF
+!
+!-- Finally, distribute the set of random numbers (defined on the leftmost-
+!-- located mpi ranks) to all other mpi ranks. Here, an allreduce with sum
+!-- option is sufficient, because rand_it is zero on all other mpi_ranks.
+!-- Note, the reduce operation is only performed with communicator comm1dx,
+!-- where only 1 rank within the communicator has non-zero random numbers.
+#if defined( __parallel )
+    CALL MPI_ALLREDUCE( MPI_IN_PLACE, rand_it, SIZE( rand_it ), MPI_REAL,      &
+                        MPI_SUM, comm1dx, ierr )
+#endif
+!
+!-- Periodic fill of random numbers in space
+    DO  j = 0, ny
+       DO  k = 1, mergp
+          rand_it(nzb  -k,j) = rand_it(nzt+2-k,j)    ! bottom margin
+          rand_it(nzt+1+k,j) = rand_it(nzb+k-1,j)    ! top margin
+       ENDDO
+    ENDDO
+    DO  j = 1, mergp
+       DO  k = nzb-mergp, nzt+1+mergp
+          rand_it(k,  -j) = rand_it(k,ny-j+1)        ! south margin
+          rand_it(k,ny+j) = rand_it(k,   j-1)        ! north margin
+       ENDDO
+    ENDDO
+!
+!-- Generate velocity seed following Eq.6 of Xie and Castro (2008)
+    n_y2 = n_y * 2
+    n_z2 = n_z * 2
+    f_n_l  = 0.0_wp
+    DO  j = nysg, nyng
+       DO  k = nzb_x_stg, nzt_x_stg+1
+          DO  jj = -n_y2(k), n_y2(k)
+             DO  kk = -n_z2(k), n_z2(k)
+                f_n_l(k,j) = f_n_l(k,j)                                        &
+                           + b_y(jj,k) * b_z(kk,k) * rand_it(k+kk,j+jj)
+             DO  jj = -n_y(k), n_y(k)
+                DO  kk = -n_z(k), n_z(k)
+                   f_n(k,j) = f_n(k,j) + b_y(jj,k) * b_z(kk,k) * rand_it(k+kk,j+jj)
+                ENDDO
              ENDDO
           ENDDO
        ENDDO
+    ENDDO
+    ELSE
+       f_n_l  = 0.0_wp
+       DO  j = nys, nyn
+          DO  k = nzb_x_stg, nzt_x_stg+1
+             DO  jj = -n_y(k), n_y(k)
+                DO  kk = -n_z(k), n_z(k)
+                   f_n_l(k,j) = f_n_l(k,j) + b_y(jj,k) * b_z(kk,k) * rand_it(k+kk,j+jj)
+                ENDDO
+             ENDDO
+          ENDDO
+       ENDDO
+!
+!--    Gather velocity seeds of full subdomain
+       send_count = nzt_x_stg - nzb_x_stg + 1
+       IF ( nzt_x_stg == nzt )  send_count = send_count + 1
+#if defined( __parallel )
+!
+!--    Gather the velocity seed matrix on left boundary mpi ranks.
+       CALL MPI_GATHERV( f_n_l(nzb_x_stg,nys), send_count, stg_type_yz_small,  &
+                         f_n(nzb+1,nys), recv_count_yz, displs_yz, stg_type_yz,&
+                         id_left, comm1dx, ierr )
+!
+!--    If required, gather the same velocity seed matrix on right boundary
+!--    mpi ranks (in offline nesting for example).
+       IF ( PRESENT( id_right ) )  THEN
+          CALL MPI_GATHERV( f_n_l(nzb_x_stg,nys), send_count, stg_type_yz_small,  &
+                            f_n(nzb+1,nys), recv_count_yz, displs_yz, stg_type_yz,&
+                            id_right, comm1dx, ierr )
+       ENDIF
+#else
+       f_n(nzb+1:nzt+1,nys:nyn) = f_n_l(nzb_x_stg:nzt_x_stg+1,nys:nyn)
+#endif
+    ENDIF
     DEALLOCATE( rand_it )
+!
+!-- Gather velocity seeds of full subdomain
+    send_count = nzt_x_stg - nzb_x_stg + 1
+    IF ( nzt_x_stg == nzt )  send_count = send_count + 1
+#if defined( __parallel )
+!
+!-- Gather the velocity seed matrix on left boundary mpi ranks.
+    CALL MPI_GATHERV( f_n_l(nzb_x_stg,nysg), send_count, stg_type_yz_small,     &
+                      f_n(nzb+1,nysg), recv_count_yz, displs_yz, stg_type_yz,   &
+                      id_left, comm1dx, ierr )
+!
+!-- If required, gather the same velocity seed matrix on right boundary
+!-- mpi ranks (in offline nesting for example).
+    IF ( PRESENT( id_right ) )  THEN
+       CALL MPI_GATHERV( f_n_l(nzb_x_stg,nysg), send_count, stg_type_yz_small,  &
+                         f_n(nzb+1,nysg), recv_count_yz, displs_yz, stg_type_yz,&
+                         id_right, comm1dx, ierr )
+    ENDIF
+#else
+    f_n(nzb+1:nzt+1,nysg:nyng) = f_n_l(nzb_x_stg:nzt_x_stg+1,nysg:nyng)
+#endif
+    CALL cpu_log( log_point_s(31), 'STG f_n factors', 'stop' )
  END SUBROUTINE stg_generate_seed_yz
 …
  SUBROUTINE stg_generate_seed_xz( n_x, n_z, b_x, b_z, f_n, id_south, id_north )
-    IMPLICIT NONE
     INTEGER(iwp) :: i           !< loop index in x-direction
     INTEGER(iwp) :: id_north    !< core ids at respective boundaries
     INTEGER(iwp) :: id_south    !< core ids at respective boundaries
     INTEGER(iwp) :: ii          !< loop index in x-direction
-    INTEGER(iwp) :: j           !< grid index y-direction
     INTEGER(iwp) :: k           !< loop index in z-direction
     INTEGER(iwp) :: kk          !< loop index in z-direction
 …
     INTEGER(iwp), DIMENSION(nzb:nzt+1) :: n_x    !< length scale in x-direction
     INTEGER(iwp), DIMENSION(nzb:nzt+1) :: n_z    !< length scale in z-direction
-    INTEGER(iwp), DIMENSION(nzb:nzt+1) :: n_x2   !< n_x*2
-    INTEGER(iwp), DIMENSION(nzb:nzt+1) :: n_z2   !< n_z*2
     REAL(wp) :: nxz_inv         !< inverse of number of grid points in xz-slice
 …
     REAL(wp) :: rand_sigma_inv  !< inverse of stdev of random number
     REAL(wp), DIMENSION(-merg:merg,nzb:nzt+1)    :: b_x     !< filter function in x-direction
     REAL(wp), DIMENSION(-merg:merg,nzb:nzt+1)    :: b_z     !< filter function in z-direction
     REAL(wp), DIMENSION(nzb_y_stg:nzt_y_stg+1,nxlg:nxrg) :: f_n_l   !<  local velocity seed
     REAL(wp), DIMENSION(nzb:nzt+1,nxlg:nxrg)             :: f_n     !<  velocity seed
+    REAL(wp), DIMENSION(-mergp:mergp,nzb:nzt+1)    :: b_x     !< filter function in x-direction
+    REAL(wp), DIMENSION(-mergp:mergp,nzb:nzt+1)    :: b_z     !< filter function in z-direction
+    REAL(wp), DIMENSION(nzb_y_stg:nzt_y_stg+1,nxl:nxr) :: f_n_l   !<  local velocity seed
+    REAL(wp), DIMENSION(nzb:nzt+1,nxl:nxr)             :: f_n     !<  velocity seed
     REAL(wp), DIMENSION(:,:), ALLOCATABLE ::  rand_it   !< global array of random numbers
-    REAL(wp), DIMENSION(:,:), ALLOCATABLE ::  rand_it_l !< local array of random numbers
+!
 …
 !-- left boundary, while the right boundary uses the same random numbers
 !-- and thus also computes the same correlation matrix.
     ALLOCATE( rand_it(nzb-mergp:nzt+1+mergp,-mergp:nx+mergp) )
     rand_it   = 0.0_wp
+    ALLOCATE( rand_it(nzb-mergp:nzt+1+mergp,-mergp+nxl:nxr+mergp) )
+    rand_it = 0.0_wp
     rand_av        = 0.0_wp
     rand_sigma_inv = 0.0_wp
+    nxz_inv        = 1.0_wp / REAL( ( nzt+1 - nzb+1 ) * ( nx+1 ), KIND=wp )
+!
+!-- Compute and normalize random numbers only on south-boundary ranks.
+    IF ( myidy == id_stg_south )  THEN
+!
+!--    Allocate array for local set of random numbers
+       ALLOCATE( rand_it_l(nzb:nzt+1,nxl:nxr) )
+       rand_it_l = 0.0_wp
+       j = nys
+       DO  i = nxl, nxr
+!
+!--       Put the random seeds at grid point j,i
+          CALL random_seed_parallel( put=seq_random_array(:, j, i) )
+          DO  k = nzb, nzt+1
+             CALL random_number_parallel( random_dummy )
+             rand_it_l(k,i) = random_dummy
+             rand_av = rand_av + rand_it_l(k,i)
+          ENDDO
+!
+!--       Get the new random seeds from last call at grid point j,i
+          CALL random_seed_parallel( get=seq_random_array(:, j, i) )
+    nxz_inv        = 1.0_wp / REAL( ( nzt + 1 + mergp - ( nzb - mergp ) + 1 )  &
+                                  * ( nx + mergp - ( 0 - mergp ) +1 ),         &
+                                    KIND=wp )
+!
+!-- Compute and normalize random numbers.
+    DO  i = nxl - mergp, nxr + mergp
+!
+!--    Put the random seeds at grid point i
+       CALL random_seed_parallel( put=seq_rand_xz(:,i) )
+       DO  k = nzb - mergp, nzt + 1 + mergp
+          CALL random_number_parallel( random_dummy )
+          rand_it(k,i) = random_dummy
        ENDDO
+!
+!--    Get the new random seeds from last call at grid point i
+       CALL random_seed_parallel( get=seq_rand_xz(:,i) )
+    ENDDO
+!
+!-- For normalization to zero mean, sum-up the global random numbers.
+!-- To normalize the global set of random numbers,
+!-- the inner ghost layers mergp must not be summed-up, else
+!-- the random numbers on the ghost layers will be stronger weighted as they
+!-- also occur on the inner subdomains.
+    DO  i = MERGE( nxl, nxl - mergp, nxl /= 0 ),                              &
+            MERGE( nxr, nxr + mergp, nxr /= nx )
+       DO  k = nzb - mergp, nzt + 1 + mergp
+          rand_av = rand_av + rand_it(k,i)
+       ENDDO
+    ENDDO
 #if defined( __parallel )
+       CALL MPI_ALLREDUCE( MPI_IN_PLACE, rand_av, 1, MPI_REAL,                 &
+                           MPI_SUM, comm1dx, ierr )
+!
+!-- Sum-up the local averages of the random numbers
+    CALL MPI_ALLREDUCE( MPI_IN_PLACE, rand_av, 1, MPI_REAL,                    &
+                        MPI_SUM, comm1dx, ierr )
 #endif
+       rand_av = rand_av * nxz_inv
+!
+!--    Obtain zero mean
+       rand_it_l = rand_it_l - rand_av
+!
+!--    Now, compute the variance
+    rand_av = rand_av * nxz_inv
+!
+!-- Obtain zero mean
+    rand_it= rand_it - rand_av
+!
+!-- Now, compute the variance
+    DO  i = MERGE( nxl, nxl - mergp, nxl /= 0 ),                               &
+            MERGE( nxr, nxr + mergp, nxr /= nx )
+       DO  k = nzb - mergp, nzt + 1 + mergp
+          rand_sigma_inv = rand_sigma_inv + rand_it(k,i)**2
+       ENDDO
+    ENDDO
+#if defined( __parallel )
+!
+!-- Sum-up the local quadratic averages of the random numbers
+    CALL MPI_ALLREDUCE( MPI_IN_PLACE, rand_sigma_inv, 1, MPI_REAL,          &
+                        MPI_SUM, comm1dx, ierr )
+#endif
+!
+!-- Compute standard deviation
+    IF ( rand_sigma_inv /= 0.0_wp )  THEN
+       rand_sigma_inv = 1.0_wp / SQRT( rand_sigma_inv * nxz_inv )
+    ELSE
+       rand_sigma_inv = 1.0_wp
+    ENDIF
+!
+!-- Normalize with standard deviation to obtain unit variance
+    rand_it = rand_it * rand_sigma_inv
+    CALL cpu_log( log_point_s(31), 'STG f_n factors', 'start' )
+!
+!-- Generate velocity seed following Eq.6 of Xie and Castro (2008). There
+!-- are two options. In the first one, the computation of the seeds is
+!-- distributed to all processes along the communicator comm1dx and
+!-- gathered on the southmost and, if necessary, on the northmost process.
+!-- For huge length scales the computational effort can become quite huge
+!-- (it scales with the turbulent length scales), so that gain by parallelization
+!-- exceeds the costs by the subsequent communication.
+!-- In the second option, which performs better when the turbulent length scales
+!-- are parametrized and thus the loops are smaller, the seeds are computed
+!-- locally and no communication is necessary.
+    IF ( compute_velocity_seeds_local )  THEN
+       f_n  = 0.0_wp
        DO  i = nxl, nxr
           DO  k = nzb, nzt+1
+             rand_sigma_inv = rand_sigma_inv + rand_it_l(k,i)**2
+          ENDDO
+       ENDDO
+#if defined( __parallel )
+       CALL MPI_ALLREDUCE( MPI_IN_PLACE, rand_sigma_inv, 1, MPI_REAL,          &
+                           MPI_SUM, comm1dx, ierr )
+#endif
+!
+!--    Compute standard deviation
+       IF ( rand_sigma_inv /= 0.0_wp )  THEN
+          rand_sigma_inv = 1.0_wp / SQRT( rand_sigma_inv * nxz_inv )
+       ELSE
+          rand_sigma_inv = 1.0_wp
+       ENDIF
+!
+!--    Normalize with standard deviation to obtain unit variance
+       rand_it_l = rand_it_l * rand_sigma_inv
+!
+!--    Copy local random numbers on the global array
+       rand_it(nzb:nzt+1,nxl:nxr) = rand_it_l(nzb:nzt+1,nxl:nxr)
+!
+!--    Deallocate local array
+       DEALLOCATE( rand_it_l )
+!
+!--    Now, distribute the final set of random numbers to all mpi ranks located
+!--    on the south boundary. Here, an allreduce with sum reduction is sufficient.
+#if defined( __parallel )
+       CALL MPI_ALLREDUCE( MPI_IN_PLACE, rand_it, SIZE( rand_it ), MPI_REAL,   &
+                           MPI_SUM, comm1dx, ierr )
+#endif
+    ENDIF
+!
+!-- Finally, distribute the set of random numbers (defined on the
+!-- southmost-located mpi ranks) to all other mpi ranks. Here, an allreduce
+!-- with sum option is sufficient, because rand_it is zero on all other
+!-- mpi_ranks. Note, the reduce operation is only performed with communicator
+!-- comm1dy, where only 1 rank within the communicator has non zero random numbers.
+#if defined( __parallel )
+    CALL MPI_ALLREDUCE( MPI_IN_PLACE, rand_it, SIZE( rand_it ), MPI_REAL,      &
+                        MPI_SUM, comm1dy, ierr )
+#endif
+!
+!-- Periodic fill of random number in space
+    DO  i = 0, nx
+       DO  k = 1, mergp
+          rand_it(nzb-k,i)   = rand_it(nzt+2-k,i)    ! bottom margin
+          rand_it(nzt+1+k,i) = rand_it(nzb+k-1,i)    ! top margin
+       ENDDO
+    ENDDO
+    DO  i = 1, mergp
+       DO  k = nzb-mergp, nzt+1+mergp
+          rand_it(k,-i)   = rand_it(k,nx-i+1)        ! left margin
+          rand_it(k,nx+i) = rand_it(k,i-1)           ! right margin
+       ENDDO
+    ENDDO
+!
+!-- Generate velocity seed following Eq.6 of Xie and Castro (2008)
+    n_x2 = n_x * 2
+    n_z2 = n_z * 2
+    f_n_l  = 0.0_wp
+    DO  i = nxlg, nxrg
+       DO  k = nzb_y_stg, nzt_y_stg+1
+          DO  ii = -n_x2(k), n_x2(k)
+             DO  kk = -n_z2(k), n_z2(k)
+                f_n_l(k,i) = f_n_l(k,i)                                        &
+                           + b_x(ii,k) * b_z(kk,k) * rand_it(k+kk,i+ii)
+             DO  ii = -n_x(k), n_x(k)
+                DO  kk = -n_z(k), n_z(k)
+                   f_n(k,i) = f_n(k,i) + b_x(ii,k) * b_z(kk,k) * rand_it(k+kk,i+ii)
+                ENDDO
              ENDDO
           ENDDO
        ENDDO
+    ENDDO
+    ELSE
+       f_n_l  = 0.0_wp
+       DO  i = nxl, nxr
+          DO  k = nzb_y_stg, nzt_y_stg+1
+             DO  ii = -n_x(k), n_x(k)
+                DO  kk = -n_z(k), n_z(k)
+                   f_n_l(k,i) = f_n_l(k,i) + b_x(ii,k) * b_z(kk,k) * rand_it(k+kk,i+ii)
+                ENDDO
+             ENDDO
+          ENDDO
+       ENDDO
+!
+!--    Gather velocity seeds of full subdomain
+       send_count = nzt_y_stg - nzb_y_stg + 1
+       IF ( nzt_y_stg == nzt )  send_count = send_count + 1
+#if defined( __parallel )
+!
+!--    Gather the processed velocity seed on south boundary mpi ranks.
+       CALL MPI_GATHERV( f_n_l(nzb_y_stg,nxl), send_count, stg_type_xz_small,   &
+                         f_n(nzb+1,nxl), recv_count_xz, displs_xz, stg_type_xz, &
+                         id_south, comm1dy, ierr )
+!
+!--    Gather the processed velocity seed on north boundary mpi ranks.
+       CALL MPI_GATHERV( f_n_l(nzb_y_stg,nxl), send_count, stg_type_xz_small,   &
+                         f_n(nzb+1,nxl), recv_count_xz, displs_xz, stg_type_xz, &
+                         id_north, comm1dy, ierr )
+#else
+       f_n(nzb+1:nzt+1,nxl:nxr) = f_n_l(nzb_y_stg:nzt_y_stg+1,nxl:nxr)
+#endif
+    ENDIF
     DEALLOCATE( rand_it )
+!
+!-- Gather velocity seeds of full subdomain
+    send_count = nzt_y_stg - nzb_y_stg + 1
+    IF ( nzt_y_stg == nzt )  send_count = send_count + 1
+#if defined( __parallel )
+!
+!-- Gather the processed velocity seed on south boundary mpi ranks.
+    CALL MPI_GATHERV( f_n_l(nzb_y_stg,nxlg), send_count, stg_type_xz_small,   &
+                      f_n(nzb+1,nxlg), recv_count_xz, displs_xz, stg_type_xz, &
+                      id_south, comm1dy, ierr )
+!
+!-- Gather the processed velocity seed on north boundary mpi ranks.
+    CALL MPI_GATHERV( f_n_l(nzb_y_stg,nxlg), send_count, stg_type_xz_small,   &
+                      f_n(nzb+1,nxlg), recv_count_xz, displs_xz, stg_type_xz, &
+                      id_north, comm1dy, ierr )
+#else
+    f_n(nzb+1:nzt+1,nxlg:nxrg) = f_n_l(nzb_y_stg:nzt_y_stg+1,nxlg:nxrg)
+#endif
+    CALL cpu_log( log_point_s(31), 'STG f_n factors', 'stop' )
  END SUBROUTINE stg_generate_seed_xz
 …
  SUBROUTINE parametrize_reynolds_stress
-    IMPLICIT NONE
     INTEGER(iwp) :: k            !< loop index in z-direction
 …
+!
 !--    u'u' and v'v'. Assume isotropy. Note, add a small negative number
 !--    to the denominator, else the merge-function can crash if scale_l is
+!--    to the denominator, else the merge-function can crash if scale_l is
 !--    zero.
        r11(k) = scale_us**2 * (                                                &
 …
 !------------------------------------------------------------------------------!
  SUBROUTINE calc_coeff_matrix
-    IMPLICIT NONE
     INTEGER(iwp) :: k   !< loop index in z-direction
 …
  SUBROUTINE stg_adjust
-    IMPLICIT NONE
     IF ( debug_output_timestep )  CALL debug_message( 'stg_adjust', 'start' )
+!
 …
  SUBROUTINE calc_length_and_time_scale
-    IMPLICIT NONE
     INTEGER(iwp) ::  k !< loop index in z-direction
 …
 !--    Assume isotropic turbulence length scales
        nux(k) = MAX( INT( length_scale * ddx     ), 1 ) * blend
        nuy(k) = MAX( INT( length_scale * ddy     ), 1 ) * blend
+       nuy(k) = MAX( INT( length_scale * ddy     ), 1 ) * blend
        nvx(k) = MAX( INT( length_scale * ddx     ), 1 ) * blend
        nvy(k) = MAX( INT( length_scale * ddy     ), 1 ) * blend
+       nvy(k) = MAX( INT( length_scale * ddy     ), 1 ) * blend
        nwx(k) = MAX( INT( length_scale * ddx     ), 1 ) * blend
        nwy(k) = MAX( INT( length_scale * ddy     ), 1 ) * blend
+       nwy(k) = MAX( INT( length_scale * ddy     ), 1 ) * blend
+!
 !--    Along the vertical direction limit the length scale further by the
 …
 !------------------------------------------------------------------------------!
  SUBROUTINE calc_scaling_variables
-    USE surface_mod,                                                           &
-        ONLY:  surf_def_h, surf_lsm_h, surf_usm_h
-    IMPLICIT NONE
     INTEGER(iwp) :: i            !< loop index in x-direction

Note: See TracChangeset for help on using the changeset viewer.

Context Navigation

Changeset 4438 for palm/trunk/SOURCE/synthetic_turbulence_generator_mod.f90

Legend:

palm/trunk/SOURCE/synthetic_turbulence_generator_mod.f90

Download in other formats: