!> @file shared_memory_io_mod.f90
!------------------------------------------------------------------------------!
! This file is part of PALM.
!
! PALM is free software: you can redistribute it and/or modify it under the
! terms of the GNU General Public License as published by the Free Software
! Foundation, either version 3 of the License, or (at your option) any later
! version.
!
! PALM is distributed in the hope that it will be useful, but WITHOUT ANY
! WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
! A PARTICULAR PURPOSE. See the GNU General Public License for more details.
!
! You should have received a copy of the GNU General Public License along with
! PALM. If not, see <http://www.gnu.org/licenses/>.
!
! Copyright 1997-2020 Leibniz Universitaet Hannover
!------------------------------------------------------------------------------!
!
! Current revisions:
! -----------------
!
!
! Former revisions:
! -----------------
! $Id: shared_memory_io_mod.f90 4536 2020-05-17 17:24:13Z pavelkrc $
!
! Initial version (Klaus Ketelsen)
!
!
! Description:
! ------------
!> Handle MPI-IO or NetCDF-IO shared memory arrays.
!> This module performs the organization of new communicators, adapted PE-grids
!> and allocation of shared memory arrays. The IO itself is not done here.
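!>
!> A minimal usage sketch (illustrative only; the instance and array names are hypothetical,
!> the bounds are the usual PALM subdomain indices):
!>
!>    TYPE(sm_class) ::  sm_io
!>    INTEGER(iwp)   ::  win_2d
!>    REAL(wp), DIMENSION(:,:), POINTER ::  array_2d
!>
!>    CALL sm_io%sm_init_comm( .TRUE. )
!>    IF ( sm_io%is_sm_active() )  THEN
!>       CALL sm_io%sm_allocate_shared( array_2d, nxlg, nxrg, nysg, nyng, win_2d )
!>       CALL sm_io%sm_node_barrier()        ! synchronize node-local access
!>       ...
!>       CALL sm_io%sm_free_shared( win_2d )
!>    ENDIF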
!------------------------------------------------------------------------------!
 MODULE shared_memory_io_mod

#if defined( __parallel )
#if defined( __mpifh )
    INCLUDE "mpif.h"
#else
    USE MPI
#endif
#endif

    USE, INTRINSIC ::  ISO_C_BINDING

    USE control_parameters,                                                                        &
        ONLY:  maximum_grid_level, mg_switch_to_pe0_level, message_string

    USE indices,                                                                                   &
        ONLY:  nbgp, nnx, nny, nnz, nx, nxl, nxlg, nxr, nxrg, ny, nyn, nyng, nys, nysg, nzb, nzt

    USE kinds,                                                                                     &
        ONLY:  wp, iwp

    USE transpose_indices,                                                                         &
        ONLY:  nys_x, nyn_x, nys_z, nyn_z, nxl_z, nxr_z

    USE pegrid,                                                                                    &
        ONLY:  comm1dx, comm1dy, comm2d, ierr, myid, myidx, myidy, npex, npey, numprocs, pdims,    &
               pleft, pnorth, pright, psouth, sendrecvcount_xy
#if defined( __parallel )
    USE pegrid,                                                                                    &
        ONLY:  pcoord, reorder
#endif

    IMPLICIT NONE

    PRIVATE

    SAVE

!
!-- Type to store grid information
    TYPE, PUBLIC ::  local_boundaries

       INTEGER(iwp) ::  nxl
       INTEGER(iwp) ::  nxr
       INTEGER(iwp) ::  nys
       INTEGER(iwp) ::  nyn
       INTEGER(iwp) ::  nnx
       INTEGER(iwp) ::  nny
       INTEGER(iwp) ::  nx
       INTEGER(iwp) ::  ny

    END TYPE local_boundaries

!
!-- Class definition for shared memory instances.
!-- For every use of shared memory IO, one instance of this class is created.
    TYPE, PUBLIC ::  sm_class

       INTEGER(iwp) ::  nr_io_pe_per_node = 2   !< typical configuration, 2 sockets per node
       LOGICAL      ::  no_shared_memory_in_this_run
!
!--    Variables for the shared memory communicator
       INTEGER(iwp), PUBLIC ::  comm_shared   !< communicator for processes with shared array
       INTEGER(iwp), PUBLIC ::  sh_npes
       INTEGER(iwp), PUBLIC ::  sh_rank

       LOGICAL, PUBLIC ::  iam_io_pe = .TRUE.   !< this PE is an IO-PE
!
!--    Variables for the I/O virtual grid
       INTEGER(iwp), PUBLIC ::  comm_io   !< communicator for all IO processes
       INTEGER(iwp), PUBLIC ::  io_npes
       INTEGER(iwp), PUBLIC ::  io_rank

       TYPE( local_boundaries ), PUBLIC ::  io_grid

!
!--    Variables for the node local communicator
       INTEGER(iwp) ::  comm_node   !< communicator for all processes of current node
       INTEGER(iwp) ::  io_pe_global_rank
       INTEGER(iwp) ::  n_npes
       INTEGER(iwp) ::  n_rank

       CONTAINS

          PRIVATE

          PROCEDURE, PASS(this), PUBLIC ::  is_sm_active
          PROCEDURE, PASS(this), PUBLIC ::  sm_adjust_outer_boundary
          PROCEDURE, PASS(this), PUBLIC ::  sm_free_shared
          PROCEDURE, PASS(this), PUBLIC ::  sm_init_comm
          PROCEDURE, PASS(this), PUBLIC ::  sm_node_barrier
#if defined( __parallel )
          PROCEDURE, PASS(this), PUBLIC ::  sm_allocate_shared_1d
          PROCEDURE, PASS(this), PUBLIC ::  sm_allocate_shared_2d
          PROCEDURE, PASS(this), PUBLIC ::  sm_allocate_shared_2di
          PROCEDURE, PASS(this), PUBLIC ::  sm_allocate_shared_3d

          GENERIC, PUBLIC ::  sm_allocate_shared => sm_allocate_shared_1d, sm_allocate_shared_2d,  &
                                                    sm_allocate_shared_2di, sm_allocate_shared_3d
#endif
    END TYPE sm_class


 CONTAINS


!--------------------------------------------------------------------------------------------------!
! Description:
! ------------
!> Create the communicators for shared memory groups and IO-PEs.
!> Set up the grid for shared memory IO.
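!>
!> Communicator hierarchy built below (sketch):
!>    comm2d    --MPI_COMM_SPLIT_TYPE-->  comm_node    (all PEs of one node)
!>    comm_node --MPI_COMM_SPLIT------->  comm_shared  (one shared-memory/IO group)
!>    comm2d    --MPI_COMM_SPLIT------->  comm_io      (PEs with sh_rank == 0, i.e. the IO PEs)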
!--------------------------------------------------------------------------------------------------!
 SUBROUTINE sm_init_comm( this, sm_active )

    IMPLICIT NONE

    CLASS(sm_class), INTENT(INOUT) ::  this   !< class instance giving access to the internal variables

#if defined( __parallel )
    INTEGER ::  color
    INTEGER ::  max_n_npes   !< maximum number of PEs/node
#endif

    LOGICAL, INTENT(IN) ::  sm_active   !< flag to activate shared-memory IO


    this%no_shared_memory_in_this_run = .NOT. sm_active

    IF ( this%no_shared_memory_in_this_run )  THEN
       this%iam_io_pe = .TRUE.
       RETURN
    ENDIF

#if defined( __parallel )
!
!-- Determine how many MPI processes are running on a node
    this%iam_io_pe = .FALSE.
    CALL MPI_COMM_SPLIT_TYPE( comm2d, MPI_COMM_TYPE_SHARED, 0, MPI_INFO_NULL, this%comm_node, ierr )
    CALL MPI_COMM_SIZE( this%comm_node, this%n_npes, ierr )
    CALL MPI_COMM_RANK( this%comm_node, this%n_rank, ierr )

    CALL MPI_ALLREDUCE( this%n_npes, max_n_npes, 1, MPI_INTEGER, MPI_MAX, comm2d, ierr )
!
!-- Decide whether the configuration can run with shared-memory IO
    IF ( max_n_npes > 64 )  THEN
!
!--    Special configuration on the HLRN-IV system with 4 shared memory blocks/node
       this%nr_io_pe_per_node = 4

    ELSEIF ( max_n_npes <= 32 )  THEN
!
!--    No shared memory IO with 32 or fewer PEs/node
       this%no_shared_memory_in_this_run = .TRUE.
       this%iam_io_pe = .TRUE.
       RETURN
    ENDIF

!
!-- No shared memory IO with small setups
    IF ( nx < 24  .OR.  ny < 24 )  THEN
       this%no_shared_memory_in_this_run = .TRUE.
       this%iam_io_pe = .TRUE.
       RETURN
    ENDIF

!
!-- Divide a node into shared memory groups, depending on the virtual x-y grid
    CALL compute_color( color )
!
!-- If no shared memory IO is possible, nothing is left to be done here.
    IF ( this%no_shared_memory_in_this_run )  RETURN

!
!-- Set up the shared memory area
    CALL MPI_COMM_SPLIT( this%comm_node, color, 0, this%comm_shared, ierr )
    CALL MPI_COMM_SIZE( this%comm_shared, this%sh_npes, ierr )
    CALL MPI_COMM_RANK( this%comm_shared, this%sh_rank, ierr )

!
!-- Set up the communicator across the nodes depending on the shared memory rank.
!-- All PEs with shared memory rank 0 will be I/O PEs.
    color = this%sh_rank
    CALL MPI_COMM_SPLIT( comm2d, color, 0, this%comm_io, ierr )

    IF ( this%comm_io /= MPI_COMM_NULL )  THEN
       CALL MPI_COMM_SIZE( this%comm_io, this%io_npes, ierr )
       CALL MPI_COMM_RANK( this%comm_io, this%io_rank, ierr )
    ELSE
       this%io_npes = -1
       this%io_rank = -1
    ENDIF

    IF ( this%sh_rank == 0 )  THEN
       this%iam_io_pe         = .TRUE.
       this%io_pe_global_rank = myid
    ENDIF
    CALL MPI_BCAST( this%io_pe_global_rank, 1, MPI_INTEGER, 0, this%comm_shared, ierr )

#else
    this%iam_io_pe = .TRUE.
#endif

#if defined( __parallel )
 CONTAINS

    SUBROUTINE compute_color( color )

       IMPLICIT NONE

       INTEGER(iwp), INTENT(OUT) ::  color

       INTEGER(iwp) ::  group_start
       INTEGER(iwp) ::  n
       INTEGER(iwp) ::  my_color
       INTEGER(iwp) ::  pe
       INTEGER(iwp) ::  sh_group_size

       INTEGER(iwp), DIMENSION(4,0:this%n_npes-1) ::  local_dim_s
       INTEGER(iwp), DIMENSION(4,0:this%n_npes-1) ::  local_dim_r

       TYPE(local_boundaries), DIMENSION(32) ::  node_grid

!
!--    No shared memory I/O on one-node jobs
       IF ( numprocs <= this%n_npes )  THEN
          this%no_shared_memory_in_this_run = .TRUE.
          RETURN
       ENDIF
       local_dim_s = 0
       local_dim_s(1,this%n_rank) = nxl
       local_dim_s(2,this%n_rank) = nxr
       local_dim_s(3,this%n_rank) = nys
       local_dim_s(4,this%n_rank) = nyn

       node_grid%nyn = -1
!
!--    Distribute the x-y layout of all cores of a node to all node processes
       CALL MPI_ALLREDUCE( local_dim_s, local_dim_r, SIZE( local_dim_s ), MPI_INTEGER, MPI_SUM,    &
                           this%comm_node, ierr )
       sh_group_size = ( max_n_npes + this%nr_io_pe_per_node - 1 ) / this%nr_io_pe_per_node

       pe       = 0
       my_color = 1   ! color is used to split the node communicator into communicators
                      ! for the individual IO groups
       group_start = pe
       node_grid(my_color)%nxl = local_dim_r(1,group_start)
       node_grid(my_color)%nxr = local_dim_r(2,group_start)
       node_grid(my_color)%nys = local_dim_r(3,group_start)

       DO  n = 1, this%n_npes-1

          pe = n
          IF ( n > 0  .AND.  MOD( n, sh_group_size ) == 0 )  THEN
!
!--          If a group boundary is reached, start a new IO group
             node_grid(my_color)%nyn = local_dim_r(4,pe-1)
             my_color    = my_color + 1
             group_start = pe
             node_grid(my_color)%nxl = local_dim_r(1,group_start)
             node_grid(my_color)%nxr = local_dim_r(2,group_start)
             node_grid(my_color)%nys = local_dim_r(3,group_start)

          ELSEIF ( local_dim_r(1,pe) /= node_grid(my_color)%nxl )  THEN
!
!--          If nxl changes, start a new IO group
             node_grid(my_color)%nyn = local_dim_r(4,pe-1)
             my_color    = my_color + 1
             group_start = pe
             node_grid(my_color)%nxl = local_dim_r(1,group_start)
             node_grid(my_color)%nxr = local_dim_r(2,group_start)
             node_grid(my_color)%nys = local_dim_r(3,group_start)
          ENDIF
!
!--       Save the color of the local PE
          IF ( this%n_rank == pe )  THEN
             color = my_color
          ENDIF
          IF ( n == this%n_npes-1 )  node_grid(my_color)%nyn = local_dim_r(4,pe)

       ENDDO

       IF ( this%n_rank == 0 )  THEN
          color = 1
       ENDIF

       this%io_grid     = node_grid(color)
       this%io_grid%nnx = this%io_grid%nxr - this%io_grid%nxl + 1
       this%io_grid%nny = this%io_grid%nyn - this%io_grid%nys + 1

    END SUBROUTINE compute_color
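!
!-- Worked example (illustrative): with max_n_npes = 48 PEs/node and nr_io_pe_per_node = 2,
!-- sh_group_size = ( 48 + 2 - 1 ) / 2 = 24, i.e. node-local PEs 0-23 get color 1 and PEs 24-47
!-- get color 2; an additional group is started whenever nxl changes within a node.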
#endif

 END SUBROUTINE sm_init_comm



!--------------------------------------------------------------------------------------------------!
! Description:
! ------------
!> Function to return whether shared memory IO is active.
!--------------------------------------------------------------------------------------------------!
 FUNCTION is_sm_active( this )  RESULT( ac )

    IMPLICIT NONE

    CLASS(sm_class), INTENT(INOUT) ::  this

    LOGICAL ::  ac

    ac = .NOT. this%no_shared_memory_in_this_run

 END FUNCTION is_sm_active


#if defined( __parallel )
!--------------------------------------------------------------------------------------------------!
! Description:
! ------------
!> Allocate a shared 1d-REAL array on ALL PEs of the node-local shared memory group
!--------------------------------------------------------------------------------------------------!
 SUBROUTINE sm_allocate_shared_1d( this, p1, d1, d2, win )

    IMPLICIT NONE

    CLASS(sm_class), INTENT(INOUT) ::  this

    INTEGER(iwp)                   ::  disp_unit
    INTEGER(iwp), INTENT(IN)       ::  d1
    INTEGER(iwp), INTENT(IN)       ::  d2
    INTEGER(iwp), SAVE             ::  pe_from = 0
    INTEGER(KIND=MPI_ADDRESS_KIND) ::  rem_size
    INTEGER(iwp), INTENT(OUT)      ::  win
    INTEGER(KIND=MPI_ADDRESS_KIND) ::  wsize

    INTEGER, DIMENSION(1) ::  buf_shape

    REAL(wp), DIMENSION(:), POINTER ::  buf
    REAL(wp), DIMENSION(:), POINTER ::  p1

    TYPE(C_PTR), SAVE ::  base_ptr
    TYPE(C_PTR), SAVE ::  rem_ptr


    IF ( this%no_shared_memory_in_this_run )  RETURN
!
!-- Allocate the full-size shared memory only on PE pe_from (initially node rank 0);
!-- all other PEs of the group allocate a dummy segment of size 1.
    IF ( this%sh_rank == pe_from )  THEN
       wsize = d2 - d1 + 1
    ELSE
       wsize = 1
    ENDIF
!
!-- Please note, the size is always given in bytes, independent of the displacement unit
!-- (8 bytes per default REAL(wp) element).
    wsize = wsize * 8

    CALL MPI_WIN_ALLOCATE_SHARED( wsize, 8, MPI_INFO_NULL, this%comm_shared, base_ptr, win, ierr )
!
!-- Get the C-pointer of the memory located on node-rank pe_from (sh_rank == pe_from)
    CALL MPI_WIN_SHARED_QUERY( win, pe_from, rem_size, disp_unit, rem_ptr, ierr )
!
!-- Convert the C- to a Fortran-pointer
    buf_shape(1) = d2 - d1 + 1
    CALL C_F_POINTER( rem_ptr, buf, buf_shape )
    p1(d1:) => buf
!
!-- Allocate shared memory in round robin on all PEs of a node.
    pe_from = MOD( pe_from + 1, this%sh_npes )

 END SUBROUTINE sm_allocate_shared_1d
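!
!-- Illustrative call (hypothetical names; d1/d2 are the desired lower/upper bounds):
!--    REAL(wp), DIMENSION(:), POINTER ::  vec
!--    INTEGER(iwp) ::  win_1d
!--    CALL sm_io%sm_allocate_shared( vec, nys, nyn, win_1d )   ! vec(nys:nyn), node-shared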


!--------------------------------------------------------------------------------------------------!
! Description:
! ------------
!> Allocate a shared 2d-REAL array on ALL PEs of the node-local shared memory group
!--------------------------------------------------------------------------------------------------!
 SUBROUTINE sm_allocate_shared_2d( this, p2, n_nxlg, n_nxrg, n_nysg, n_nyng, win )

    IMPLICIT NONE

    CLASS(sm_class), INTENT(INOUT) ::  this

    INTEGER(iwp)                   ::  disp_unit
    INTEGER(iwp), INTENT(IN)       ::  n_nxlg
    INTEGER(iwp), INTENT(IN)       ::  n_nxrg
    INTEGER(iwp), INTENT(IN)       ::  n_nyng
    INTEGER(iwp), INTENT(IN)       ::  n_nysg
    INTEGER(iwp), SAVE             ::  pe_from = 0
    INTEGER(KIND=MPI_ADDRESS_KIND) ::  rem_size
    INTEGER(iwp), INTENT(OUT)      ::  win
    INTEGER(KIND=MPI_ADDRESS_KIND) ::  wsize

    INTEGER(iwp), DIMENSION(2) ::  buf_shape

    REAL(wp), DIMENSION(:,:), POINTER ::  buf
    REAL(wp), DIMENSION(:,:), POINTER ::  p2

    TYPE(C_PTR), SAVE ::  base_ptr
    TYPE(C_PTR), SAVE ::  rem_ptr


    IF ( this%no_shared_memory_in_this_run )  RETURN
!
!-- Allocate the full-size shared memory only on PE pe_from (initially node rank 0);
!-- all other PEs of the group allocate a dummy segment of size 1.
    IF ( this%sh_rank == pe_from )  THEN
       wsize = ( n_nyng - n_nysg + 1 ) * ( n_nxrg - n_nxlg + 1 )
    ELSE
       wsize = 1
    ENDIF
!
!-- Please note, the size is always given in bytes, independent of the displacement unit
!-- (8 bytes per default REAL(wp) element).
    wsize = wsize * 8

    CALL MPI_WIN_ALLOCATE_SHARED( wsize, 8, MPI_INFO_NULL, this%comm_shared, base_ptr, win, ierr )
!
!-- Get the C-pointer of the memory located on node-rank pe_from (sh_rank == pe_from)
    CALL MPI_WIN_SHARED_QUERY( win, pe_from, rem_size, disp_unit, rem_ptr, ierr )
!
!-- Convert the C- to a Fortran-pointer
    buf_shape(2) = n_nyng - n_nysg + 1
    buf_shape(1) = n_nxrg - n_nxlg + 1
    CALL C_F_POINTER( rem_ptr, buf, buf_shape )
    p2(n_nxlg:, n_nysg:) => buf
!
!-- Allocate shared memory in round robin on all PEs of a node.
    pe_from = MOD( pe_from + 1, this%sh_npes )

 END SUBROUTINE sm_allocate_shared_2d


!--------------------------------------------------------------------------------------------------!
! Description:
! ------------
!> Allocate a shared 2d-INTEGER array on ALL PEs of the node-local shared memory group
!--------------------------------------------------------------------------------------------------!
 SUBROUTINE sm_allocate_shared_2di( this, p2i, n_nxlg, n_nxrg, n_nysg, n_nyng, win )

    IMPLICIT NONE

    CLASS(sm_class), INTENT(INOUT) ::  this

    INTEGER(iwp)                   ::  disp_unit
    INTEGER(iwp), INTENT(IN)       ::  n_nxlg
    INTEGER(iwp), INTENT(IN)       ::  n_nxrg
    INTEGER(iwp), INTENT(IN)       ::  n_nyng
    INTEGER(iwp), INTENT(IN)       ::  n_nysg
    INTEGER(iwp), SAVE             ::  pe_from = 0
    INTEGER(KIND=MPI_ADDRESS_KIND) ::  rem_size
    INTEGER(iwp), INTENT(OUT)      ::  win
    INTEGER(KIND=MPI_ADDRESS_KIND) ::  wsize

    INTEGER(iwp), DIMENSION(2) ::  buf_shape

    INTEGER(iwp), DIMENSION(:,:), POINTER ::  buf
    INTEGER(iwp), DIMENSION(:,:), POINTER ::  p2i

    TYPE(C_PTR), SAVE ::  base_ptr
    TYPE(C_PTR), SAVE ::  rem_ptr


    IF ( this%no_shared_memory_in_this_run )  RETURN
!
!-- Allocate the full-size shared memory only on PE pe_from (initially node rank 0);
!-- all other PEs of the group allocate a dummy segment of size 1.
    IF ( this%sh_rank == pe_from )  THEN
       wsize = ( n_nyng - n_nysg + 1 ) * ( n_nxrg - n_nxlg + 1 )
    ELSE
       wsize = 1
    ENDIF
!
!-- Please note, the size is always given in bytes, independent of the displacement unit
!-- (4 bytes per default INTEGER(iwp) element).
    wsize = wsize * 4

    CALL MPI_WIN_ALLOCATE_SHARED( wsize, 4, MPI_INFO_NULL, this%comm_shared, base_ptr, win, ierr )
!
!-- Get the C-pointer of the memory located on node-rank pe_from (sh_rank == pe_from)
    CALL MPI_WIN_SHARED_QUERY( win, pe_from, rem_size, disp_unit, rem_ptr, ierr )
!
!-- Convert the C- to a Fortran-pointer
    buf_shape(2) = n_nyng - n_nysg + 1
    buf_shape(1) = n_nxrg - n_nxlg + 1
    CALL C_F_POINTER( rem_ptr, buf, buf_shape )
    p2i(n_nxlg:, n_nysg:) => buf
!
!-- Allocate shared memory in round robin on all PEs of a node.
    pe_from = MOD( pe_from + 1, this%sh_npes )

 END SUBROUTINE sm_allocate_shared_2di


!--------------------------------------------------------------------------------------------------!
! Description:
! ------------
!> Allocate a shared 3d-REAL array on ALL PEs of the node-local shared memory group
!--------------------------------------------------------------------------------------------------!
 SUBROUTINE sm_allocate_shared_3d( this, p3, d1s, d1e, d2s, d2e, d3s, d3e, win )

    IMPLICIT NONE

    CLASS(sm_class), INTENT(INOUT) ::  this

    INTEGER                        ::  disp_unit
    INTEGER, INTENT(IN)            ::  d1e
    INTEGER, INTENT(IN)            ::  d1s
    INTEGER, INTENT(IN)            ::  d2e
    INTEGER, INTENT(IN)            ::  d2s
    INTEGER, INTENT(IN)            ::  d3e
    INTEGER, INTENT(IN)            ::  d3s
    INTEGER, SAVE                  ::  pe_from = 0
    INTEGER(KIND=MPI_ADDRESS_KIND) ::  rem_size
    INTEGER, INTENT(OUT)           ::  win
    INTEGER(KIND=MPI_ADDRESS_KIND) ::  wsize

    INTEGER, DIMENSION(3) ::  buf_shape

    REAL(wp), DIMENSION(:,:,:), POINTER ::  buf
    REAL(wp), DIMENSION(:,:,:), POINTER ::  p3

    TYPE(C_PTR), SAVE ::  base_ptr
    TYPE(C_PTR), SAVE ::  rem_ptr


    IF ( this%no_shared_memory_in_this_run )  RETURN
!
!-- Allocate the full-size shared memory only on PE pe_from (initially node rank 0);
!-- all other PEs of the group allocate a dummy segment of size 1.
    IF ( this%sh_rank == pe_from )  THEN
       wsize = ( d3e - d3s + 1 ) * ( d2e - d2s + 1 ) * ( d1e - d1s + 1 )
    ELSE
       wsize = 1
    ENDIF
!
!-- Please note, the size is always given in bytes, independent of the displacement unit
!-- (8 bytes per default REAL(wp) element).
    wsize = wsize * 8

    CALL MPI_WIN_ALLOCATE_SHARED( wsize, 8, MPI_INFO_NULL, this%comm_shared, base_ptr, win, ierr )
!
!-- Get the C-pointer of the memory located on node-rank pe_from (sh_rank == pe_from)
    CALL MPI_WIN_SHARED_QUERY( win, pe_from, rem_size, disp_unit, rem_ptr, ierr )
!
!-- Convert the C- to a Fortran-pointer
    buf_shape(3) = d3e - d3s + 1
    buf_shape(2) = d2e - d2s + 1
    buf_shape(1) = d1e - d1s + 1
    CALL C_F_POINTER( rem_ptr, buf, buf_shape )
    p3(d1s:,d2s:,d3s:) => buf
!
!-- Allocate shared memory in round robin on all PEs of a node.
    pe_from = MOD( pe_from + 1, this%sh_npes )

 END SUBROUTINE sm_allocate_shared_3d
#endif


!--------------------------------------------------------------------------------------------------!
! Description:
! ------------
!> Adjust the IO grid to the total domain that includes the outer-boundary ghost layers:
!> IO-grid portions touching an outer domain boundary are extended by nbgp ghost points, and
!> afterwards all index bounds are shifted by nbgp into the ghost-layer-extended index space.
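!>
!> Worked example (illustrative): with nbgp = 3 and an IO grid covering nxl = 0, nxr = 31 on a
!> domain with nx = 31, the left extension gives nxl = -3, the right one nxr = 34; the final
!> shift by nbgp yields nxl = 0 and nxr = 37, i.e. nnx = 38 points including both ghost layers.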
!--------------------------------------------------------------------------------------------------!
 SUBROUTINE sm_adjust_outer_boundary( this )

    IMPLICIT NONE

    CLASS(sm_class), INTENT(INOUT) ::  this


    IF ( this%no_shared_memory_in_this_run )  RETURN

    IF ( this%io_grid%nxl == 0 )  THEN
       this%io_grid%nxl = this%io_grid%nxl - nbgp
       this%io_grid%nnx = this%io_grid%nnx + nbgp
    ENDIF

    IF ( this%io_grid%nxr == nx  .OR.  npex == -1 )  THEN   ! npex == -1 if -D__parallel not set
       this%io_grid%nxr = this%io_grid%nxr + nbgp
       this%io_grid%nnx = this%io_grid%nnx + nbgp
    ENDIF

    IF ( this%io_grid%nys == 0 )  THEN
       this%io_grid%nys = this%io_grid%nys - nbgp
       this%io_grid%nny = this%io_grid%nny + nbgp
    ENDIF

    IF ( this%io_grid%nyn == ny  .OR.  npey == -1 )  THEN   ! npey == -1 if -D__parallel not set
       this%io_grid%nyn = this%io_grid%nyn + nbgp
       this%io_grid%nny = this%io_grid%nny + nbgp
    ENDIF

!
!-- Shift all index bounds by nbgp into the ghost-layer-extended index space
    this%io_grid%nxl = this%io_grid%nxl + nbgp
    this%io_grid%nxr = this%io_grid%nxr + nbgp
    this%io_grid%nys = this%io_grid%nys + nbgp
    this%io_grid%nyn = this%io_grid%nyn + nbgp

 END SUBROUTINE sm_adjust_outer_boundary


!--------------------------------------------------------------------------------------------------!
! Description:
! ------------
!> Deallocate a shared array and free the related MPI window.
!--------------------------------------------------------------------------------------------------!
 SUBROUTINE sm_free_shared( this, win )

    IMPLICIT NONE

    CLASS(sm_class), INTENT(INOUT) ::  this

    INTEGER(iwp), INTENT(INOUT) ::  win

!
!-- The check of win just avoids compile errors because of an otherwise unused argument
    IF ( this%no_shared_memory_in_this_run  .OR.  win == -1234567890 )  RETURN

#if defined( __parallel )
    CALL MPI_WIN_FREE( win, ierr )
#endif

 END SUBROUTINE sm_free_shared


!--------------------------------------------------------------------------------------------------!
! Description:
! ------------
!> Barrier for all processes of the node-local shared memory communicator.
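!> Typical usage (illustrative sketch; instance and array names are hypothetical):
!>    CALL sm_io%sm_node_barrier()
!>    IF ( sm_io%iam_io_pe )  array_2d = ...   ! only the IO PE of the group writes
!>    CALL sm_io%sm_node_barrier()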
!--------------------------------------------------------------------------------------------------!
 SUBROUTINE sm_node_barrier( this )

    IMPLICIT NONE

    CLASS(sm_class), INTENT(INOUT) ::  this


    IF ( this%no_shared_memory_in_this_run )  RETURN

#if defined( __parallel )
    CALL MPI_BARRIER( this%comm_shared, ierr )
#endif

 END SUBROUTINE sm_node_barrier

 END MODULE shared_memory_io_mod