Home

Context Navigation

source: palm/trunk/SOURCE/transpose.f90 @ 3216

Last change on this file since 3216 was 2718, checked in by maronga, 7 years ago
deleting of deprecated files; headers updated where needed
Property svn:keywords set to `Id`
File size: 29.7 KB

Rev	Line
[1682]	1	!> @file transpose.f90
[2000]	2	!------------------------------------------------------------------------------!
[2696]	3	! This file is part of the PALM model system.
[1036]	4	!
[2000]	5	! PALM is free software: you can redistribute it and/or modify it under the
	6	! terms of the GNU General Public License as published by the Free Software
	7	! Foundation, either version 3 of the License, or (at your option) any later
	8	! version.
[1036]	9	!
	10	! PALM is distributed in the hope that it will be useful, but WITHOUT ANY
	11	! WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
	12	! A PARTICULAR PURPOSE. See the GNU General Public License for more details.
	13	!
	14	! You should have received a copy of the GNU General Public License along with
	15	! PALM. If not, see <http://www.gnu.org/licenses/>.
	16	!
[2718]	17	! Copyright 1997-2018 Leibniz Universitaet Hannover
[2000]	18	!------------------------------------------------------------------------------!
[1036]	19	!
[484]	20	! Current revisions:
[1]	21	! -----------------
[1321]	22	!
[2119]	23	!
[1321]	24	! Former revisions:
	25	! -----------------
	26	! $Id: transpose.f90 2718 2018-01-02 08:49:38Z sward $
[2716]	27	! Corrected "Former revisions" section
	28	!
	29	! 2696 2017-12-14 17:12:51Z kanani
	30	! Change in file header (GPL part)
[1321]	31	!
[2716]	32	! 2119 2017-01-17 16:51:50Z raasch
	33	!
[2119]	34	! 2118 2017-01-17 16:38:49Z raasch
	35	! OpenACC directives removed
	36	!
[2001]	37	! 2000 2016-08-20 18:09:15Z knoop
	38	! Forced header and separation lines into 80 columns
	39	!
[1683]	40	! 1682 2015-10-07 23:56:08Z knoop
	41	! Code annotations made doxygen readable
	42	!
[1325]	43	! 1324 2014-03-21 09:13:16Z suehring
	44	! Bugfix: ONLY statement for module pegrid removed
	45	!
[1321]	46	! 1320 2014-03-20 08:40:49Z raasch
[1320]	47	! ONLY-attribute added to USE-statements,
	48	! kind-parameters added to all INTEGER and REAL declaration statements,
	49	! kinds are defined in new module kinds,
	50	! old module precision_kind is removed,
	51	! revision history before 2012 removed,
	52	! comment fields (!:) to be used for variable explanations added to
	53	! all variable declaration statements
[198]	54	!
[1319]	55	! 1318 2014-03-17 13:35:16Z raasch
	56	! cpu_log_nowait parameter added to cpu measurements of the transpositions
	57	! required for solving the Poisson equation (poisfft),
	58	! module interfaces removed
	59	!
[1258]	60	! 1257 2013-11-08 15:18:40Z raasch
	61	! openacc loop and loop vector clauses removed
	62	!
[1217]	63	! 1216 2013-08-26 09:31:42Z raasch
	64	! re-sorting of the transposed / to be transposed arrays moved to separate
	65	! routines resort_for_...
	66	!
[1112]	67	! 1111 2013-03-08 23:54:10Z raasch
	68	! openACC directives added,
	69	! resorting data from/to work changed, work got 4 dimensions instead of 1
	70	!
[1107]	71	! 1106 2013-03-04 05:31:38Z raasch
	72	! preprocessor lines rearranged so that routines can also be used in serial
	73	! (non-parallel) mode
	74	!
[1093]	75	! 1092 2013-02-02 11:24:22Z raasch
	76	! unused variables removed
	77	!
[1037]	78	! 1036 2012-10-22 13:43:42Z raasch
	79	! code put under GPL (PALM 3.9)
	80	!
[1004]	81	! 1003 2012-09-14 14:35:53Z raasch
	82	! indices nxa, nya, etc. replaced by nx, ny, etc.
	83	!
[1]	84	! Revision 1.1 1997/07/24 11:25:18 raasch
	85	! Initial revision
	86	!
[1216]	87	!------------------------------------------------------------------------------!
	88	! Description:
	89	! ------------
[1682]	90	!> Resorting data for the transposition from x to y. The transposition itself
	91	!> is carried out in transpose_xy
[1216]	92	!------------------------------------------------------------------------------!
[1682]	93	SUBROUTINE resort_for_xy( f_in, f_inv )
	94
[1216]	95
[1320]	96	USE indices, &
	97	ONLY: nx
[1216]	98
[1320]	99	USE kinds
	100
	101	USE transpose_indices, &
	102	ONLY: nxl_z, nxr_z, nyn_x, nyn_z, nys_x, nys_z, nzb_x, nzt_x
	103
[1216]	104	IMPLICIT NONE
	105
[1682]	106	REAL(wp) :: f_in(0:nx,nys_x:nyn_x,nzb_x:nzt_x) !<
	107	REAL(wp) :: f_inv(nys_x:nyn_x,nzb_x:nzt_x,0:nx) !<
[1216]	108
	109
[1682]	110	INTEGER(iwp) :: i !<
	111	INTEGER(iwp) :: j !<
	112	INTEGER(iwp) :: k !<
[1]	113	!
[1216]	114	!-- Rearrange indices of input array in order to make data to be send
	115	!-- by MPI contiguous
	116	!$OMP PARALLEL PRIVATE ( i, j, k )
	117	!$OMP DO
	118	DO i = 0, nx
	119	DO k = nzb_x, nzt_x
	120	DO j = nys_x, nyn_x
	121	f_inv(j,k,i) = f_in(i,j,k)
	122	ENDDO
	123	ENDDO
	124	ENDDO
	125	!$OMP END PARALLEL
	126
	127	END SUBROUTINE resort_for_xy
	128
	129
	130	!------------------------------------------------------------------------------!
[1]	131	! Description:
	132	! ------------
[1682]	133	!> Transposition of input array (f_in) from x to y. For the input array, all
	134	!> elements along x reside on the same PE, while after transposition, all
	135	!> elements along y reside on the same PE.
[1]	136	!------------------------------------------------------------------------------!
[1682]	137	SUBROUTINE transpose_xy( f_inv, f_out )
[1]	138
[1682]	139
[1320]	140	USE cpulog, &
	141	ONLY: cpu_log, cpu_log_nowait, log_point_s
	142
	143	USE indices, &
	144	ONLY: nx, ny
	145
	146	USE kinds
	147
[1]	148	USE pegrid
	149
[1320]	150	USE transpose_indices, &
	151	ONLY: nxl_y, nxr_y, nyn_x, nys_x, nzb_x, nzb_y, nzt_x, nzt_y
	152
[1]	153	IMPLICIT NONE
	154
[1682]	155	INTEGER(iwp) :: i !<
	156	INTEGER(iwp) :: j !<
	157	INTEGER(iwp) :: k !<
	158	INTEGER(iwp) :: l !<
	159	INTEGER(iwp) :: ys !<
[1320]	160
[1682]	161	REAL(wp) :: f_inv(nys_x:nyn_x,nzb_x:nzt_x,0:nx) !<
	162	REAL(wp) :: f_out(0:ny,nxl_y:nxr_y,nzb_y:nzt_y) !<
[1]	163
[1682]	164	REAL(wp), DIMENSION(nyn_x-nys_x+1,nzb_y:nzt_y,nxl_y:nxr_y,0:pdims(2)-1) :: work !<
[1111]	165
	166
[1106]	167	IF ( numprocs /= 1 ) THEN
	168
	169	#if defined( __parallel )
[1]	170	!
[1106]	171	!-- Transpose array
[1318]	172	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start', cpu_log_nowait )
[1106]	173	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
[1111]	174	CALL MPI_ALLTOALL( f_inv(nys_x,nzb_x,0), sendrecvcount_xy, MPI_REAL, &
	175	work(1,nzb_y,nxl_y,0), sendrecvcount_xy, MPI_REAL, &
[1106]	176	comm1dy, ierr )
	177	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
[1]	178
	179	!
[1106]	180	!-- Reorder transposed array
[1111]	181	!$OMP PARALLEL PRIVATE ( i, j, k, l, ys )
[683]	182	!$OMP DO
[1106]	183	DO l = 0, pdims(2) - 1
	184	ys = 0 + l * ( nyn_x - nys_x + 1 )
	185	DO i = nxl_y, nxr_y
	186	DO k = nzb_y, nzt_y
	187	DO j = ys, ys + nyn_x - nys_x
[1111]	188	f_out(j,i,k) = work(j-ys+1,k,i,l)
[1106]	189	ENDDO
[1]	190	ENDDO
	191	ENDDO
	192	ENDDO
[683]	193	!$OMP END PARALLEL
[1]	194	#endif
	195
[1106]	196	ELSE
	197
	198	!
	199	!-- Reorder transposed array
	200	!$OMP PARALLEL PRIVATE ( i, j, k )
	201	!$OMP DO
	202	DO k = nzb_y, nzt_y
	203	DO i = nxl_y, nxr_y
	204	DO j = 0, ny
	205	f_out(j,i,k) = f_inv(j,k,i)
	206	ENDDO
	207	ENDDO
	208	ENDDO
	209	!$OMP END PARALLEL
	210
	211	ENDIF
	212
[1]	213	END SUBROUTINE transpose_xy
	214
	215
	216	!------------------------------------------------------------------------------!
	217	! Description:
	218	! ------------
[1682]	219	!> Resorting data after the transposition from x to z. The transposition itself
	220	!> is carried out in transpose_xz
[1216]	221	!------------------------------------------------------------------------------!
[1682]	222	SUBROUTINE resort_for_xz( f_inv, f_out )
[1216]	223
[1682]	224
[1320]	225	USE indices, &
	226	ONLY: nxl, nxr, nyn, nys, nz
[1216]	227
[1320]	228	USE kinds
	229
[1216]	230	IMPLICIT NONE
	231
[1682]	232	REAL(wp) :: f_inv(nys:nyn,nxl:nxr,1:nz) !<
	233	REAL(wp) :: f_out(1:nz,nys:nyn,nxl:nxr) !<
[1216]	234
[1682]	235	INTEGER(iwp) :: i !<
	236	INTEGER(iwp) :: j !<
	237	INTEGER(iwp) :: k !<
[1216]	238	!
	239	!-- Rearrange indices of input array in order to make data to be send
	240	!-- by MPI contiguous.
	241	!-- In case of parallel fft/transposition, scattered store is faster in
	242	!-- backward direction!!!
	243	!$OMP PARALLEL PRIVATE ( i, j, k )
	244	!$OMP DO
	245	DO k = 1, nz
	246	DO i = nxl, nxr
	247	DO j = nys, nyn
	248	f_out(k,j,i) = f_inv(j,i,k)
	249	ENDDO
	250	ENDDO
	251	ENDDO
	252	!$OMP END PARALLEL
	253
	254	END SUBROUTINE resort_for_xz
	255
	256
	257	!------------------------------------------------------------------------------!
	258	! Description:
	259	! ------------
[1682]	260	!> Transposition of input array (f_in) from x to z. For the input array, all
	261	!> elements along x reside on the same PE, while after transposition, all
	262	!> elements along z reside on the same PE.
[1]	263	!------------------------------------------------------------------------------!
[1682]	264	SUBROUTINE transpose_xz( f_in, f_inv )
[1]	265
[1682]	266
[1320]	267	USE cpulog, &
	268	ONLY: cpu_log, cpu_log_nowait, log_point_s
[1]	269
[1320]	270	USE indices, &
	271	ONLY: nnx, nx, nxl, nxr, ny, nyn, nys, nz
	272
	273	USE kinds
	274
[1324]	275	USE pegrid
[1320]	276
	277	USE transpose_indices, &
	278	ONLY: nyn_x, nys_x, nzb_x, nzt_x
	279
[1]	280	IMPLICIT NONE
	281
[1682]	282	INTEGER(iwp) :: i !<
	283	INTEGER(iwp) :: j !<
	284	INTEGER(iwp) :: k !<
	285	INTEGER(iwp) :: l !<
	286	INTEGER(iwp) :: xs !<
[1]	287
[1682]	288	REAL(wp) :: f_in(0:nx,nys_x:nyn_x,nzb_x:nzt_x) !<
	289	REAL(wp) :: f_inv(nys:nyn,nxl:nxr,1:nz) !<
[1]	290
[1682]	291	REAL(wp), DIMENSION(nys_x:nyn_x,nnx,nzb_x:nzt_x,0:pdims(1)-1) :: work !<
[1111]	292
[1320]	293
[1]	294	!
	295	!-- If the PE grid is one-dimensional along y, the array has only to be
	296	!-- reordered locally and therefore no transposition has to be done.
	297	IF ( pdims(1) /= 1 ) THEN
[1106]	298
	299	#if defined( __parallel )
[1]	300	!
	301	!-- Reorder input array for transposition
[1111]	302	!$OMP PARALLEL PRIVATE ( i, j, k, l, xs )
[683]	303	!$OMP DO
[1]	304	DO l = 0, pdims(1) - 1
	305	xs = 0 + l * nnx
[1003]	306	DO k = nzb_x, nzt_x
[164]	307	DO i = xs, xs + nnx - 1
[1003]	308	DO j = nys_x, nyn_x
[1111]	309	work(j,i-xs+1,k,l) = f_in(i,j,k)
[1]	310	ENDDO
	311	ENDDO
	312	ENDDO
	313	ENDDO
[683]	314	!$OMP END PARALLEL
[1]	315
	316	!
	317	!-- Transpose array
[1318]	318	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start', cpu_log_nowait )
[622]	319	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
[1111]	320	CALL MPI_ALLTOALL( work(nys_x,1,nzb_x,0), sendrecvcount_zx, MPI_REAL, &
	321	f_inv(nys,nxl,1), sendrecvcount_zx, MPI_REAL, &
[1]	322	comm1dx, ierr )
	323	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
[1106]	324	#endif
	325
[1]	326	ELSE
[1106]	327
[1]	328	!
	329	!-- Reorder the array in a way that the z index is in first position
[683]	330	!$OMP PARALLEL PRIVATE ( i, j, k )
	331	!$OMP DO
[1003]	332	DO i = nxl, nxr
	333	DO j = nys, nyn
	334	DO k = 1, nz
[164]	335	f_inv(j,i,k) = f_in(i,j,k)
[1]	336	ENDDO
	337	ENDDO
	338	ENDDO
[683]	339	!$OMP END PARALLEL
[1]	340
[164]	341	ENDIF
	342
[1]	343	END SUBROUTINE transpose_xz
	344
	345
	346	!------------------------------------------------------------------------------!
	347	! Description:
	348	! ------------
[1682]	349	!> Resorting data after the transposition from y to x. The transposition itself
	350	!> is carried out in transpose_yx
[1216]	351	!------------------------------------------------------------------------------!
[1682]	352	SUBROUTINE resort_for_yx( f_inv, f_out )
[1216]	353
[1682]	354
[1320]	355	USE indices, &
	356	ONLY: nx
[1216]	357
[1320]	358	USE kinds
	359
	360	USE transpose_indices, &
	361	ONLY: nyn_x, nys_x, nzb_x, nzt_x
	362
[1216]	363	IMPLICIT NONE
	364
[1682]	365	REAL(wp) :: f_inv(nys_x:nyn_x,nzb_x:nzt_x,0:nx) !<
	366	REAL(wp) :: f_out(0:nx,nys_x:nyn_x,nzb_x:nzt_x) !<
[1216]	367
	368
[1682]	369	INTEGER(iwp) :: i !<
	370	INTEGER(iwp) :: j !<
	371	INTEGER(iwp) :: k !<
[1216]	372	!
	373	!-- Rearrange indices of input array in order to make data to be send
	374	!-- by MPI contiguous
	375	!$OMP PARALLEL PRIVATE ( i, j, k )
	376	!$OMP DO
	377	DO i = 0, nx
	378	DO k = nzb_x, nzt_x
	379	DO j = nys_x, nyn_x
	380	f_out(i,j,k) = f_inv(j,k,i)
	381	ENDDO
	382	ENDDO
	383	ENDDO
	384	!$OMP END PARALLEL
	385
	386	END SUBROUTINE resort_for_yx
	387
	388
	389	!------------------------------------------------------------------------------!
	390	! Description:
	391	! ------------
[1682]	392	!> Transposition of input array (f_in) from y to x. For the input array, all
	393	!> elements along y reside on the same PE, while after transposition, all
	394	!> elements along x reside on the same PE.
[1]	395	!------------------------------------------------------------------------------!
[1682]	396	SUBROUTINE transpose_yx( f_in, f_inv )
[1]	397
[1682]	398
[1320]	399	USE cpulog, &
	400	ONLY: cpu_log, cpu_log_nowait, log_point_s
[1]	401
[1320]	402	USE indices, &
	403	ONLY: nx, ny
	404
	405	USE kinds
	406
[1324]	407	USE pegrid
[1320]	408
	409	USE transpose_indices, &
	410	ONLY: nxl_y, nxr_y, nyn_x, nys_x, nzb_x, nzb_y, nzt_x, nzt_y
	411
[1]	412	IMPLICIT NONE
	413
[1682]	414	INTEGER(iwp) :: i !<
	415	INTEGER(iwp) :: j !<
	416	INTEGER(iwp) :: k !<
	417	INTEGER(iwp) :: l !<
	418	INTEGER(iwp) :: ys !<
[1]	419
[1682]	420	REAL(wp) :: f_in(0:ny,nxl_y:nxr_y,nzb_y:nzt_y) !<
	421	REAL(wp) :: f_inv(nys_x:nyn_x,nzb_x:nzt_x,0:nx) !<
[1111]	422
[1682]	423	REAL(wp), DIMENSION(nyn_x-nys_x+1,nzb_y:nzt_y,nxl_y:nxr_y,0:pdims(2)-1) :: work !<
[1111]	424
[1320]	425
[1106]	426	IF ( numprocs /= 1 ) THEN
	427
[1]	428	#if defined( __parallel )
	429	!
[1106]	430	!-- Reorder input array for transposition
[1111]	431	!$OMP PARALLEL PRIVATE ( i, j, k, l, ys )
[683]	432	!$OMP DO
[1106]	433	DO l = 0, pdims(2) - 1
	434	ys = 0 + l * ( nyn_x - nys_x + 1 )
	435	DO i = nxl_y, nxr_y
	436	DO k = nzb_y, nzt_y
	437	DO j = ys, ys + nyn_x - nys_x
[1111]	438	work(j-ys+1,k,i,l) = f_in(j,i,k)
[1106]	439	ENDDO
	440	ENDDO
	441	ENDDO
	442	ENDDO
	443	!$OMP END PARALLEL
	444
	445	!
	446	!-- Transpose array
[1318]	447	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start', cpu_log_nowait )
[1106]	448	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
[1111]	449	CALL MPI_ALLTOALL( work(1,nzb_y,nxl_y,0), sendrecvcount_xy, MPI_REAL, &
	450	f_inv(nys_x,nzb_x,0), sendrecvcount_xy, MPI_REAL, &
[1106]	451	comm1dy, ierr )
	452	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
	453	#endif
	454
	455	ELSE
	456
	457	!
	458	!-- Reorder array f_in the same way as ALLTOALL did it
	459	!$OMP PARALLEL PRIVATE ( i, j, k )
	460	!$OMP DO
[1003]	461	DO i = nxl_y, nxr_y
	462	DO k = nzb_y, nzt_y
[1106]	463	DO j = 0, ny
	464	f_inv(j,k,i) = f_in(j,i,k)
[1]	465	ENDDO
	466	ENDDO
	467	ENDDO
[683]	468	!$OMP END PARALLEL
[1]	469
[1106]	470	ENDIF
[1]	471
	472	END SUBROUTINE transpose_yx
	473
	474
	475	!------------------------------------------------------------------------------!
	476	! Description:
	477	! ------------
[1682]	478	!> Transposition of input array (f_in) from y to x. For the input array, all
	479	!> elements along y reside on the same PE, while after transposition, all
	480	!> elements along x reside on the same PE.
	481	!> This is a direct transposition for arrays with indices in regular order
	482	!> (k,j,i) (cf. transpose_yx).
[1]	483	!------------------------------------------------------------------------------!
[1682]	484	SUBROUTINE transpose_yxd( f_in, f_out )
[1]	485
[1682]	486
[1320]	487	USE cpulog, &
	488	ONLY: cpu_log, cpu_log_nowait, log_point_s
[1]	489
[1320]	490	USE indices, &
	491	ONLY: nnx, nny, nnz, nx, nxl, nxr, nyn, nys, nz
	492
	493	USE kinds
	494
[1324]	495	USE pegrid
[1320]	496
	497	USE transpose_indices, &
	498	ONLY: nyn_x, nys_x, nzb_x, nzt_x
	499
[1]	500	IMPLICIT NONE
	501
[1682]	502	INTEGER(iwp) :: i !<
	503	INTEGER(iwp) :: j !<
	504	INTEGER(iwp) :: k !<
	505	INTEGER(iwp) :: l !<
	506	INTEGER(iwp) :: m !<
	507	INTEGER(iwp) :: xs !<
[1]	508
[1682]	509	REAL(wp) :: f_in(1:nz,nys:nyn,nxl:nxr) !<
	510	REAL(wp) :: f_inv(nxl:nxr,1:nz,nys:nyn) !<
	511	REAL(wp) :: f_out(0:nx,nys_x:nyn_x,nzb_x:nzt_x) !<
	512	REAL(wp) :: work(nnxnnynnz) !<
[1]	513	#if defined( __parallel )
	514
	515	!
	516	!-- Rearrange indices of input array in order to make data to be send
	517	!-- by MPI contiguous
[1003]	518	DO k = 1, nz
	519	DO j = nys, nyn
	520	DO i = nxl, nxr
[164]	521	f_inv(i,k,j) = f_in(k,j,i)
[1]	522	ENDDO
	523	ENDDO
	524	ENDDO
	525
	526	!
	527	!-- Transpose array
	528	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
[622]	529	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
[1]	530	CALL MPI_ALLTOALL( f_inv(nxl,1,nys), sendrecvcount_xy, MPI_REAL, &
[164]	531	work(1), sendrecvcount_xy, MPI_REAL, &
[1]	532	comm1dx, ierr )
	533	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
	534
	535	!
	536	!-- Reorder transposed array
	537	m = 0
	538	DO l = 0, pdims(1) - 1
	539	xs = 0 + l * nnx
[1003]	540	DO j = nys_x, nyn_x
	541	DO k = 1, nz
[1]	542	DO i = xs, xs + nnx - 1
	543	m = m + 1
[164]	544	f_out(i,j,k) = work(m)
[1]	545	ENDDO
	546	ENDDO
	547	ENDDO
	548	ENDDO
	549
	550	#endif
	551
	552	END SUBROUTINE transpose_yxd
	553
	554
	555	!------------------------------------------------------------------------------!
	556	! Description:
	557	! ------------
[1682]	558	!> Resorting data for the transposition from y to z. The transposition itself
	559	!> is carried out in transpose_yz
[1216]	560	!------------------------------------------------------------------------------!
[1682]	561	SUBROUTINE resort_for_yz( f_in, f_inv )
[1216]	562
[1682]	563
[1320]	564	USE indices, &
	565	ONLY: ny
[1216]	566
[1320]	567	USE kinds
	568
	569	USE transpose_indices, &
	570	ONLY: nxl_y, nxr_y, nzb_y, nzt_y
	571
[1216]	572	IMPLICIT NONE
	573
[1682]	574	REAL(wp) :: f_in(0:ny,nxl_y:nxr_y,nzb_y:nzt_y) !<
	575	REAL(wp) :: f_inv(nxl_y:nxr_y,nzb_y:nzt_y,0:ny) !<
[1216]	576
[1682]	577	INTEGER(iwp) :: i !<
	578	INTEGER(iwp) :: j !<
	579	INTEGER(iwp) :: k !<
[1216]	580
	581	!
	582	!-- Rearrange indices of input array in order to make data to be send
	583	!-- by MPI contiguous
	584	!$OMP PARALLEL PRIVATE ( i, j, k )
	585	!$OMP DO
	586	DO j = 0, ny
	587	DO k = nzb_y, nzt_y
	588	DO i = nxl_y, nxr_y
	589	f_inv(i,k,j) = f_in(j,i,k)
	590	ENDDO
	591	ENDDO
	592	ENDDO
	593	!$OMP END PARALLEL
	594
	595	END SUBROUTINE resort_for_yz
	596
	597
	598	!------------------------------------------------------------------------------!
	599	! Description:
	600	! ------------
[1682]	601	!> Transposition of input array (f_in) from y to z. For the input array, all
	602	!> elements along y reside on the same PE, while after transposition, all
	603	!> elements along z reside on the same PE.
[1]	604	!------------------------------------------------------------------------------!
[1682]	605	SUBROUTINE transpose_yz( f_inv, f_out )
[1]	606
[1682]	607
[1320]	608	USE cpulog, &
	609	ONLY: cpu_log, cpu_log_nowait, log_point_s
[1]	610
[1320]	611	USE indices, &
	612	ONLY: ny, nz
	613
	614	USE kinds
	615
[1324]	616	USE pegrid
[1320]	617
	618	USE transpose_indices, &
	619	ONLY: nxl_y, nxl_z, nxr_y, nxr_z, nyn_z, nys_z, nzb_y, nzt_y
	620
[1]	621	IMPLICIT NONE
	622
[1682]	623	INTEGER(iwp) :: i !<
	624	INTEGER(iwp) :: j !<
	625	INTEGER(iwp) :: k !<
	626	INTEGER(iwp) :: l !<
	627	INTEGER(iwp) :: zs !<
[1]	628
[1682]	629	REAL(wp) :: f_inv(nxl_y:nxr_y,nzb_y:nzt_y,0:ny) !<
	630	REAL(wp) :: f_out(nxl_z:nxr_z,nys_z:nyn_z,1:nz) !<
[1111]	631
[1682]	632	REAL(wp), DIMENSION(nxl_z:nxr_z,nzt_y-nzb_y+1,nys_z:nyn_z,0:pdims(1)-1) :: work !<
[1111]	633
[1320]	634
[1]	635	!
	636	!-- If the PE grid is one-dimensional along y, only local reordering
	637	!-- of the data is necessary and no transposition has to be done.
	638	IF ( pdims(1) == 1 ) THEN
[1106]	639
[683]	640	!$OMP PARALLEL PRIVATE ( i, j, k )
	641	!$OMP DO
[1003]	642	DO j = 0, ny
	643	DO k = nzb_y, nzt_y
	644	DO i = nxl_y, nxr_y
[164]	645	f_out(i,j,k) = f_inv(i,k,j)
[1]	646	ENDDO
	647	ENDDO
	648	ENDDO
[683]	649	!$OMP END PARALLEL
[1]	650
[1106]	651	ELSE
	652
	653	#if defined( __parallel )
[1]	654	!
[1106]	655	!-- Transpose array
[1318]	656	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start', cpu_log_nowait )
[1106]	657	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
[1111]	658	CALL MPI_ALLTOALL( f_inv(nxl_y,nzb_y,0), sendrecvcount_yz, MPI_REAL, &
	659	work(nxl_z,1,nys_z,0), sendrecvcount_yz, MPI_REAL, &
[1106]	660	comm1dx, ierr )
	661	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
[1]	662
	663	!
[1106]	664	!-- Reorder transposed array
[1111]	665	!$OMP PARALLEL PRIVATE ( i, j, k, l, zs )
[683]	666	!$OMP DO
[1106]	667	DO l = 0, pdims(1) - 1
	668	zs = 1 + l * ( nzt_y - nzb_y + 1 )
	669	DO j = nys_z, nyn_z
	670	DO k = zs, zs + nzt_y - nzb_y
	671	DO i = nxl_z, nxr_z
[1111]	672	f_out(i,j,k) = work(i,k-zs+1,j,l)
[1106]	673	ENDDO
[1]	674	ENDDO
	675	ENDDO
	676	ENDDO
[683]	677	!$OMP END PARALLEL
[1]	678	#endif
	679
[1106]	680	ENDIF
	681
[1]	682	END SUBROUTINE transpose_yz
	683
	684
	685	!------------------------------------------------------------------------------!
	686	! Description:
	687	! ------------
[1682]	688	!> Resorting data for the transposition from z to x. The transposition itself
	689	!> is carried out in transpose_zx
[1216]	690	!------------------------------------------------------------------------------!
[1682]	691	SUBROUTINE resort_for_zx( f_in, f_inv )
[1216]	692
[1682]	693
[1320]	694	USE indices, &
	695	ONLY: nxl, nxr, nyn, nys, nz
[1216]	696
[1320]	697	USE kinds
	698
[1216]	699	IMPLICIT NONE
	700
[1682]	701	REAL(wp) :: f_in(1:nz,nys:nyn,nxl:nxr) !<
	702	REAL(wp) :: f_inv(nys:nyn,nxl:nxr,1:nz) !<
[1216]	703
[1682]	704	INTEGER(iwp) :: i !<
	705	INTEGER(iwp) :: j !<
	706	INTEGER(iwp) :: k !<
[1216]	707
	708	!
	709	!-- Rearrange indices of input array in order to make data to be send
	710	!-- by MPI contiguous
	711	!$OMP PARALLEL PRIVATE ( i, j, k )
	712	!$OMP DO
	713	DO k = 1,nz
	714	DO i = nxl, nxr
	715	DO j = nys, nyn
	716	f_inv(j,i,k) = f_in(k,j,i)
	717	ENDDO
	718	ENDDO
	719	ENDDO
	720	!$OMP END PARALLEL
	721
	722	END SUBROUTINE resort_for_zx
	723
	724
	725	!------------------------------------------------------------------------------!
	726	! Description:
	727	! ------------
[1682]	728	!> Transposition of input array (f_in) from z to x. For the input array, all
	729	!> elements along z reside on the same PE, while after transposition, all
	730	!> elements along x reside on the same PE.
[1]	731	!------------------------------------------------------------------------------!
[1682]	732	SUBROUTINE transpose_zx( f_inv, f_out )
[1]	733
[1682]	734
[1320]	735	USE cpulog, &
	736	ONLY: cpu_log, cpu_log_nowait, log_point_s
[1]	737
[1320]	738	USE indices, &
	739	ONLY: nnx, nx, nxl, nxr, nyn, nys, nz
	740
	741	USE kinds
	742
[1324]	743	USE pegrid
[1320]	744
	745	USE transpose_indices, &
	746	ONLY: nyn_x, nys_x, nzb_x, nzt_x
	747
[1]	748	IMPLICIT NONE
	749
[1682]	750	INTEGER(iwp) :: i !<
	751	INTEGER(iwp) :: j !<
	752	INTEGER(iwp) :: k !<
	753	INTEGER(iwp) :: l !<
	754	INTEGER(iwp) :: xs !<
[1]	755
[1682]	756	REAL(wp) :: f_inv(nys:nyn,nxl:nxr,1:nz) !<
	757	REAL(wp) :: f_out(0:nx,nys_x:nyn_x,nzb_x:nzt_x) !<
[1111]	758
[1682]	759	REAL(wp), DIMENSION(nys_x:nyn_x,nnx,nzb_x:nzt_x,0:pdims(1)-1) :: work !<
[1]	760
[1320]	761
[1]	762	!
	763	!-- If the PE grid is one-dimensional along y, only local reordering
	764	!-- of the data is necessary and no transposition has to be done.
	765	IF ( pdims(1) == 1 ) THEN
[1106]	766
[683]	767	!$OMP PARALLEL PRIVATE ( i, j, k )
	768	!$OMP DO
[1003]	769	DO k = 1, nz
	770	DO i = nxl, nxr
	771	DO j = nys, nyn
[164]	772	f_out(i,j,k) = f_inv(j,i,k)
[1]	773	ENDDO
	774	ENDDO
	775	ENDDO
[683]	776	!$OMP END PARALLEL
[1]	777
[1106]	778	ELSE
	779
	780	#if defined( __parallel )
[1]	781	!
[1106]	782	!-- Transpose array
[1318]	783	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start', cpu_log_nowait )
[1106]	784	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
[1111]	785	CALL MPI_ALLTOALL( f_inv(nys,nxl,1), sendrecvcount_zx, MPI_REAL, &
	786	work(nys_x,1,nzb_x,0), sendrecvcount_zx, MPI_REAL, &
[1106]	787	comm1dx, ierr )
	788	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
[1]	789
	790	!
[1106]	791	!-- Reorder transposed array
[1111]	792	!$OMP PARALLEL PRIVATE ( i, j, k, l, xs )
[683]	793	!$OMP DO
[1106]	794	DO l = 0, pdims(1) - 1
	795	xs = 0 + l * nnx
	796	DO k = nzb_x, nzt_x
	797	DO i = xs, xs + nnx - 1
	798	DO j = nys_x, nyn_x
[1111]	799	f_out(i,j,k) = work(j,i-xs+1,k,l)
[1106]	800	ENDDO
[1]	801	ENDDO
	802	ENDDO
	803	ENDDO
[683]	804	!$OMP END PARALLEL
[1]	805	#endif
	806
[1106]	807	ENDIF
	808
[1]	809	END SUBROUTINE transpose_zx
	810
	811
	812	!------------------------------------------------------------------------------!
	813	! Description:
	814	! ------------
[1682]	815	!> Resorting data after the transposition from z to y. The transposition itself
	816	!> is carried out in transpose_zy
[1216]	817	!------------------------------------------------------------------------------!
[1682]	818	SUBROUTINE resort_for_zy( f_inv, f_out )
[1216]	819
[1682]	820
[1320]	821	USE indices, &
	822	ONLY: ny
[1216]	823
[1320]	824	USE kinds
	825
	826	USE transpose_indices, &
	827	ONLY: nxl_y, nxr_y, nzb_y, nzt_y
	828
[1216]	829	IMPLICIT NONE
	830
[1682]	831	REAL(wp) :: f_inv(nxl_y:nxr_y,nzb_y:nzt_y,0:ny) !<
	832	REAL(wp) :: f_out(0:ny,nxl_y:nxr_y,nzb_y:nzt_y) !<
[1216]	833
	834
[1682]	835	INTEGER(iwp) :: i !<
	836	INTEGER(iwp) :: j !<
	837	INTEGER(iwp) :: k !<
[1216]	838
	839	!
	840	!-- Rearrange indices of input array in order to make data to be send
	841	!-- by MPI contiguous
	842	!$OMP PARALLEL PRIVATE ( i, j, k )
	843	!$OMP DO
	844	DO k = nzb_y, nzt_y
	845	DO j = 0, ny
	846	DO i = nxl_y, nxr_y
	847	f_out(j,i,k) = f_inv(i,k,j)
	848	ENDDO
	849	ENDDO
	850	ENDDO
	851	!$OMP END PARALLEL
	852
	853	END SUBROUTINE resort_for_zy
	854
	855
	856	!------------------------------------------------------------------------------!
	857	! Description:
	858	! ------------
[1682]	859	!> Transposition of input array (f_in) from z to y. For the input array, all
	860	!> elements along z reside on the same PE, while after transposition, all
	861	!> elements along y reside on the same PE.
[1]	862	!------------------------------------------------------------------------------!
[1682]	863	SUBROUTINE transpose_zy( f_in, f_inv )
[1]	864
[1682]	865
[1320]	866	USE cpulog, &
	867	ONLY: cpu_log, cpu_log_nowait, log_point_s
[1]	868
[1320]	869	USE indices, &
	870	ONLY: ny, nz
	871
	872	USE kinds
	873
[1324]	874	USE pegrid
[1320]	875
	876	USE transpose_indices, &
	877	ONLY: nxl_y, nxl_z, nxr_y, nxr_z, nyn_z, nys_z, nzb_y, nzt_y
	878
[1]	879	IMPLICIT NONE
	880
[1682]	881	INTEGER(iwp) :: i !<
	882	INTEGER(iwp) :: j !<
	883	INTEGER(iwp) :: k !<
	884	INTEGER(iwp) :: l !<
	885	INTEGER(iwp) :: zs !<
[1]	886
[1682]	887	REAL(wp) :: f_in(nxl_z:nxr_z,nys_z:nyn_z,1:nz) !<
	888	REAL(wp) :: f_inv(nxl_y:nxr_y,nzb_y:nzt_y,0:ny) !<
[1111]	889
[1682]	890	REAL(wp), DIMENSION(nxl_z:nxr_z,nzt_y-nzb_y+1,nys_z:nyn_z,0:pdims(1)-1) :: work !<
[1111]	891
[1]	892	!
	893	!-- If the PE grid is one-dimensional along y, the array has only to be
	894	!-- reordered locally and therefore no transposition has to be done.
	895	IF ( pdims(1) /= 1 ) THEN
[1106]	896
	897	#if defined( __parallel )
[1]	898	!
	899	!-- Reorder input array for transposition
[1111]	900	!$OMP PARALLEL PRIVATE ( i, j, k, l, zs )
[683]	901	!$OMP DO
[1]	902	DO l = 0, pdims(1) - 1
[1003]	903	zs = 1 + l * ( nzt_y - nzb_y + 1 )
	904	DO j = nys_z, nyn_z
	905	DO k = zs, zs + nzt_y - nzb_y
	906	DO i = nxl_z, nxr_z
[1111]	907	work(i,k-zs+1,j,l) = f_in(i,j,k)
[1]	908	ENDDO
	909	ENDDO
	910	ENDDO
	911	ENDDO
[683]	912	!$OMP END PARALLEL
[1]	913
	914	!
	915	!-- Transpose array
[1318]	916	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start', cpu_log_nowait )
[622]	917	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
[1111]	918	CALL MPI_ALLTOALL( work(nxl_z,1,nys_z,0), sendrecvcount_yz, MPI_REAL, &
	919	f_inv(nxl_y,nzb_y,0), sendrecvcount_yz, MPI_REAL, &
[1]	920	comm1dx, ierr )
	921	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
[1106]	922	#endif
[1]	923
	924	ELSE
	925	!
[1106]	926	!-- Reorder the array in the same way like ALLTOALL did it
[683]	927	!$OMP PARALLEL PRIVATE ( i, j, k )
	928	!$OMP DO
[1003]	929	DO k = nzb_y, nzt_y
	930	DO j = 0, ny
	931	DO i = nxl_y, nxr_y
[164]	932	f_inv(i,k,j) = f_in(i,j,k)
	933	ENDDO
	934	ENDDO
	935	ENDDO
[683]	936	!$OMP END PARALLEL
[1106]	937
	938	ENDIF
	939
[1]	940	END SUBROUTINE transpose_zy
	941
	942
	943	!------------------------------------------------------------------------------!
	944	! Description:
	945	! ------------
[1682]	946	!> Transposition of input array (f_in) from z to y. For the input array, all
	947	!> elements along z reside on the same PE, while after transposition, all
	948	!> elements along y reside on the same PE.
	949	!> This is a direct transposition for arrays with indices in regular order
	950	!> (k,j,i) (cf. transpose_zy).
[1]	951	!------------------------------------------------------------------------------!
[1682]	952	SUBROUTINE transpose_zyd( f_in, f_out )
[1]	953
[1682]	954
[1320]	955	USE cpulog, &
	956	ONLY: cpu_log, cpu_log_nowait, log_point_s
[1]	957
[1320]	958	USE indices, &
	959	ONLY: nnx, nny, nnz, nxl, nxr, nyn, nys, ny, nz
	960
	961	USE kinds
	962
[1324]	963	USE pegrid
[1320]	964
	965	USE transpose_indices, &
	966	ONLY: nxl_y, nxl_yd, nxr_y, nxr_yd, nzb_y, nzb_yd, nzt_y, nzt_yd
	967
[1]	968	IMPLICIT NONE
	969
[1682]	970	INTEGER(iwp) :: i !<
	971	INTEGER(iwp) :: j !<
	972	INTEGER(iwp) :: k !<
	973	INTEGER(iwp) :: l !<
	974	INTEGER(iwp) :: m !<
	975	INTEGER(iwp) :: ys !<
[1]	976
[1682]	977	REAL(wp) :: f_in(1:nz,nys:nyn,nxl:nxr) !<
	978	REAL(wp) :: f_inv(nys:nyn,nxl:nxr,1:nz) !<
	979	REAL(wp) :: f_out(0:ny,nxl_yd:nxr_yd,nzb_yd:nzt_yd) !<
	980	REAL(wp) :: work(nnxnnynnz) !<
[1320]	981
[1]	982	#if defined( __parallel )
	983
	984	!
	985	!-- Rearrange indices of input array in order to make data to be send
	986	!-- by MPI contiguous
[1003]	987	DO i = nxl, nxr
	988	DO j = nys, nyn
	989	DO k = 1, nz
[164]	990	f_inv(j,i,k) = f_in(k,j,i)
[1]	991	ENDDO
	992	ENDDO
	993	ENDDO
	994
	995	!
	996	!-- Move data to different array, because memory location of work1 is
	997	!-- needed further below (work1 = work2).
	998	!-- If the PE grid is one-dimensional along x, only local reordering
	999	!-- of the data is necessary and no transposition has to be done.
	1000	IF ( pdims(2) == 1 ) THEN
[1003]	1001	DO k = 1, nz
	1002	DO i = nxl, nxr
	1003	DO j = nys, nyn
[164]	1004	f_out(j,i,k) = f_inv(j,i,k)
[1]	1005	ENDDO
	1006	ENDDO
	1007	ENDDO
	1008	RETURN
	1009	ENDIF
	1010
	1011	!
	1012	!-- Transpose array
	1013	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
[622]	1014	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
[1]	1015	CALL MPI_ALLTOALL( f_inv(nys,nxl,1), sendrecvcount_zyd, MPI_REAL, &
[164]	1016	work(1), sendrecvcount_zyd, MPI_REAL, &
[1]	1017	comm1dy, ierr )
	1018	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
	1019
	1020	!
	1021	!-- Reorder transposed array
	1022	m = 0
	1023	DO l = 0, pdims(2) - 1
	1024	ys = 0 + l * nny
[1003]	1025	DO k = nzb_yd, nzt_yd
	1026	DO i = nxl_yd, nxr_yd
[1]	1027	DO j = ys, ys + nny - 1
	1028	m = m + 1
[164]	1029	f_out(j,i,k) = work(m)
[1]	1030	ENDDO
	1031	ENDDO
	1032	ENDDO
	1033	ENDDO
	1034
	1035	#endif
	1036
	1037	END SUBROUTINE transpose_zyd

Note: See TracBrowser for help on using the repository browser.

Download in other formats:

| Impressum | ©Leibniz Universität Hannover |