Home

Context Navigation

source: palm/trunk/SOURCE/transpose.f90 @ 2515

Last change on this file since 2515 was 2119, checked in by raasch, 8 years ago
last commit documented
Property svn:keywords set to `Id`
File size: 29.6 KB

Rev	Line
[1682]	1	!> @file transpose.f90
[2000]	2	!------------------------------------------------------------------------------!
[1036]	3	! This file is part of PALM.
	4	!
[2000]	5	! PALM is free software: you can redistribute it and/or modify it under the
	6	! terms of the GNU General Public License as published by the Free Software
	7	! Foundation, either version 3 of the License, or (at your option) any later
	8	! version.
[1036]	9	!
	10	! PALM is distributed in the hope that it will be useful, but WITHOUT ANY
	11	! WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
	12	! A PARTICULAR PURPOSE. See the GNU General Public License for more details.
	13	!
	14	! You should have received a copy of the GNU General Public License along with
	15	! PALM. If not, see <http://www.gnu.org/licenses/>.
	16	!
[2101]	17	! Copyright 1997-2017 Leibniz Universitaet Hannover
[2000]	18	!------------------------------------------------------------------------------!
[1036]	19	!
[484]	20	! Current revisions:
[1]	21	! -----------------
[1321]	22	!
[2119]	23	!
[1321]	24	! Former revisions:
	25	! -----------------
	26	! $Id: transpose.f90 2119 2017-01-17 16:51:50Z kanani $
	27	!
[2119]	28	! 2118 2017-01-17 16:38:49Z raasch
	29	! OpenACC directives removed
	30	!
[2001]	31	! 2000 2016-08-20 18:09:15Z knoop
	32	! Forced header and separation lines into 80 columns
	33	!
[1683]	34	! 1682 2015-10-07 23:56:08Z knoop
	35	! Code annotations made doxygen readable
	36	!
[1325]	37	! 1324 2014-03-21 09:13:16Z suehring
	38	! Bugfix: ONLY statement for module pegrid removed
	39	!
[1321]	40	! 1320 2014-03-20 08:40:49Z raasch
[1320]	41	! ONLY-attribute added to USE-statements,
	42	! kind-parameters added to all INTEGER and REAL declaration statements,
	43	! kinds are defined in new module kinds,
	44	! old module precision_kind is removed,
	45	! revision history before 2012 removed,
	46	! comment fields (!:) to be used for variable explanations added to
	47	! all variable declaration statements
[198]	48	!
[1319]	49	! 1318 2014-03-17 13:35:16Z raasch
	50	! cpu_log_nowait parameter added to cpu measurements of the transpositions
	51	! required for solving the Poisson equation (poisfft),
	52	! module interfaces removed
	53	!
[1258]	54	! 1257 2013-11-08 15:18:40Z raasch
	55	! openacc loop and loop vector clauses removed
	56	!
[1217]	57	! 1216 2013-08-26 09:31:42Z raasch
	58	! re-sorting of the transposed / to be transposed arrays moved to separate
	59	! routines resort_for_...
	60	!
[1112]	61	! 1111 2013-03-08 23:54:10Z raasch
	62	! openACC directives added,
	63	! resorting data from/to work changed, work got 4 dimensions instead of 1
	64	!
[1107]	65	! 1106 2013-03-04 05:31:38Z raasch
	66	! preprocessor lines rearranged so that routines can also be used in serial
	67	! (non-parallel) mode
	68	!
[1093]	69	! 1092 2013-02-02 11:24:22Z raasch
	70	! unused variables removed
	71	!
[1037]	72	! 1036 2012-10-22 13:43:42Z raasch
	73	! code put under GPL (PALM 3.9)
	74	!
[1004]	75	! 1003 2012-09-14 14:35:53Z raasch
	76	! indices nxa, nya, etc. replaced by nx, ny, etc.
	77	!
[1]	78	! Revision 1.1 1997/07/24 11:25:18 raasch
	79	! Initial revision
	80	!
[1216]	81	!------------------------------------------------------------------------------!
	82	! Description:
	83	! ------------
[1682]	84	!> Resorting data for the transposition from x to y. The transposition itself
	85	!> is carried out in transpose_xy
[1216]	86	!------------------------------------------------------------------------------!
[1682]	87	SUBROUTINE resort_for_xy( f_in, f_inv )
	88
[1216]	89
[1320]	90	USE indices, &
	91	ONLY: nx
[1216]	92
[1320]	93	USE kinds
	94
	95	USE transpose_indices, &
	96	ONLY: nxl_z, nxr_z, nyn_x, nyn_z, nys_x, nys_z, nzb_x, nzt_x
	97
[1216]	98	IMPLICIT NONE
	99
[1682]	100	REAL(wp) :: f_in(0:nx,nys_x:nyn_x,nzb_x:nzt_x) !<
	101	REAL(wp) :: f_inv(nys_x:nyn_x,nzb_x:nzt_x,0:nx) !<
[1216]	102
	103
[1682]	104	INTEGER(iwp) :: i !<
	105	INTEGER(iwp) :: j !<
	106	INTEGER(iwp) :: k !<
[1]	107	!
[1216]	108	!-- Rearrange indices of input array in order to make data to be send
	109	!-- by MPI contiguous
	110	!$OMP PARALLEL PRIVATE ( i, j, k )
	111	!$OMP DO
	112	DO i = 0, nx
	113	DO k = nzb_x, nzt_x
	114	DO j = nys_x, nyn_x
	115	f_inv(j,k,i) = f_in(i,j,k)
	116	ENDDO
	117	ENDDO
	118	ENDDO
	119	!$OMP END PARALLEL
	120
	121	END SUBROUTINE resort_for_xy
	122
	123
	124	!------------------------------------------------------------------------------!
[1]	125	! Description:
	126	! ------------
[1682]	127	!> Transposition of input array (f_in) from x to y. For the input array, all
	128	!> elements along x reside on the same PE, while after transposition, all
	129	!> elements along y reside on the same PE.
[1]	130	!------------------------------------------------------------------------------!
[1682]	131	SUBROUTINE transpose_xy( f_inv, f_out )
[1]	132
[1682]	133
[1320]	134	USE cpulog, &
	135	ONLY: cpu_log, cpu_log_nowait, log_point_s
	136
	137	USE indices, &
	138	ONLY: nx, ny
	139
	140	USE kinds
	141
[1]	142	USE pegrid
	143
[1320]	144	USE transpose_indices, &
	145	ONLY: nxl_y, nxr_y, nyn_x, nys_x, nzb_x, nzb_y, nzt_x, nzt_y
	146
[1]	147	IMPLICIT NONE
	148
[1682]	149	INTEGER(iwp) :: i !<
	150	INTEGER(iwp) :: j !<
	151	INTEGER(iwp) :: k !<
	152	INTEGER(iwp) :: l !<
	153	INTEGER(iwp) :: ys !<
[1320]	154
[1682]	155	REAL(wp) :: f_inv(nys_x:nyn_x,nzb_x:nzt_x,0:nx) !<
	156	REAL(wp) :: f_out(0:ny,nxl_y:nxr_y,nzb_y:nzt_y) !<
[1]	157
[1682]	158	REAL(wp), DIMENSION(nyn_x-nys_x+1,nzb_y:nzt_y,nxl_y:nxr_y,0:pdims(2)-1) :: work !<
[1111]	159
	160
[1106]	161	IF ( numprocs /= 1 ) THEN
	162
	163	#if defined( __parallel )
[1]	164	!
[1106]	165	!-- Transpose array
[1318]	166	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start', cpu_log_nowait )
[1106]	167	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
[1111]	168	CALL MPI_ALLTOALL( f_inv(nys_x,nzb_x,0), sendrecvcount_xy, MPI_REAL, &
	169	work(1,nzb_y,nxl_y,0), sendrecvcount_xy, MPI_REAL, &
[1106]	170	comm1dy, ierr )
	171	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
[1]	172
	173	!
[1106]	174	!-- Reorder transposed array
[1111]	175	!$OMP PARALLEL PRIVATE ( i, j, k, l, ys )
[683]	176	!$OMP DO
[1106]	177	DO l = 0, pdims(2) - 1
	178	ys = 0 + l * ( nyn_x - nys_x + 1 )
	179	DO i = nxl_y, nxr_y
	180	DO k = nzb_y, nzt_y
	181	DO j = ys, ys + nyn_x - nys_x
[1111]	182	f_out(j,i,k) = work(j-ys+1,k,i,l)
[1106]	183	ENDDO
[1]	184	ENDDO
	185	ENDDO
	186	ENDDO
[683]	187	!$OMP END PARALLEL
[1]	188	#endif
	189
[1106]	190	ELSE
	191
	192	!
	193	!-- Reorder transposed array
	194	!$OMP PARALLEL PRIVATE ( i, j, k )
	195	!$OMP DO
	196	DO k = nzb_y, nzt_y
	197	DO i = nxl_y, nxr_y
	198	DO j = 0, ny
	199	f_out(j,i,k) = f_inv(j,k,i)
	200	ENDDO
	201	ENDDO
	202	ENDDO
	203	!$OMP END PARALLEL
	204
	205	ENDIF
	206
[1]	207	END SUBROUTINE transpose_xy
	208
	209
	210	!------------------------------------------------------------------------------!
	211	! Description:
	212	! ------------
[1682]	213	!> Resorting data after the transposition from x to z. The transposition itself
	214	!> is carried out in transpose_xz
[1216]	215	!------------------------------------------------------------------------------!
[1682]	216	SUBROUTINE resort_for_xz( f_inv, f_out )
[1216]	217
[1682]	218
[1320]	219	USE indices, &
	220	ONLY: nxl, nxr, nyn, nys, nz
[1216]	221
[1320]	222	USE kinds
	223
[1216]	224	IMPLICIT NONE
	225
[1682]	226	REAL(wp) :: f_inv(nys:nyn,nxl:nxr,1:nz) !<
	227	REAL(wp) :: f_out(1:nz,nys:nyn,nxl:nxr) !<
[1216]	228
[1682]	229	INTEGER(iwp) :: i !<
	230	INTEGER(iwp) :: j !<
	231	INTEGER(iwp) :: k !<
[1216]	232	!
	233	!-- Rearrange indices of input array in order to make data to be send
	234	!-- by MPI contiguous.
	235	!-- In case of parallel fft/transposition, scattered store is faster in
	236	!-- backward direction!!!
	237	!$OMP PARALLEL PRIVATE ( i, j, k )
	238	!$OMP DO
	239	DO k = 1, nz
	240	DO i = nxl, nxr
	241	DO j = nys, nyn
	242	f_out(k,j,i) = f_inv(j,i,k)
	243	ENDDO
	244	ENDDO
	245	ENDDO
	246	!$OMP END PARALLEL
	247
	248	END SUBROUTINE resort_for_xz
	249
	250
	251	!------------------------------------------------------------------------------!
	252	! Description:
	253	! ------------
[1682]	254	!> Transposition of input array (f_in) from x to z. For the input array, all
	255	!> elements along x reside on the same PE, while after transposition, all
	256	!> elements along z reside on the same PE.
[1]	257	!------------------------------------------------------------------------------!
[1682]	258	SUBROUTINE transpose_xz( f_in, f_inv )
[1]	259
[1682]	260
[1320]	261	USE cpulog, &
	262	ONLY: cpu_log, cpu_log_nowait, log_point_s
[1]	263
[1320]	264	USE indices, &
	265	ONLY: nnx, nx, nxl, nxr, ny, nyn, nys, nz
	266
	267	USE kinds
	268
[1324]	269	USE pegrid
[1320]	270
	271	USE transpose_indices, &
	272	ONLY: nyn_x, nys_x, nzb_x, nzt_x
	273
[1]	274	IMPLICIT NONE
	275
[1682]	276	INTEGER(iwp) :: i !<
	277	INTEGER(iwp) :: j !<
	278	INTEGER(iwp) :: k !<
	279	INTEGER(iwp) :: l !<
	280	INTEGER(iwp) :: xs !<
[1]	281
[1682]	282	REAL(wp) :: f_in(0:nx,nys_x:nyn_x,nzb_x:nzt_x) !<
	283	REAL(wp) :: f_inv(nys:nyn,nxl:nxr,1:nz) !<
[1]	284
[1682]	285	REAL(wp), DIMENSION(nys_x:nyn_x,nnx,nzb_x:nzt_x,0:pdims(1)-1) :: work !<
[1111]	286
[1320]	287
[1]	288	!
	289	!-- If the PE grid is one-dimensional along y, the array has only to be
	290	!-- reordered locally and therefore no transposition has to be done.
	291	IF ( pdims(1) /= 1 ) THEN
[1106]	292
	293	#if defined( __parallel )
[1]	294	!
	295	!-- Reorder input array for transposition
[1111]	296	!$OMP PARALLEL PRIVATE ( i, j, k, l, xs )
[683]	297	!$OMP DO
[1]	298	DO l = 0, pdims(1) - 1
	299	xs = 0 + l * nnx
[1003]	300	DO k = nzb_x, nzt_x
[164]	301	DO i = xs, xs + nnx - 1
[1003]	302	DO j = nys_x, nyn_x
[1111]	303	work(j,i-xs+1,k,l) = f_in(i,j,k)
[1]	304	ENDDO
	305	ENDDO
	306	ENDDO
	307	ENDDO
[683]	308	!$OMP END PARALLEL
[1]	309
	310	!
	311	!-- Transpose array
[1318]	312	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start', cpu_log_nowait )
[622]	313	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
[1111]	314	CALL MPI_ALLTOALL( work(nys_x,1,nzb_x,0), sendrecvcount_zx, MPI_REAL, &
	315	f_inv(nys,nxl,1), sendrecvcount_zx, MPI_REAL, &
[1]	316	comm1dx, ierr )
	317	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
[1106]	318	#endif
	319
[1]	320	ELSE
[1106]	321
[1]	322	!
	323	!-- Reorder the array in a way that the z index is in first position
[683]	324	!$OMP PARALLEL PRIVATE ( i, j, k )
	325	!$OMP DO
[1003]	326	DO i = nxl, nxr
	327	DO j = nys, nyn
	328	DO k = 1, nz
[164]	329	f_inv(j,i,k) = f_in(i,j,k)
[1]	330	ENDDO
	331	ENDDO
	332	ENDDO
[683]	333	!$OMP END PARALLEL
[1]	334
[164]	335	ENDIF
	336
[1]	337	END SUBROUTINE transpose_xz
	338
	339
	340	!------------------------------------------------------------------------------!
	341	! Description:
	342	! ------------
[1682]	343	!> Resorting data after the transposition from y to x. The transposition itself
	344	!> is carried out in transpose_yx
[1216]	345	!------------------------------------------------------------------------------!
[1682]	346	SUBROUTINE resort_for_yx( f_inv, f_out )
[1216]	347
[1682]	348
[1320]	349	USE indices, &
	350	ONLY: nx
[1216]	351
[1320]	352	USE kinds
	353
	354	USE transpose_indices, &
	355	ONLY: nyn_x, nys_x, nzb_x, nzt_x
	356
[1216]	357	IMPLICIT NONE
	358
[1682]	359	REAL(wp) :: f_inv(nys_x:nyn_x,nzb_x:nzt_x,0:nx) !<
	360	REAL(wp) :: f_out(0:nx,nys_x:nyn_x,nzb_x:nzt_x) !<
[1216]	361
	362
[1682]	363	INTEGER(iwp) :: i !<
	364	INTEGER(iwp) :: j !<
	365	INTEGER(iwp) :: k !<
[1216]	366	!
	367	!-- Rearrange indices of input array in order to make data to be send
	368	!-- by MPI contiguous
	369	!$OMP PARALLEL PRIVATE ( i, j, k )
	370	!$OMP DO
	371	DO i = 0, nx
	372	DO k = nzb_x, nzt_x
	373	DO j = nys_x, nyn_x
	374	f_out(i,j,k) = f_inv(j,k,i)
	375	ENDDO
	376	ENDDO
	377	ENDDO
	378	!$OMP END PARALLEL
	379
	380	END SUBROUTINE resort_for_yx
	381
	382
	383	!------------------------------------------------------------------------------!
	384	! Description:
	385	! ------------
[1682]	386	!> Transposition of input array (f_in) from y to x. For the input array, all
	387	!> elements along y reside on the same PE, while after transposition, all
	388	!> elements along x reside on the same PE.
[1]	389	!------------------------------------------------------------------------------!
[1682]	390	SUBROUTINE transpose_yx( f_in, f_inv )
[1]	391
[1682]	392
[1320]	393	USE cpulog, &
	394	ONLY: cpu_log, cpu_log_nowait, log_point_s
[1]	395
[1320]	396	USE indices, &
	397	ONLY: nx, ny
	398
	399	USE kinds
	400
[1324]	401	USE pegrid
[1320]	402
	403	USE transpose_indices, &
	404	ONLY: nxl_y, nxr_y, nyn_x, nys_x, nzb_x, nzb_y, nzt_x, nzt_y
	405
[1]	406	IMPLICIT NONE
	407
[1682]	408	INTEGER(iwp) :: i !<
	409	INTEGER(iwp) :: j !<
	410	INTEGER(iwp) :: k !<
	411	INTEGER(iwp) :: l !<
	412	INTEGER(iwp) :: ys !<
[1]	413
[1682]	414	REAL(wp) :: f_in(0:ny,nxl_y:nxr_y,nzb_y:nzt_y) !<
	415	REAL(wp) :: f_inv(nys_x:nyn_x,nzb_x:nzt_x,0:nx) !<
[1111]	416
[1682]	417	REAL(wp), DIMENSION(nyn_x-nys_x+1,nzb_y:nzt_y,nxl_y:nxr_y,0:pdims(2)-1) :: work !<
[1111]	418
[1320]	419
[1106]	420	IF ( numprocs /= 1 ) THEN
	421
[1]	422	#if defined( __parallel )
	423	!
[1106]	424	!-- Reorder input array for transposition
[1111]	425	!$OMP PARALLEL PRIVATE ( i, j, k, l, ys )
[683]	426	!$OMP DO
[1106]	427	DO l = 0, pdims(2) - 1
	428	ys = 0 + l * ( nyn_x - nys_x + 1 )
	429	DO i = nxl_y, nxr_y
	430	DO k = nzb_y, nzt_y
	431	DO j = ys, ys + nyn_x - nys_x
[1111]	432	work(j-ys+1,k,i,l) = f_in(j,i,k)
[1106]	433	ENDDO
	434	ENDDO
	435	ENDDO
	436	ENDDO
	437	!$OMP END PARALLEL
	438
	439	!
	440	!-- Transpose array
[1318]	441	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start', cpu_log_nowait )
[1106]	442	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
[1111]	443	CALL MPI_ALLTOALL( work(1,nzb_y,nxl_y,0), sendrecvcount_xy, MPI_REAL, &
	444	f_inv(nys_x,nzb_x,0), sendrecvcount_xy, MPI_REAL, &
[1106]	445	comm1dy, ierr )
	446	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
	447	#endif
	448
	449	ELSE
	450
	451	!
	452	!-- Reorder array f_in the same way as ALLTOALL did it
	453	!$OMP PARALLEL PRIVATE ( i, j, k )
	454	!$OMP DO
[1003]	455	DO i = nxl_y, nxr_y
	456	DO k = nzb_y, nzt_y
[1106]	457	DO j = 0, ny
	458	f_inv(j,k,i) = f_in(j,i,k)
[1]	459	ENDDO
	460	ENDDO
	461	ENDDO
[683]	462	!$OMP END PARALLEL
[1]	463
[1106]	464	ENDIF
[1]	465
	466	END SUBROUTINE transpose_yx
	467
	468
	469	!------------------------------------------------------------------------------!
	470	! Description:
	471	! ------------
[1682]	472	!> Transposition of input array (f_in) from y to x. For the input array, all
	473	!> elements along y reside on the same PE, while after transposition, all
	474	!> elements along x reside on the same PE.
	475	!> This is a direct transposition for arrays with indices in regular order
	476	!> (k,j,i) (cf. transpose_yx).
[1]	477	!------------------------------------------------------------------------------!
[1682]	478	SUBROUTINE transpose_yxd( f_in, f_out )
[1]	479
[1682]	480
[1320]	481	USE cpulog, &
	482	ONLY: cpu_log, cpu_log_nowait, log_point_s
[1]	483
[1320]	484	USE indices, &
	485	ONLY: nnx, nny, nnz, nx, nxl, nxr, nyn, nys, nz
	486
	487	USE kinds
	488
[1324]	489	USE pegrid
[1320]	490
	491	USE transpose_indices, &
	492	ONLY: nyn_x, nys_x, nzb_x, nzt_x
	493
[1]	494	IMPLICIT NONE
	495
[1682]	496	INTEGER(iwp) :: i !<
	497	INTEGER(iwp) :: j !<
	498	INTEGER(iwp) :: k !<
	499	INTEGER(iwp) :: l !<
	500	INTEGER(iwp) :: m !<
	501	INTEGER(iwp) :: xs !<
[1]	502
[1682]	503	REAL(wp) :: f_in(1:nz,nys:nyn,nxl:nxr) !<
	504	REAL(wp) :: f_inv(nxl:nxr,1:nz,nys:nyn) !<
	505	REAL(wp) :: f_out(0:nx,nys_x:nyn_x,nzb_x:nzt_x) !<
	506	REAL(wp) :: work(nnxnnynnz) !<
[1]	507	#if defined( __parallel )
	508
	509	!
	510	!-- Rearrange indices of input array in order to make data to be send
	511	!-- by MPI contiguous
[1003]	512	DO k = 1, nz
	513	DO j = nys, nyn
	514	DO i = nxl, nxr
[164]	515	f_inv(i,k,j) = f_in(k,j,i)
[1]	516	ENDDO
	517	ENDDO
	518	ENDDO
	519
	520	!
	521	!-- Transpose array
	522	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
[622]	523	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
[1]	524	CALL MPI_ALLTOALL( f_inv(nxl,1,nys), sendrecvcount_xy, MPI_REAL, &
[164]	525	work(1), sendrecvcount_xy, MPI_REAL, &
[1]	526	comm1dx, ierr )
	527	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
	528
	529	!
	530	!-- Reorder transposed array
	531	m = 0
	532	DO l = 0, pdims(1) - 1
	533	xs = 0 + l * nnx
[1003]	534	DO j = nys_x, nyn_x
	535	DO k = 1, nz
[1]	536	DO i = xs, xs + nnx - 1
	537	m = m + 1
[164]	538	f_out(i,j,k) = work(m)
[1]	539	ENDDO
	540	ENDDO
	541	ENDDO
	542	ENDDO
	543
	544	#endif
	545
	546	END SUBROUTINE transpose_yxd
	547
	548
	549	!------------------------------------------------------------------------------!
	550	! Description:
	551	! ------------
[1682]	552	!> Resorting data for the transposition from y to z. The transposition itself
	553	!> is carried out in transpose_yz
[1216]	554	!------------------------------------------------------------------------------!
[1682]	555	SUBROUTINE resort_for_yz( f_in, f_inv )
[1216]	556
[1682]	557
[1320]	558	USE indices, &
	559	ONLY: ny
[1216]	560
[1320]	561	USE kinds
	562
	563	USE transpose_indices, &
	564	ONLY: nxl_y, nxr_y, nzb_y, nzt_y
	565
[1216]	566	IMPLICIT NONE
	567
[1682]	568	REAL(wp) :: f_in(0:ny,nxl_y:nxr_y,nzb_y:nzt_y) !<
	569	REAL(wp) :: f_inv(nxl_y:nxr_y,nzb_y:nzt_y,0:ny) !<
[1216]	570
[1682]	571	INTEGER(iwp) :: i !<
	572	INTEGER(iwp) :: j !<
	573	INTEGER(iwp) :: k !<
[1216]	574
	575	!
	576	!-- Rearrange indices of input array in order to make data to be send
	577	!-- by MPI contiguous
	578	!$OMP PARALLEL PRIVATE ( i, j, k )
	579	!$OMP DO
	580	DO j = 0, ny
	581	DO k = nzb_y, nzt_y
	582	DO i = nxl_y, nxr_y
	583	f_inv(i,k,j) = f_in(j,i,k)
	584	ENDDO
	585	ENDDO
	586	ENDDO
	587	!$OMP END PARALLEL
	588
	589	END SUBROUTINE resort_for_yz
	590
	591
	592	!------------------------------------------------------------------------------!
	593	! Description:
	594	! ------------
[1682]	595	!> Transposition of input array (f_in) from y to z. For the input array, all
	596	!> elements along y reside on the same PE, while after transposition, all
	597	!> elements along z reside on the same PE.
[1]	598	!------------------------------------------------------------------------------!
[1682]	599	SUBROUTINE transpose_yz( f_inv, f_out )
[1]	600
[1682]	601
[1320]	602	USE cpulog, &
	603	ONLY: cpu_log, cpu_log_nowait, log_point_s
[1]	604
[1320]	605	USE indices, &
	606	ONLY: ny, nz
	607
	608	USE kinds
	609
[1324]	610	USE pegrid
[1320]	611
	612	USE transpose_indices, &
	613	ONLY: nxl_y, nxl_z, nxr_y, nxr_z, nyn_z, nys_z, nzb_y, nzt_y
	614
[1]	615	IMPLICIT NONE
	616
[1682]	617	INTEGER(iwp) :: i !<
	618	INTEGER(iwp) :: j !<
	619	INTEGER(iwp) :: k !<
	620	INTEGER(iwp) :: l !<
	621	INTEGER(iwp) :: zs !<
[1]	622
[1682]	623	REAL(wp) :: f_inv(nxl_y:nxr_y,nzb_y:nzt_y,0:ny) !<
	624	REAL(wp) :: f_out(nxl_z:nxr_z,nys_z:nyn_z,1:nz) !<
[1111]	625
[1682]	626	REAL(wp), DIMENSION(nxl_z:nxr_z,nzt_y-nzb_y+1,nys_z:nyn_z,0:pdims(1)-1) :: work !<
[1111]	627
[1320]	628
[1]	629	!
	630	!-- If the PE grid is one-dimensional along y, only local reordering
	631	!-- of the data is necessary and no transposition has to be done.
	632	IF ( pdims(1) == 1 ) THEN
[1106]	633
[683]	634	!$OMP PARALLEL PRIVATE ( i, j, k )
	635	!$OMP DO
[1003]	636	DO j = 0, ny
	637	DO k = nzb_y, nzt_y
	638	DO i = nxl_y, nxr_y
[164]	639	f_out(i,j,k) = f_inv(i,k,j)
[1]	640	ENDDO
	641	ENDDO
	642	ENDDO
[683]	643	!$OMP END PARALLEL
[1]	644
[1106]	645	ELSE
	646
	647	#if defined( __parallel )
[1]	648	!
[1106]	649	!-- Transpose array
[1318]	650	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start', cpu_log_nowait )
[1106]	651	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
[1111]	652	CALL MPI_ALLTOALL( f_inv(nxl_y,nzb_y,0), sendrecvcount_yz, MPI_REAL, &
	653	work(nxl_z,1,nys_z,0), sendrecvcount_yz, MPI_REAL, &
[1106]	654	comm1dx, ierr )
	655	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
[1]	656
	657	!
[1106]	658	!-- Reorder transposed array
[1111]	659	!$OMP PARALLEL PRIVATE ( i, j, k, l, zs )
[683]	660	!$OMP DO
[1106]	661	DO l = 0, pdims(1) - 1
	662	zs = 1 + l * ( nzt_y - nzb_y + 1 )
	663	DO j = nys_z, nyn_z
	664	DO k = zs, zs + nzt_y - nzb_y
	665	DO i = nxl_z, nxr_z
[1111]	666	f_out(i,j,k) = work(i,k-zs+1,j,l)
[1106]	667	ENDDO
[1]	668	ENDDO
	669	ENDDO
	670	ENDDO
[683]	671	!$OMP END PARALLEL
[1]	672	#endif
	673
[1106]	674	ENDIF
	675
[1]	676	END SUBROUTINE transpose_yz
	677
	678
	679	!------------------------------------------------------------------------------!
	680	! Description:
	681	! ------------
[1682]	682	!> Resorting data for the transposition from z to x. The transposition itself
	683	!> is carried out in transpose_zx
[1216]	684	!------------------------------------------------------------------------------!
[1682]	685	SUBROUTINE resort_for_zx( f_in, f_inv )
[1216]	686
[1682]	687
[1320]	688	USE indices, &
	689	ONLY: nxl, nxr, nyn, nys, nz
[1216]	690
[1320]	691	USE kinds
	692
[1216]	693	IMPLICIT NONE
	694
[1682]	695	REAL(wp) :: f_in(1:nz,nys:nyn,nxl:nxr) !<
	696	REAL(wp) :: f_inv(nys:nyn,nxl:nxr,1:nz) !<
[1216]	697
[1682]	698	INTEGER(iwp) :: i !<
	699	INTEGER(iwp) :: j !<
	700	INTEGER(iwp) :: k !<
[1216]	701
	702	!
	703	!-- Rearrange indices of input array in order to make data to be send
	704	!-- by MPI contiguous
	705	!$OMP PARALLEL PRIVATE ( i, j, k )
	706	!$OMP DO
	707	DO k = 1,nz
	708	DO i = nxl, nxr
	709	DO j = nys, nyn
	710	f_inv(j,i,k) = f_in(k,j,i)
	711	ENDDO
	712	ENDDO
	713	ENDDO
	714	!$OMP END PARALLEL
	715
	716	END SUBROUTINE resort_for_zx
	717
	718
	719	!------------------------------------------------------------------------------!
	720	! Description:
	721	! ------------
[1682]	722	!> Transposition of input array (f_in) from z to x. For the input array, all
	723	!> elements along z reside on the same PE, while after transposition, all
	724	!> elements along x reside on the same PE.
[1]	725	!------------------------------------------------------------------------------!
[1682]	726	SUBROUTINE transpose_zx( f_inv, f_out )
[1]	727
[1682]	728
[1320]	729	USE cpulog, &
	730	ONLY: cpu_log, cpu_log_nowait, log_point_s
[1]	731
[1320]	732	USE indices, &
	733	ONLY: nnx, nx, nxl, nxr, nyn, nys, nz
	734
	735	USE kinds
	736
[1324]	737	USE pegrid
[1320]	738
	739	USE transpose_indices, &
	740	ONLY: nyn_x, nys_x, nzb_x, nzt_x
	741
[1]	742	IMPLICIT NONE
	743
[1682]	744	INTEGER(iwp) :: i !<
	745	INTEGER(iwp) :: j !<
	746	INTEGER(iwp) :: k !<
	747	INTEGER(iwp) :: l !<
	748	INTEGER(iwp) :: xs !<
[1]	749
[1682]	750	REAL(wp) :: f_inv(nys:nyn,nxl:nxr,1:nz) !<
	751	REAL(wp) :: f_out(0:nx,nys_x:nyn_x,nzb_x:nzt_x) !<
[1111]	752
[1682]	753	REAL(wp), DIMENSION(nys_x:nyn_x,nnx,nzb_x:nzt_x,0:pdims(1)-1) :: work !<
[1]	754
[1320]	755
[1]	756	!
	757	!-- If the PE grid is one-dimensional along y, only local reordering
	758	!-- of the data is necessary and no transposition has to be done.
	759	IF ( pdims(1) == 1 ) THEN
[1106]	760
[683]	761	!$OMP PARALLEL PRIVATE ( i, j, k )
	762	!$OMP DO
[1003]	763	DO k = 1, nz
	764	DO i = nxl, nxr
	765	DO j = nys, nyn
[164]	766	f_out(i,j,k) = f_inv(j,i,k)
[1]	767	ENDDO
	768	ENDDO
	769	ENDDO
[683]	770	!$OMP END PARALLEL
[1]	771
[1106]	772	ELSE
	773
	774	#if defined( __parallel )
[1]	775	!
[1106]	776	!-- Transpose array
[1318]	777	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start', cpu_log_nowait )
[1106]	778	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
[1111]	779	CALL MPI_ALLTOALL( f_inv(nys,nxl,1), sendrecvcount_zx, MPI_REAL, &
	780	work(nys_x,1,nzb_x,0), sendrecvcount_zx, MPI_REAL, &
[1106]	781	comm1dx, ierr )
	782	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
[1]	783
	784	!
[1106]	785	!-- Reorder transposed array
[1111]	786	!$OMP PARALLEL PRIVATE ( i, j, k, l, xs )
[683]	787	!$OMP DO
[1106]	788	DO l = 0, pdims(1) - 1
	789	xs = 0 + l * nnx
	790	DO k = nzb_x, nzt_x
	791	DO i = xs, xs + nnx - 1
	792	DO j = nys_x, nyn_x
[1111]	793	f_out(i,j,k) = work(j,i-xs+1,k,l)
[1106]	794	ENDDO
[1]	795	ENDDO
	796	ENDDO
	797	ENDDO
[683]	798	!$OMP END PARALLEL
[1]	799	#endif
	800
[1106]	801	ENDIF
	802
[1]	803	END SUBROUTINE transpose_zx
	804
	805
	806	!------------------------------------------------------------------------------!
	807	! Description:
	808	! ------------
[1682]	809	!> Resorting data after the transposition from z to y. The transposition itself
	810	!> is carried out in transpose_zy
[1216]	811	!------------------------------------------------------------------------------!
[1682]	812	SUBROUTINE resort_for_zy( f_inv, f_out )
[1216]	813
[1682]	814
[1320]	815	USE indices, &
	816	ONLY: ny
[1216]	817
[1320]	818	USE kinds
	819
	820	USE transpose_indices, &
	821	ONLY: nxl_y, nxr_y, nzb_y, nzt_y
	822
[1216]	823	IMPLICIT NONE
	824
[1682]	825	REAL(wp) :: f_inv(nxl_y:nxr_y,nzb_y:nzt_y,0:ny) !<
	826	REAL(wp) :: f_out(0:ny,nxl_y:nxr_y,nzb_y:nzt_y) !<
[1216]	827
	828
[1682]	829	INTEGER(iwp) :: i !<
	830	INTEGER(iwp) :: j !<
	831	INTEGER(iwp) :: k !<
[1216]	832
	833	!
	834	!-- Rearrange indices of input array in order to make data to be send
	835	!-- by MPI contiguous
	836	!$OMP PARALLEL PRIVATE ( i, j, k )
	837	!$OMP DO
	838	DO k = nzb_y, nzt_y
	839	DO j = 0, ny
	840	DO i = nxl_y, nxr_y
	841	f_out(j,i,k) = f_inv(i,k,j)
	842	ENDDO
	843	ENDDO
	844	ENDDO
	845	!$OMP END PARALLEL
	846
	847	END SUBROUTINE resort_for_zy
	848
	849
	850	!------------------------------------------------------------------------------!
	851	! Description:
	852	! ------------
[1682]	853	!> Transposition of input array (f_in) from z to y. For the input array, all
	854	!> elements along z reside on the same PE, while after transposition, all
	855	!> elements along y reside on the same PE.
[1]	856	!------------------------------------------------------------------------------!
[1682]	857	SUBROUTINE transpose_zy( f_in, f_inv )
[1]	858
[1682]	859
[1320]	860	USE cpulog, &
	861	ONLY: cpu_log, cpu_log_nowait, log_point_s
[1]	862
[1320]	863	USE indices, &
	864	ONLY: ny, nz
	865
	866	USE kinds
	867
[1324]	868	USE pegrid
[1320]	869
	870	USE transpose_indices, &
	871	ONLY: nxl_y, nxl_z, nxr_y, nxr_z, nyn_z, nys_z, nzb_y, nzt_y
	872
[1]	873	IMPLICIT NONE
	874
[1682]	875	INTEGER(iwp) :: i !<
	876	INTEGER(iwp) :: j !<
	877	INTEGER(iwp) :: k !<
	878	INTEGER(iwp) :: l !<
	879	INTEGER(iwp) :: zs !<
[1]	880
[1682]	881	REAL(wp) :: f_in(nxl_z:nxr_z,nys_z:nyn_z,1:nz) !<
	882	REAL(wp) :: f_inv(nxl_y:nxr_y,nzb_y:nzt_y,0:ny) !<
[1111]	883
[1682]	884	REAL(wp), DIMENSION(nxl_z:nxr_z,nzt_y-nzb_y+1,nys_z:nyn_z,0:pdims(1)-1) :: work !<
[1111]	885
[1]	886	!
	887	!-- If the PE grid is one-dimensional along y, the array has only to be
	888	!-- reordered locally and therefore no transposition has to be done.
	889	IF ( pdims(1) /= 1 ) THEN
[1106]	890
	891	#if defined( __parallel )
[1]	892	!
	893	!-- Reorder input array for transposition
[1111]	894	!$OMP PARALLEL PRIVATE ( i, j, k, l, zs )
[683]	895	!$OMP DO
[1]	896	DO l = 0, pdims(1) - 1
[1003]	897	zs = 1 + l * ( nzt_y - nzb_y + 1 )
	898	DO j = nys_z, nyn_z
	899	DO k = zs, zs + nzt_y - nzb_y
	900	DO i = nxl_z, nxr_z
[1111]	901	work(i,k-zs+1,j,l) = f_in(i,j,k)
[1]	902	ENDDO
	903	ENDDO
	904	ENDDO
	905	ENDDO
[683]	906	!$OMP END PARALLEL
[1]	907
	908	!
	909	!-- Transpose array
[1318]	910	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start', cpu_log_nowait )
[622]	911	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
[1111]	912	CALL MPI_ALLTOALL( work(nxl_z,1,nys_z,0), sendrecvcount_yz, MPI_REAL, &
	913	f_inv(nxl_y,nzb_y,0), sendrecvcount_yz, MPI_REAL, &
[1]	914	comm1dx, ierr )
	915	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
[1106]	916	#endif
[1]	917
	918	ELSE
	919	!
[1106]	920	!-- Reorder the array in the same way like ALLTOALL did it
[683]	921	!$OMP PARALLEL PRIVATE ( i, j, k )
	922	!$OMP DO
[1003]	923	DO k = nzb_y, nzt_y
	924	DO j = 0, ny
	925	DO i = nxl_y, nxr_y
[164]	926	f_inv(i,k,j) = f_in(i,j,k)
	927	ENDDO
	928	ENDDO
	929	ENDDO
[683]	930	!$OMP END PARALLEL
[1106]	931
	932	ENDIF
	933
[1]	934	END SUBROUTINE transpose_zy
	935
	936
	937	!------------------------------------------------------------------------------!
	938	! Description:
	939	! ------------
[1682]	940	!> Transposition of input array (f_in) from z to y. For the input array, all
	941	!> elements along z reside on the same PE, while after transposition, all
	942	!> elements along y reside on the same PE.
	943	!> This is a direct transposition for arrays with indices in regular order
	944	!> (k,j,i) (cf. transpose_zy).
[1]	945	!------------------------------------------------------------------------------!
[1682]	946	SUBROUTINE transpose_zyd( f_in, f_out )
[1]	947
[1682]	948
[1320]	949	USE cpulog, &
	950	ONLY: cpu_log, cpu_log_nowait, log_point_s
[1]	951
[1320]	952	USE indices, &
	953	ONLY: nnx, nny, nnz, nxl, nxr, nyn, nys, ny, nz
	954
	955	USE kinds
	956
[1324]	957	USE pegrid
[1320]	958
	959	USE transpose_indices, &
	960	ONLY: nxl_y, nxl_yd, nxr_y, nxr_yd, nzb_y, nzb_yd, nzt_y, nzt_yd
	961
[1]	962	IMPLICIT NONE
	963
[1682]	964	INTEGER(iwp) :: i !<
	965	INTEGER(iwp) :: j !<
	966	INTEGER(iwp) :: k !<
	967	INTEGER(iwp) :: l !<
	968	INTEGER(iwp) :: m !<
	969	INTEGER(iwp) :: ys !<
[1]	970
[1682]	971	REAL(wp) :: f_in(1:nz,nys:nyn,nxl:nxr) !<
	972	REAL(wp) :: f_inv(nys:nyn,nxl:nxr,1:nz) !<
	973	REAL(wp) :: f_out(0:ny,nxl_yd:nxr_yd,nzb_yd:nzt_yd) !<
	974	REAL(wp) :: work(nnxnnynnz) !<
[1320]	975
[1]	976	#if defined( __parallel )
	977
	978	!
	979	!-- Rearrange indices of input array in order to make data to be send
	980	!-- by MPI contiguous
[1003]	981	DO i = nxl, nxr
	982	DO j = nys, nyn
	983	DO k = 1, nz
[164]	984	f_inv(j,i,k) = f_in(k,j,i)
[1]	985	ENDDO
	986	ENDDO
	987	ENDDO
	988
	989	!
	990	!-- Move data to different array, because memory location of work1 is
	991	!-- needed further below (work1 = work2).
	992	!-- If the PE grid is one-dimensional along x, only local reordering
	993	!-- of the data is necessary and no transposition has to be done.
	994	IF ( pdims(2) == 1 ) THEN
[1003]	995	DO k = 1, nz
	996	DO i = nxl, nxr
	997	DO j = nys, nyn
[164]	998	f_out(j,i,k) = f_inv(j,i,k)
[1]	999	ENDDO
	1000	ENDDO
	1001	ENDDO
	1002	RETURN
	1003	ENDIF
	1004
	1005	!
	1006	!-- Transpose array
	1007	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
[622]	1008	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
[1]	1009	CALL MPI_ALLTOALL( f_inv(nys,nxl,1), sendrecvcount_zyd, MPI_REAL, &
[164]	1010	work(1), sendrecvcount_zyd, MPI_REAL, &
[1]	1011	comm1dy, ierr )
	1012	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
	1013
	1014	!
	1015	!-- Reorder transposed array
	1016	m = 0
	1017	DO l = 0, pdims(2) - 1
	1018	ys = 0 + l * nny
[1003]	1019	DO k = nzb_yd, nzt_yd
	1020	DO i = nxl_yd, nxr_yd
[1]	1021	DO j = ys, ys + nny - 1
	1022	m = m + 1
[164]	1023	f_out(j,i,k) = work(m)
[1]	1024	ENDDO
	1025	ENDDO
	1026	ENDDO
	1027	ENDDO
	1028
	1029	#endif
	1030
	1031	END SUBROUTINE transpose_zyd

Note: See TracBrowser for help on using the repository browser.

Download in other formats:

| Impressum | ©Leibniz Universität Hannover |