Home

Context Navigation

source: palm/trunk/SOURCE/transpose.f90 @ 3393

Last change on this file since 3393 was 3241, checked in by raasch, 6 years ago
various changes to avoid compiler warnings (mainly removal of unused variables)
Property svn:keywords set to `Id`
File size: 29.7 KB

Rev	Line
[1682]	1	!> @file transpose.f90
[2000]	2	!------------------------------------------------------------------------------!
[2696]	3	! This file is part of the PALM model system.
[1036]	4	!
[2000]	5	! PALM is free software: you can redistribute it and/or modify it under the
	6	! terms of the GNU General Public License as published by the Free Software
	7	! Foundation, either version 3 of the License, or (at your option) any later
	8	! version.
[1036]	9	!
	10	! PALM is distributed in the hope that it will be useful, but WITHOUT ANY
	11	! WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
	12	! A PARTICULAR PURPOSE. See the GNU General Public License for more details.
	13	!
	14	! You should have received a copy of the GNU General Public License along with
	15	! PALM. If not, see <http://www.gnu.org/licenses/>.
	16	!
[2718]	17	! Copyright 1997-2018 Leibniz Universitaet Hannover
[2000]	18	!------------------------------------------------------------------------------!
[1036]	19	!
[484]	20	! Current revisions:
[1]	21	! -----------------
[1321]	22	!
[2119]	23	!
[1321]	24	! Former revisions:
	25	! -----------------
	26	! $Id: transpose.f90 3241 2018-09-12 15:02:00Z knoop $
[3241]	27	! unused variables removed
	28	!
	29	! 2718 2018-01-02 08:49:38Z maronga
[2716]	30	! Corrected "Former revisions" section
	31	!
	32	! 2696 2017-12-14 17:12:51Z kanani
	33	! Change in file header (GPL part)
[1321]	34	!
[2716]	35	! 2119 2017-01-17 16:51:50Z raasch
	36	!
[2119]	37	! 2118 2017-01-17 16:38:49Z raasch
	38	! OpenACC directives removed
	39	!
[2001]	40	! 2000 2016-08-20 18:09:15Z knoop
	41	! Forced header and separation lines into 80 columns
	42	!
[1683]	43	! 1682 2015-10-07 23:56:08Z knoop
	44	! Code annotations made doxygen readable
	45	!
[1325]	46	! 1324 2014-03-21 09:13:16Z suehring
	47	! Bugfix: ONLY statement for module pegrid removed
	48	!
[1321]	49	! 1320 2014-03-20 08:40:49Z raasch
[1320]	50	! ONLY-attribute added to USE-statements,
	51	! kind-parameters added to all INTEGER and REAL declaration statements,
	52	! kinds are defined in new module kinds,
	53	! old module precision_kind is removed,
	54	! revision history before 2012 removed,
	55	! comment fields (!:) to be used for variable explanations added to
	56	! all variable declaration statements
[198]	57	!
[1319]	58	! 1318 2014-03-17 13:35:16Z raasch
	59	! cpu_log_nowait parameter added to cpu measurements of the transpositions
	60	! required for solving the Poisson equation (poisfft),
	61	! module interfaces removed
	62	!
[1258]	63	! 1257 2013-11-08 15:18:40Z raasch
	64	! openacc loop and loop vector clauses removed
	65	!
[1217]	66	! 1216 2013-08-26 09:31:42Z raasch
	67	! re-sorting of the transposed / to be transposed arrays moved to separate
	68	! routines resort_for_...
	69	!
[1112]	70	! 1111 2013-03-08 23:54:10Z raasch
	71	! openACC directives added,
	72	! resorting data from/to work changed, work got 4 dimensions instead of 1
	73	!
[1107]	74	! 1106 2013-03-04 05:31:38Z raasch
	75	! preprocessor lines rearranged so that routines can also be used in serial
	76	! (non-parallel) mode
	77	!
[1093]	78	! 1092 2013-02-02 11:24:22Z raasch
	79	! unused variables removed
	80	!
[1037]	81	! 1036 2012-10-22 13:43:42Z raasch
	82	! code put under GPL (PALM 3.9)
	83	!
[1004]	84	! 1003 2012-09-14 14:35:53Z raasch
	85	! indices nxa, nya, etc. replaced by nx, ny, etc.
	86	!
[1]	87	! Revision 1.1 1997/07/24 11:25:18 raasch
	88	! Initial revision
	89	!
[1216]	90	!------------------------------------------------------------------------------!
	91	! Description:
	92	! ------------
[1682]	93	!> Resorting data for the transposition from x to y. The transposition itself
	94	!> is carried out in transpose_xy
[1216]	95	!------------------------------------------------------------------------------!
[1682]	96	SUBROUTINE resort_for_xy( f_in, f_inv )
	97
[1216]	98
[1320]	99	USE indices, &
	100	ONLY: nx
[1216]	101
[1320]	102	USE kinds
	103
	104	USE transpose_indices, &
[3241]	105	ONLY: nyn_x, nys_x, nzb_x, nzt_x
[1320]	106
[1216]	107	IMPLICIT NONE
	108
[1682]	109	REAL(wp) :: f_in(0:nx,nys_x:nyn_x,nzb_x:nzt_x) !<
	110	REAL(wp) :: f_inv(nys_x:nyn_x,nzb_x:nzt_x,0:nx) !<
[1216]	111
	112
[1682]	113	INTEGER(iwp) :: i !<
	114	INTEGER(iwp) :: j !<
	115	INTEGER(iwp) :: k !<
[1]	116	!
[1216]	117	!-- Rearrange indices of input array in order to make data to be send
	118	!-- by MPI contiguous
	119	!$OMP PARALLEL PRIVATE ( i, j, k )
	120	!$OMP DO
	121	DO i = 0, nx
	122	DO k = nzb_x, nzt_x
	123	DO j = nys_x, nyn_x
	124	f_inv(j,k,i) = f_in(i,j,k)
	125	ENDDO
	126	ENDDO
	127	ENDDO
	128	!$OMP END PARALLEL
	129
	130	END SUBROUTINE resort_for_xy
	131
	132
	133	!------------------------------------------------------------------------------!
[1]	134	! Description:
	135	! ------------
[1682]	136	!> Transposition of input array (f_in) from x to y. For the input array, all
	137	!> elements along x reside on the same PE, while after transposition, all
	138	!> elements along y reside on the same PE.
[1]	139	!------------------------------------------------------------------------------!
[1682]	140	SUBROUTINE transpose_xy( f_inv, f_out )
[1]	141
[1682]	142
[1320]	143	USE cpulog, &
	144	ONLY: cpu_log, cpu_log_nowait, log_point_s
	145
	146	USE indices, &
	147	ONLY: nx, ny
	148
	149	USE kinds
	150
[1]	151	USE pegrid
	152
[1320]	153	USE transpose_indices, &
	154	ONLY: nxl_y, nxr_y, nyn_x, nys_x, nzb_x, nzb_y, nzt_x, nzt_y
	155
[1]	156	IMPLICIT NONE
	157
[1682]	158	INTEGER(iwp) :: i !<
	159	INTEGER(iwp) :: j !<
	160	INTEGER(iwp) :: k !<
	161	INTEGER(iwp) :: l !<
	162	INTEGER(iwp) :: ys !<
[1320]	163
[1682]	164	REAL(wp) :: f_inv(nys_x:nyn_x,nzb_x:nzt_x,0:nx) !<
	165	REAL(wp) :: f_out(0:ny,nxl_y:nxr_y,nzb_y:nzt_y) !<
[1]	166
[1682]	167	REAL(wp), DIMENSION(nyn_x-nys_x+1,nzb_y:nzt_y,nxl_y:nxr_y,0:pdims(2)-1) :: work !<
[1111]	168
	169
[1106]	170	IF ( numprocs /= 1 ) THEN
	171
	172	#if defined( __parallel )
[1]	173	!
[1106]	174	!-- Transpose array
[1318]	175	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start', cpu_log_nowait )
[1106]	176	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
[1111]	177	CALL MPI_ALLTOALL( f_inv(nys_x,nzb_x,0), sendrecvcount_xy, MPI_REAL, &
	178	work(1,nzb_y,nxl_y,0), sendrecvcount_xy, MPI_REAL, &
[1106]	179	comm1dy, ierr )
	180	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
[1]	181
	182	!
[1106]	183	!-- Reorder transposed array
[1111]	184	!$OMP PARALLEL PRIVATE ( i, j, k, l, ys )
[683]	185	!$OMP DO
[1106]	186	DO l = 0, pdims(2) - 1
	187	ys = 0 + l * ( nyn_x - nys_x + 1 )
	188	DO i = nxl_y, nxr_y
	189	DO k = nzb_y, nzt_y
	190	DO j = ys, ys + nyn_x - nys_x
[1111]	191	f_out(j,i,k) = work(j-ys+1,k,i,l)
[1106]	192	ENDDO
[1]	193	ENDDO
	194	ENDDO
	195	ENDDO
[683]	196	!$OMP END PARALLEL
[1]	197	#endif
	198
[1106]	199	ELSE
	200
	201	!
	202	!-- Reorder transposed array
	203	!$OMP PARALLEL PRIVATE ( i, j, k )
	204	!$OMP DO
	205	DO k = nzb_y, nzt_y
	206	DO i = nxl_y, nxr_y
	207	DO j = 0, ny
	208	f_out(j,i,k) = f_inv(j,k,i)
	209	ENDDO
	210	ENDDO
	211	ENDDO
	212	!$OMP END PARALLEL
	213
	214	ENDIF
	215
[1]	216	END SUBROUTINE transpose_xy
	217
	218
	219	!------------------------------------------------------------------------------!
	220	! Description:
	221	! ------------
[1682]	222	!> Resorting data after the transposition from x to z. The transposition itself
	223	!> is carried out in transpose_xz
[1216]	224	!------------------------------------------------------------------------------!
[1682]	225	SUBROUTINE resort_for_xz( f_inv, f_out )
[1216]	226
[1682]	227
[1320]	228	USE indices, &
	229	ONLY: nxl, nxr, nyn, nys, nz
[1216]	230
[1320]	231	USE kinds
	232
[1216]	233	IMPLICIT NONE
	234
[1682]	235	REAL(wp) :: f_inv(nys:nyn,nxl:nxr,1:nz) !<
	236	REAL(wp) :: f_out(1:nz,nys:nyn,nxl:nxr) !<
[1216]	237
[1682]	238	INTEGER(iwp) :: i !<
	239	INTEGER(iwp) :: j !<
	240	INTEGER(iwp) :: k !<
[1216]	241	!
	242	!-- Rearrange indices of input array in order to make data to be send
	243	!-- by MPI contiguous.
	244	!-- In case of parallel fft/transposition, scattered store is faster in
	245	!-- backward direction!!!
	246	!$OMP PARALLEL PRIVATE ( i, j, k )
	247	!$OMP DO
	248	DO k = 1, nz
	249	DO i = nxl, nxr
	250	DO j = nys, nyn
	251	f_out(k,j,i) = f_inv(j,i,k)
	252	ENDDO
	253	ENDDO
	254	ENDDO
	255	!$OMP END PARALLEL
	256
	257	END SUBROUTINE resort_for_xz
	258
	259
	260	!------------------------------------------------------------------------------!
	261	! Description:
	262	! ------------
[1682]	263	!> Transposition of input array (f_in) from x to z. For the input array, all
	264	!> elements along x reside on the same PE, while after transposition, all
	265	!> elements along z reside on the same PE.
[1]	266	!------------------------------------------------------------------------------!
[1682]	267	SUBROUTINE transpose_xz( f_in, f_inv )
[1]	268
[1682]	269
[1320]	270	USE cpulog, &
	271	ONLY: cpu_log, cpu_log_nowait, log_point_s
[1]	272
[1320]	273	USE indices, &
[3241]	274	ONLY: nnx, nx, nxl, nxr, nyn, nys, nz
[1320]	275
	276	USE kinds
	277
[1324]	278	USE pegrid
[1320]	279
	280	USE transpose_indices, &
	281	ONLY: nyn_x, nys_x, nzb_x, nzt_x
	282
[1]	283	IMPLICIT NONE
	284
[1682]	285	INTEGER(iwp) :: i !<
	286	INTEGER(iwp) :: j !<
	287	INTEGER(iwp) :: k !<
	288	INTEGER(iwp) :: l !<
	289	INTEGER(iwp) :: xs !<
[1]	290
[1682]	291	REAL(wp) :: f_in(0:nx,nys_x:nyn_x,nzb_x:nzt_x) !<
	292	REAL(wp) :: f_inv(nys:nyn,nxl:nxr,1:nz) !<
[1]	293
[1682]	294	REAL(wp), DIMENSION(nys_x:nyn_x,nnx,nzb_x:nzt_x,0:pdims(1)-1) :: work !<
[1111]	295
[1320]	296
[1]	297	!
	298	!-- If the PE grid is one-dimensional along y, the array has only to be
	299	!-- reordered locally and therefore no transposition has to be done.
	300	IF ( pdims(1) /= 1 ) THEN
[1106]	301
	302	#if defined( __parallel )
[1]	303	!
	304	!-- Reorder input array for transposition
[1111]	305	!$OMP PARALLEL PRIVATE ( i, j, k, l, xs )
[683]	306	!$OMP DO
[1]	307	DO l = 0, pdims(1) - 1
	308	xs = 0 + l * nnx
[1003]	309	DO k = nzb_x, nzt_x
[164]	310	DO i = xs, xs + nnx - 1
[1003]	311	DO j = nys_x, nyn_x
[1111]	312	work(j,i-xs+1,k,l) = f_in(i,j,k)
[1]	313	ENDDO
	314	ENDDO
	315	ENDDO
	316	ENDDO
[683]	317	!$OMP END PARALLEL
[1]	318
	319	!
	320	!-- Transpose array
[1318]	321	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start', cpu_log_nowait )
[622]	322	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
[1111]	323	CALL MPI_ALLTOALL( work(nys_x,1,nzb_x,0), sendrecvcount_zx, MPI_REAL, &
	324	f_inv(nys,nxl,1), sendrecvcount_zx, MPI_REAL, &
[1]	325	comm1dx, ierr )
	326	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
[1106]	327	#endif
	328
[1]	329	ELSE
[1106]	330
[1]	331	!
	332	!-- Reorder the array in a way that the z index is in first position
[683]	333	!$OMP PARALLEL PRIVATE ( i, j, k )
	334	!$OMP DO
[1003]	335	DO i = nxl, nxr
	336	DO j = nys, nyn
	337	DO k = 1, nz
[164]	338	f_inv(j,i,k) = f_in(i,j,k)
[1]	339	ENDDO
	340	ENDDO
	341	ENDDO
[683]	342	!$OMP END PARALLEL
[1]	343
[164]	344	ENDIF
	345
[1]	346	END SUBROUTINE transpose_xz
	347
	348
	349	!------------------------------------------------------------------------------!
	350	! Description:
	351	! ------------
[1682]	352	!> Resorting data after the transposition from y to x. The transposition itself
	353	!> is carried out in transpose_yx
[1216]	354	!------------------------------------------------------------------------------!
[1682]	355	SUBROUTINE resort_for_yx( f_inv, f_out )
[1216]	356
[1682]	357
[1320]	358	USE indices, &
	359	ONLY: nx
[1216]	360
[1320]	361	USE kinds
	362
	363	USE transpose_indices, &
	364	ONLY: nyn_x, nys_x, nzb_x, nzt_x
	365
[1216]	366	IMPLICIT NONE
	367
[1682]	368	REAL(wp) :: f_inv(nys_x:nyn_x,nzb_x:nzt_x,0:nx) !<
	369	REAL(wp) :: f_out(0:nx,nys_x:nyn_x,nzb_x:nzt_x) !<
[1216]	370
	371
[1682]	372	INTEGER(iwp) :: i !<
	373	INTEGER(iwp) :: j !<
	374	INTEGER(iwp) :: k !<
[1216]	375	!
	376	!-- Rearrange indices of input array in order to make data to be send
	377	!-- by MPI contiguous
	378	!$OMP PARALLEL PRIVATE ( i, j, k )
	379	!$OMP DO
	380	DO i = 0, nx
	381	DO k = nzb_x, nzt_x
	382	DO j = nys_x, nyn_x
	383	f_out(i,j,k) = f_inv(j,k,i)
	384	ENDDO
	385	ENDDO
	386	ENDDO
	387	!$OMP END PARALLEL
	388
	389	END SUBROUTINE resort_for_yx
	390
	391
	392	!------------------------------------------------------------------------------!
	393	! Description:
	394	! ------------
[1682]	395	!> Transposition of input array (f_in) from y to x. For the input array, all
	396	!> elements along y reside on the same PE, while after transposition, all
	397	!> elements along x reside on the same PE.
[1]	398	!------------------------------------------------------------------------------!
[1682]	399	SUBROUTINE transpose_yx( f_in, f_inv )
[1]	400
[1682]	401
[1320]	402	USE cpulog, &
	403	ONLY: cpu_log, cpu_log_nowait, log_point_s
[1]	404
[1320]	405	USE indices, &
	406	ONLY: nx, ny
	407
	408	USE kinds
	409
[1324]	410	USE pegrid
[1320]	411
	412	USE transpose_indices, &
	413	ONLY: nxl_y, nxr_y, nyn_x, nys_x, nzb_x, nzb_y, nzt_x, nzt_y
	414
[1]	415	IMPLICIT NONE
	416
[1682]	417	INTEGER(iwp) :: i !<
	418	INTEGER(iwp) :: j !<
	419	INTEGER(iwp) :: k !<
	420	INTEGER(iwp) :: l !<
	421	INTEGER(iwp) :: ys !<
[1]	422
[1682]	423	REAL(wp) :: f_in(0:ny,nxl_y:nxr_y,nzb_y:nzt_y) !<
	424	REAL(wp) :: f_inv(nys_x:nyn_x,nzb_x:nzt_x,0:nx) !<
[1111]	425
[1682]	426	REAL(wp), DIMENSION(nyn_x-nys_x+1,nzb_y:nzt_y,nxl_y:nxr_y,0:pdims(2)-1) :: work !<
[1111]	427
[1320]	428
[1106]	429	IF ( numprocs /= 1 ) THEN
	430
[1]	431	#if defined( __parallel )
	432	!
[1106]	433	!-- Reorder input array for transposition
[1111]	434	!$OMP PARALLEL PRIVATE ( i, j, k, l, ys )
[683]	435	!$OMP DO
[1106]	436	DO l = 0, pdims(2) - 1
	437	ys = 0 + l * ( nyn_x - nys_x + 1 )
	438	DO i = nxl_y, nxr_y
	439	DO k = nzb_y, nzt_y
	440	DO j = ys, ys + nyn_x - nys_x
[1111]	441	work(j-ys+1,k,i,l) = f_in(j,i,k)
[1106]	442	ENDDO
	443	ENDDO
	444	ENDDO
	445	ENDDO
	446	!$OMP END PARALLEL
	447
	448	!
	449	!-- Transpose array
[1318]	450	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start', cpu_log_nowait )
[1106]	451	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
[1111]	452	CALL MPI_ALLTOALL( work(1,nzb_y,nxl_y,0), sendrecvcount_xy, MPI_REAL, &
	453	f_inv(nys_x,nzb_x,0), sendrecvcount_xy, MPI_REAL, &
[1106]	454	comm1dy, ierr )
	455	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
	456	#endif
	457
	458	ELSE
	459
	460	!
	461	!-- Reorder array f_in the same way as ALLTOALL did it
	462	!$OMP PARALLEL PRIVATE ( i, j, k )
	463	!$OMP DO
[1003]	464	DO i = nxl_y, nxr_y
	465	DO k = nzb_y, nzt_y
[1106]	466	DO j = 0, ny
	467	f_inv(j,k,i) = f_in(j,i,k)
[1]	468	ENDDO
	469	ENDDO
	470	ENDDO
[683]	471	!$OMP END PARALLEL
[1]	472
[1106]	473	ENDIF
[1]	474
	475	END SUBROUTINE transpose_yx
	476
	477
	478	!------------------------------------------------------------------------------!
	479	! Description:
	480	! ------------
[1682]	481	!> Transposition of input array (f_in) from y to x. For the input array, all
	482	!> elements along y reside on the same PE, while after transposition, all
	483	!> elements along x reside on the same PE.
	484	!> This is a direct transposition for arrays with indices in regular order
	485	!> (k,j,i) (cf. transpose_yx).
[1]	486	!------------------------------------------------------------------------------!
[1682]	487	SUBROUTINE transpose_yxd( f_in, f_out )
[1]	488
[1682]	489
[1320]	490	USE cpulog, &
[3241]	491	ONLY: cpu_log, log_point_s
[1]	492
[1320]	493	USE indices, &
	494	ONLY: nnx, nny, nnz, nx, nxl, nxr, nyn, nys, nz
	495
	496	USE kinds
	497
[1324]	498	USE pegrid
[1320]	499
	500	USE transpose_indices, &
	501	ONLY: nyn_x, nys_x, nzb_x, nzt_x
	502
[1]	503	IMPLICIT NONE
	504
[1682]	505	INTEGER(iwp) :: i !<
	506	INTEGER(iwp) :: j !<
	507	INTEGER(iwp) :: k !<
	508	INTEGER(iwp) :: l !<
	509	INTEGER(iwp) :: m !<
	510	INTEGER(iwp) :: xs !<
[1]	511
[1682]	512	REAL(wp) :: f_in(1:nz,nys:nyn,nxl:nxr) !<
	513	REAL(wp) :: f_inv(nxl:nxr,1:nz,nys:nyn) !<
	514	REAL(wp) :: f_out(0:nx,nys_x:nyn_x,nzb_x:nzt_x) !<
	515	REAL(wp) :: work(nnxnnynnz) !<
[1]	516	#if defined( __parallel )
	517
	518	!
	519	!-- Rearrange indices of input array in order to make data to be send
	520	!-- by MPI contiguous
[1003]	521	DO k = 1, nz
	522	DO j = nys, nyn
	523	DO i = nxl, nxr
[164]	524	f_inv(i,k,j) = f_in(k,j,i)
[1]	525	ENDDO
	526	ENDDO
	527	ENDDO
	528
	529	!
	530	!-- Transpose array
	531	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
[622]	532	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
[1]	533	CALL MPI_ALLTOALL( f_inv(nxl,1,nys), sendrecvcount_xy, MPI_REAL, &
[164]	534	work(1), sendrecvcount_xy, MPI_REAL, &
[1]	535	comm1dx, ierr )
	536	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
	537
	538	!
	539	!-- Reorder transposed array
	540	m = 0
	541	DO l = 0, pdims(1) - 1
	542	xs = 0 + l * nnx
[1003]	543	DO j = nys_x, nyn_x
	544	DO k = 1, nz
[1]	545	DO i = xs, xs + nnx - 1
	546	m = m + 1
[164]	547	f_out(i,j,k) = work(m)
[1]	548	ENDDO
	549	ENDDO
	550	ENDDO
	551	ENDDO
	552
	553	#endif
	554
	555	END SUBROUTINE transpose_yxd
	556
	557
	558	!------------------------------------------------------------------------------!
	559	! Description:
	560	! ------------
[1682]	561	!> Resorting data for the transposition from y to z. The transposition itself
	562	!> is carried out in transpose_yz
[1216]	563	!------------------------------------------------------------------------------!
[1682]	564	SUBROUTINE resort_for_yz( f_in, f_inv )
[1216]	565
[1682]	566
[1320]	567	USE indices, &
	568	ONLY: ny
[1216]	569
[1320]	570	USE kinds
	571
	572	USE transpose_indices, &
	573	ONLY: nxl_y, nxr_y, nzb_y, nzt_y
	574
[1216]	575	IMPLICIT NONE
	576
[1682]	577	REAL(wp) :: f_in(0:ny,nxl_y:nxr_y,nzb_y:nzt_y) !<
	578	REAL(wp) :: f_inv(nxl_y:nxr_y,nzb_y:nzt_y,0:ny) !<
[1216]	579
[1682]	580	INTEGER(iwp) :: i !<
	581	INTEGER(iwp) :: j !<
	582	INTEGER(iwp) :: k !<
[1216]	583
	584	!
	585	!-- Rearrange indices of input array in order to make data to be send
	586	!-- by MPI contiguous
	587	!$OMP PARALLEL PRIVATE ( i, j, k )
	588	!$OMP DO
	589	DO j = 0, ny
	590	DO k = nzb_y, nzt_y
	591	DO i = nxl_y, nxr_y
	592	f_inv(i,k,j) = f_in(j,i,k)
	593	ENDDO
	594	ENDDO
	595	ENDDO
	596	!$OMP END PARALLEL
	597
	598	END SUBROUTINE resort_for_yz
	599
	600
	601	!------------------------------------------------------------------------------!
	602	! Description:
	603	! ------------
[1682]	604	!> Transposition of input array (f_in) from y to z. For the input array, all
	605	!> elements along y reside on the same PE, while after transposition, all
	606	!> elements along z reside on the same PE.
[1]	607	!------------------------------------------------------------------------------!
[1682]	608	SUBROUTINE transpose_yz( f_inv, f_out )
[1]	609
[1682]	610
[1320]	611	USE cpulog, &
	612	ONLY: cpu_log, cpu_log_nowait, log_point_s
[1]	613
[1320]	614	USE indices, &
	615	ONLY: ny, nz
	616
	617	USE kinds
	618
[1324]	619	USE pegrid
[1320]	620
	621	USE transpose_indices, &
	622	ONLY: nxl_y, nxl_z, nxr_y, nxr_z, nyn_z, nys_z, nzb_y, nzt_y
	623
[1]	624	IMPLICIT NONE
	625
[1682]	626	INTEGER(iwp) :: i !<
	627	INTEGER(iwp) :: j !<
	628	INTEGER(iwp) :: k !<
	629	INTEGER(iwp) :: l !<
	630	INTEGER(iwp) :: zs !<
[1]	631
[1682]	632	REAL(wp) :: f_inv(nxl_y:nxr_y,nzb_y:nzt_y,0:ny) !<
	633	REAL(wp) :: f_out(nxl_z:nxr_z,nys_z:nyn_z,1:nz) !<
[1111]	634
[1682]	635	REAL(wp), DIMENSION(nxl_z:nxr_z,nzt_y-nzb_y+1,nys_z:nyn_z,0:pdims(1)-1) :: work !<
[1111]	636
[1320]	637
[1]	638	!
	639	!-- If the PE grid is one-dimensional along y, only local reordering
	640	!-- of the data is necessary and no transposition has to be done.
	641	IF ( pdims(1) == 1 ) THEN
[1106]	642
[683]	643	!$OMP PARALLEL PRIVATE ( i, j, k )
	644	!$OMP DO
[1003]	645	DO j = 0, ny
	646	DO k = nzb_y, nzt_y
	647	DO i = nxl_y, nxr_y
[164]	648	f_out(i,j,k) = f_inv(i,k,j)
[1]	649	ENDDO
	650	ENDDO
	651	ENDDO
[683]	652	!$OMP END PARALLEL
[1]	653
[1106]	654	ELSE
	655
	656	#if defined( __parallel )
[1]	657	!
[1106]	658	!-- Transpose array
[1318]	659	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start', cpu_log_nowait )
[1106]	660	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
[1111]	661	CALL MPI_ALLTOALL( f_inv(nxl_y,nzb_y,0), sendrecvcount_yz, MPI_REAL, &
	662	work(nxl_z,1,nys_z,0), sendrecvcount_yz, MPI_REAL, &
[1106]	663	comm1dx, ierr )
	664	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
[1]	665
	666	!
[1106]	667	!-- Reorder transposed array
[1111]	668	!$OMP PARALLEL PRIVATE ( i, j, k, l, zs )
[683]	669	!$OMP DO
[1106]	670	DO l = 0, pdims(1) - 1
	671	zs = 1 + l * ( nzt_y - nzb_y + 1 )
	672	DO j = nys_z, nyn_z
	673	DO k = zs, zs + nzt_y - nzb_y
	674	DO i = nxl_z, nxr_z
[1111]	675	f_out(i,j,k) = work(i,k-zs+1,j,l)
[1106]	676	ENDDO
[1]	677	ENDDO
	678	ENDDO
	679	ENDDO
[683]	680	!$OMP END PARALLEL
[1]	681	#endif
	682
[1106]	683	ENDIF
	684
[1]	685	END SUBROUTINE transpose_yz
	686
	687
	688	!------------------------------------------------------------------------------!
	689	! Description:
	690	! ------------
[1682]	691	!> Resorting data for the transposition from z to x. The transposition itself
	692	!> is carried out in transpose_zx
[1216]	693	!------------------------------------------------------------------------------!
[1682]	694	SUBROUTINE resort_for_zx( f_in, f_inv )
[1216]	695
[1682]	696
[1320]	697	USE indices, &
	698	ONLY: nxl, nxr, nyn, nys, nz
[1216]	699
[1320]	700	USE kinds
	701
[1216]	702	IMPLICIT NONE
	703
[1682]	704	REAL(wp) :: f_in(1:nz,nys:nyn,nxl:nxr) !<
	705	REAL(wp) :: f_inv(nys:nyn,nxl:nxr,1:nz) !<
[1216]	706
[1682]	707	INTEGER(iwp) :: i !<
	708	INTEGER(iwp) :: j !<
	709	INTEGER(iwp) :: k !<
[1216]	710
	711	!
	712	!-- Rearrange indices of input array in order to make data to be send
	713	!-- by MPI contiguous
	714	!$OMP PARALLEL PRIVATE ( i, j, k )
	715	!$OMP DO
	716	DO k = 1,nz
	717	DO i = nxl, nxr
	718	DO j = nys, nyn
	719	f_inv(j,i,k) = f_in(k,j,i)
	720	ENDDO
	721	ENDDO
	722	ENDDO
	723	!$OMP END PARALLEL
	724
	725	END SUBROUTINE resort_for_zx
	726
	727
	728	!------------------------------------------------------------------------------!
	729	! Description:
	730	! ------------
[1682]	731	!> Transposition of input array (f_in) from z to x. For the input array, all
	732	!> elements along z reside on the same PE, while after transposition, all
	733	!> elements along x reside on the same PE.
[1]	734	!------------------------------------------------------------------------------!
[1682]	735	SUBROUTINE transpose_zx( f_inv, f_out )
[1]	736
[1682]	737
[1320]	738	USE cpulog, &
	739	ONLY: cpu_log, cpu_log_nowait, log_point_s
[1]	740
[1320]	741	USE indices, &
	742	ONLY: nnx, nx, nxl, nxr, nyn, nys, nz
	743
	744	USE kinds
	745
[1324]	746	USE pegrid
[1320]	747
	748	USE transpose_indices, &
	749	ONLY: nyn_x, nys_x, nzb_x, nzt_x
	750
[1]	751	IMPLICIT NONE
	752
[1682]	753	INTEGER(iwp) :: i !<
	754	INTEGER(iwp) :: j !<
	755	INTEGER(iwp) :: k !<
	756	INTEGER(iwp) :: l !<
	757	INTEGER(iwp) :: xs !<
[1]	758
[1682]	759	REAL(wp) :: f_inv(nys:nyn,nxl:nxr,1:nz) !<
	760	REAL(wp) :: f_out(0:nx,nys_x:nyn_x,nzb_x:nzt_x) !<
[1111]	761
[1682]	762	REAL(wp), DIMENSION(nys_x:nyn_x,nnx,nzb_x:nzt_x,0:pdims(1)-1) :: work !<
[1]	763
[1320]	764
[1]	765	!
	766	!-- If the PE grid is one-dimensional along y, only local reordering
	767	!-- of the data is necessary and no transposition has to be done.
	768	IF ( pdims(1) == 1 ) THEN
[1106]	769
[683]	770	!$OMP PARALLEL PRIVATE ( i, j, k )
	771	!$OMP DO
[1003]	772	DO k = 1, nz
	773	DO i = nxl, nxr
	774	DO j = nys, nyn
[164]	775	f_out(i,j,k) = f_inv(j,i,k)
[1]	776	ENDDO
	777	ENDDO
	778	ENDDO
[683]	779	!$OMP END PARALLEL
[1]	780
[1106]	781	ELSE
	782
	783	#if defined( __parallel )
[1]	784	!
[1106]	785	!-- Transpose array
[1318]	786	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start', cpu_log_nowait )
[1106]	787	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
[1111]	788	CALL MPI_ALLTOALL( f_inv(nys,nxl,1), sendrecvcount_zx, MPI_REAL, &
	789	work(nys_x,1,nzb_x,0), sendrecvcount_zx, MPI_REAL, &
[1106]	790	comm1dx, ierr )
	791	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
[1]	792
	793	!
[1106]	794	!-- Reorder transposed array
[1111]	795	!$OMP PARALLEL PRIVATE ( i, j, k, l, xs )
[683]	796	!$OMP DO
[1106]	797	DO l = 0, pdims(1) - 1
	798	xs = 0 + l * nnx
	799	DO k = nzb_x, nzt_x
	800	DO i = xs, xs + nnx - 1
	801	DO j = nys_x, nyn_x
[1111]	802	f_out(i,j,k) = work(j,i-xs+1,k,l)
[1106]	803	ENDDO
[1]	804	ENDDO
	805	ENDDO
	806	ENDDO
[683]	807	!$OMP END PARALLEL
[1]	808	#endif
	809
[1106]	810	ENDIF
	811
[1]	812	END SUBROUTINE transpose_zx
	813
	814
	815	!------------------------------------------------------------------------------!
	816	! Description:
	817	! ------------
[1682]	818	!> Resorting data after the transposition from z to y. The transposition itself
	819	!> is carried out in transpose_zy
[1216]	820	!------------------------------------------------------------------------------!
[1682]	821	SUBROUTINE resort_for_zy( f_inv, f_out )
[1216]	822
[1682]	823
[1320]	824	USE indices, &
	825	ONLY: ny
[1216]	826
[1320]	827	USE kinds
	828
	829	USE transpose_indices, &
	830	ONLY: nxl_y, nxr_y, nzb_y, nzt_y
	831
[1216]	832	IMPLICIT NONE
	833
[1682]	834	REAL(wp) :: f_inv(nxl_y:nxr_y,nzb_y:nzt_y,0:ny) !<
	835	REAL(wp) :: f_out(0:ny,nxl_y:nxr_y,nzb_y:nzt_y) !<
[1216]	836
	837
[1682]	838	INTEGER(iwp) :: i !<
	839	INTEGER(iwp) :: j !<
	840	INTEGER(iwp) :: k !<
[1216]	841
	842	!
	843	!-- Rearrange indices of input array in order to make data to be send
	844	!-- by MPI contiguous
	845	!$OMP PARALLEL PRIVATE ( i, j, k )
	846	!$OMP DO
	847	DO k = nzb_y, nzt_y
	848	DO j = 0, ny
	849	DO i = nxl_y, nxr_y
	850	f_out(j,i,k) = f_inv(i,k,j)
	851	ENDDO
	852	ENDDO
	853	ENDDO
	854	!$OMP END PARALLEL
	855
	856	END SUBROUTINE resort_for_zy
	857
	858
	859	!------------------------------------------------------------------------------!
[3241]	860	! Description:cpu_log_nowait
[1216]	861	! ------------
[1682]	862	!> Transposition of input array (f_in) from z to y. For the input array, all
	863	!> elements along z reside on the same PE, while after transposition, all
	864	!> elements along y reside on the same PE.
[1]	865	!------------------------------------------------------------------------------!
[1682]	866	SUBROUTINE transpose_zy( f_in, f_inv )
[1]	867
[1682]	868
[1320]	869	USE cpulog, &
	870	ONLY: cpu_log, cpu_log_nowait, log_point_s
[1]	871
[1320]	872	USE indices, &
	873	ONLY: ny, nz
	874
	875	USE kinds
	876
[1324]	877	USE pegrid
[1320]	878
	879	USE transpose_indices, &
	880	ONLY: nxl_y, nxl_z, nxr_y, nxr_z, nyn_z, nys_z, nzb_y, nzt_y
	881
[1]	882	IMPLICIT NONE
	883
[1682]	884	INTEGER(iwp) :: i !<
	885	INTEGER(iwp) :: j !<
	886	INTEGER(iwp) :: k !<
	887	INTEGER(iwp) :: l !<
	888	INTEGER(iwp) :: zs !<
[1]	889
[1682]	890	REAL(wp) :: f_in(nxl_z:nxr_z,nys_z:nyn_z,1:nz) !<
	891	REAL(wp) :: f_inv(nxl_y:nxr_y,nzb_y:nzt_y,0:ny) !<
[1111]	892
[1682]	893	REAL(wp), DIMENSION(nxl_z:nxr_z,nzt_y-nzb_y+1,nys_z:nyn_z,0:pdims(1)-1) :: work !<
[1111]	894
[1]	895	!
	896	!-- If the PE grid is one-dimensional along y, the array has only to be
	897	!-- reordered locally and therefore no transposition has to be done.
	898	IF ( pdims(1) /= 1 ) THEN
[1106]	899
	900	#if defined( __parallel )
[1]	901	!
	902	!-- Reorder input array for transposition
[1111]	903	!$OMP PARALLEL PRIVATE ( i, j, k, l, zs )
[683]	904	!$OMP DO
[1]	905	DO l = 0, pdims(1) - 1
[1003]	906	zs = 1 + l * ( nzt_y - nzb_y + 1 )
	907	DO j = nys_z, nyn_z
	908	DO k = zs, zs + nzt_y - nzb_y
	909	DO i = nxl_z, nxr_z
[1111]	910	work(i,k-zs+1,j,l) = f_in(i,j,k)
[1]	911	ENDDO
	912	ENDDO
	913	ENDDO
	914	ENDDO
[683]	915	!$OMP END PARALLEL
[1]	916
	917	!
	918	!-- Transpose array
[1318]	919	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start', cpu_log_nowait )
[622]	920	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
[1111]	921	CALL MPI_ALLTOALL( work(nxl_z,1,nys_z,0), sendrecvcount_yz, MPI_REAL, &
	922	f_inv(nxl_y,nzb_y,0), sendrecvcount_yz, MPI_REAL, &
[1]	923	comm1dx, ierr )
	924	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
[1106]	925	#endif
[1]	926
	927	ELSE
	928	!
[1106]	929	!-- Reorder the array in the same way like ALLTOALL did it
[683]	930	!$OMP PARALLEL PRIVATE ( i, j, k )
	931	!$OMP DO
[1003]	932	DO k = nzb_y, nzt_y
	933	DO j = 0, ny
	934	DO i = nxl_y, nxr_y
[164]	935	f_inv(i,k,j) = f_in(i,j,k)
	936	ENDDO
	937	ENDDO
	938	ENDDO
[683]	939	!$OMP END PARALLEL
[1106]	940
	941	ENDIF
	942
[1]	943	END SUBROUTINE transpose_zy
	944
	945
	946	!------------------------------------------------------------------------------!
	947	! Description:
	948	! ------------
[1682]	949	!> Transposition of input array (f_in) from z to y. For the input array, all
	950	!> elements along z reside on the same PE, while after transposition, all
	951	!> elements along y reside on the same PE.
	952	!> This is a direct transposition for arrays with indices in regular order
	953	!> (k,j,i) (cf. transpose_zy).
[1]	954	!------------------------------------------------------------------------------!
[1682]	955	SUBROUTINE transpose_zyd( f_in, f_out )
[1]	956
[1682]	957
[1320]	958	USE cpulog, &
[3241]	959	ONLY: cpu_log, log_point_s
[1]	960
[1320]	961	USE indices, &
	962	ONLY: nnx, nny, nnz, nxl, nxr, nyn, nys, ny, nz
	963
	964	USE kinds
	965
[1324]	966	USE pegrid
[1320]	967
	968	USE transpose_indices, &
[3241]	969	ONLY: nxl_yd, nxr_yd, nzb_yd, nzt_yd
[1320]	970
[1]	971	IMPLICIT NONE
	972
[1682]	973	INTEGER(iwp) :: i !<
	974	INTEGER(iwp) :: j !<
	975	INTEGER(iwp) :: k !<
	976	INTEGER(iwp) :: l !<
	977	INTEGER(iwp) :: m !<
	978	INTEGER(iwp) :: ys !<
[1]	979
[1682]	980	REAL(wp) :: f_in(1:nz,nys:nyn,nxl:nxr) !<
	981	REAL(wp) :: f_inv(nys:nyn,nxl:nxr,1:nz) !<
	982	REAL(wp) :: f_out(0:ny,nxl_yd:nxr_yd,nzb_yd:nzt_yd) !<
	983	REAL(wp) :: work(nnxnnynnz) !<
[1320]	984
[1]	985	#if defined( __parallel )
	986
	987	!
	988	!-- Rearrange indices of input array in order to make data to be send
	989	!-- by MPI contiguous
[1003]	990	DO i = nxl, nxr
	991	DO j = nys, nyn
	992	DO k = 1, nz
[164]	993	f_inv(j,i,k) = f_in(k,j,i)
[1]	994	ENDDO
	995	ENDDO
	996	ENDDO
	997
	998	!
	999	!-- Move data to different array, because memory location of work1 is
	1000	!-- needed further below (work1 = work2).
	1001	!-- If the PE grid is one-dimensional along x, only local reordering
	1002	!-- of the data is necessary and no transposition has to be done.
	1003	IF ( pdims(2) == 1 ) THEN
[1003]	1004	DO k = 1, nz
	1005	DO i = nxl, nxr
	1006	DO j = nys, nyn
[164]	1007	f_out(j,i,k) = f_inv(j,i,k)
[1]	1008	ENDDO
	1009	ENDDO
	1010	ENDDO
	1011	RETURN
	1012	ENDIF
	1013
	1014	!
	1015	!-- Transpose array
	1016	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
[622]	1017	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
[1]	1018	CALL MPI_ALLTOALL( f_inv(nys,nxl,1), sendrecvcount_zyd, MPI_REAL, &
[164]	1019	work(1), sendrecvcount_zyd, MPI_REAL, &
[1]	1020	comm1dy, ierr )
	1021	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
	1022
	1023	!
	1024	!-- Reorder transposed array
	1025	m = 0
	1026	DO l = 0, pdims(2) - 1
	1027	ys = 0 + l * nny
[1003]	1028	DO k = nzb_yd, nzt_yd
	1029	DO i = nxl_yd, nxr_yd
[1]	1030	DO j = ys, ys + nny - 1
	1031	m = m + 1
[164]	1032	f_out(j,i,k) = work(m)
[1]	1033	ENDDO
	1034	ENDDO
	1035	ENDDO
	1036	ENDDO
	1037
	1038	#endif
	1039
	1040	END SUBROUTINE transpose_zyd

Note: See TracBrowser for help on using the repository browser.

Download in other formats:

| Impressum | ©Leibniz Universität Hannover |