Home

Context Navigation

source: palm/trunk/SOURCE/transpose.f90 @ 1069

Last change on this file since 1069 was 1037, checked in by raasch, 12 years ago
last commit documented
Property svn:keywords set to `Id`
File size: 20.7 KB

Rev	Line
[164]	1	SUBROUTINE transpose_xy( f_in, work, f_out )
[1]	2
[1036]	3	!--------------------------------------------------------------------------------!
	4	! This file is part of PALM.
	5	!
	6	! PALM is free software: you can redistribute it and/or modify it under the terms
	7	! of the GNU General Public License as published by the Free Software Foundation,
	8	! either version 3 of the License, or (at your option) any later version.
	9	!
	10	! PALM is distributed in the hope that it will be useful, but WITHOUT ANY
	11	! WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
	12	! A PARTICULAR PURPOSE. See the GNU General Public License for more details.
	13	!
	14	! You should have received a copy of the GNU General Public License along with
	15	! PALM. If not, see <http://www.gnu.org/licenses/>.
	16	!
	17	! Copyright 1997-2012 Leibniz University Hannover
	18	!--------------------------------------------------------------------------------!
	19	!
[484]	20	! Current revisions:
[1]	21	! -----------------
[198]	22	!
[1004]	23	!
[198]	24	! Former revisions:
	25	! -----------------
	26	! $Id: transpose.f90 1037 2012-10-22 14:10:22Z maronga $
	27	!
[1037]	28	! 1036 2012-10-22 13:43:42Z raasch
	29	! code put under GPL (PALM 3.9)
	30	!
[1004]	31	! 1003 2012-09-14 14:35:53Z raasch
	32	! indices nxa, nya, etc. replaced by nx, ny, etc.
	33	!
[684]	34	! 683 2011-02-09 14:25:15Z raasch
	35	! openMP parallelization of transpositions for 2d-domain-decomposition
	36	!
[623]	37	! 622 2010-12-10 08:08:13Z raasch
	38	! optional barriers included in order to speed up collective operations
	39	!
[198]	40	! 164 2008-05-15 08:46:15Z raasch
[164]	41	! f_inv changed from subroutine argument to automatic array in order to do
	42	! re-ordering from f_in to f_inv in one step, one array work is needed instead
	43	! of work1 and work2
[1]	44	!
[198]	45	! February 2007
[3]	46	! RCS Log replace by Id keyword, revision history cleaned up
	47	!
[1]	48	! Revision 1.2 2004/04/30 13:12:17 raasch
	49	! Switched from mpi_alltoallv to the simpler mpi_alltoall,
	50	! all former transpose-routine files collected in this file, enlarged
	51	! transposition arrays introduced
	52	!
	53	! Revision 1.1 2004/04/30 13:08:16 raasch
	54	! Initial revision (collection of former routines transpose_xy, transpose_xz,
	55	! transpose_yx, transpose_yz, transpose_zx, transpose_zy)
	56	!
	57	! Revision 1.1 1997/07/24 11:25:18 raasch
	58	! Initial revision
	59	!
	60	!
	61	! Description:
	62	! ------------
	63	! Transposition of input array (f_in) from x to y. For the input array, all
	64	! elements along x reside on the same PE, while after transposition, all
	65	! elements along y reside on the same PE.
	66	!------------------------------------------------------------------------------!
	67
	68	USE cpulog
	69	USE indices
	70	USE interfaces
	71	USE pegrid
	72	USE transpose_indices
	73
	74	IMPLICIT NONE
	75
	76	INTEGER :: i, j, k, l, m, ys
	77
[1003]	78	REAL :: f_in(0:nx,nys_x:nyn_x,nzb_x:nzt_x), &
	79	f_inv(nys_x:nyn_x,nzb_x:nzt_x,0:nx), &
	80	f_out(0:ny,nxl_y:nxr_y,nzb_y:nzt_y), &
[164]	81	work(nnxnnynnz)
[1]	82
	83	#if defined( __parallel )
	84
	85	!
	86	!-- Rearrange indices of input array in order to make data to be send
	87	!-- by MPI contiguous
[683]	88	!$OMP PARALLEL PRIVATE ( i, j, k )
	89	!$OMP DO
[1003]	90	DO i = 0, nx
	91	DO k = nzb_x, nzt_x
	92	DO j = nys_x, nyn_x
[164]	93	f_inv(j,k,i) = f_in(i,j,k)
[1]	94	ENDDO
	95	ENDDO
	96	ENDDO
[683]	97	!$OMP END PARALLEL
[1]	98
	99	!
	100	!-- Transpose array
	101	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
[622]	102	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
[1]	103	CALL MPI_ALLTOALL( f_inv(nys_x,nzb_x,0), sendrecvcount_xy, MPI_REAL, &
[164]	104	work(1), sendrecvcount_xy, MPI_REAL, &
[1]	105	comm1dy, ierr )
	106	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
	107
	108	!
	109	!-- Reorder transposed array
[683]	110	!$OMP PARALLEL PRIVATE ( i, j, k, l, m, ys )
	111	!$OMP DO
[1]	112	DO l = 0, pdims(2) - 1
[1003]	113	m = l * ( nxr_y - nxl_y + 1 ) * ( nzt_y - nzb_y + 1 ) * &
	114	( nyn_x - nys_x + 1 )
	115	ys = 0 + l * ( nyn_x - nys_x + 1 )
	116	DO i = nxl_y, nxr_y
	117	DO k = nzb_y, nzt_y
	118	DO j = ys, ys + nyn_x - nys_x
[1]	119	m = m + 1
[164]	120	f_out(j,i,k) = work(m)
[1]	121	ENDDO
	122	ENDDO
	123	ENDDO
	124	ENDDO
[683]	125	!$OMP END PARALLEL
[1]	126
	127	#endif
	128
	129	END SUBROUTINE transpose_xy
	130
	131
[164]	132	SUBROUTINE transpose_xz( f_in, work, f_out )
[1]	133
	134	!------------------------------------------------------------------------------!
	135	! Description:
	136	! ------------
	137	! Transposition of input array (f_in) from x to z. For the input array, all
	138	! elements along x reside on the same PE, while after transposition, all
	139	! elements along z reside on the same PE.
	140	!------------------------------------------------------------------------------!
	141
	142	USE cpulog
	143	USE indices
	144	USE interfaces
	145	USE pegrid
	146	USE transpose_indices
	147
	148	IMPLICIT NONE
	149
	150	INTEGER :: i, j, k, l, m, xs
	151
[1003]	152	REAL :: f_in(0:nx,nys_x:nyn_x,nzb_x:nzt_x), &
	153	f_inv(nys:nyn,nxl:nxr,1:nz), &
	154	f_out(1:nz,nys:nyn,nxl:nxr), &
[164]	155	work(nnxnnynnz)
[1]	156
	157	#if defined( __parallel )
	158
	159	!
	160	!-- If the PE grid is one-dimensional along y, the array has only to be
	161	!-- reordered locally and therefore no transposition has to be done.
	162	IF ( pdims(1) /= 1 ) THEN
	163	!
	164	!-- Reorder input array for transposition
[683]	165	!$OMP PARALLEL PRIVATE ( i, j, k, l, m, xs )
	166	!$OMP DO
[1]	167	DO l = 0, pdims(1) - 1
[1003]	168	m = l * ( nzt_x - nzb_x + 1 ) * nnx * ( nyn_x - nys_x + 1 )
[1]	169	xs = 0 + l * nnx
[1003]	170	DO k = nzb_x, nzt_x
[164]	171	DO i = xs, xs + nnx - 1
[1003]	172	DO j = nys_x, nyn_x
[1]	173	m = m + 1
[164]	174	work(m) = f_in(i,j,k)
[1]	175	ENDDO
	176	ENDDO
	177	ENDDO
	178	ENDDO
[683]	179	!$OMP END PARALLEL
[1]	180
	181	!
	182	!-- Transpose array
	183	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
[622]	184	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
[164]	185	CALL MPI_ALLTOALL( work(1), sendrecvcount_zx, MPI_REAL, &
	186	f_inv(nys,nxl,1), sendrecvcount_zx, MPI_REAL, &
[1]	187	comm1dx, ierr )
	188	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
	189
	190	!
	191	!-- Reorder transposed array in a way that the z index is in first position
[683]	192	!$OMP PARALLEL PRIVATE ( i, j, k )
	193	!$OMP DO
[1003]	194	DO k = 1, nz
	195	DO i = nxl, nxr
	196	DO j = nys, nyn
[164]	197	f_out(k,j,i) = f_inv(j,i,k)
[1]	198	ENDDO
	199	ENDDO
	200	ENDDO
[683]	201	!$OMP END PARALLEL
[1]	202	ELSE
	203	!
	204	!-- Reorder the array in a way that the z index is in first position
[683]	205	!$OMP PARALLEL PRIVATE ( i, j, k )
	206	!$OMP DO
[1003]	207	DO i = nxl, nxr
	208	DO j = nys, nyn
	209	DO k = 1, nz
[164]	210	f_inv(j,i,k) = f_in(i,j,k)
[1]	211	ENDDO
	212	ENDDO
	213	ENDDO
[683]	214	!$OMP END PARALLEL
[1]	215
[683]	216	!$OMP PARALLEL PRIVATE ( i, j, k )
	217	!$OMP DO
[1003]	218	DO k = 1, nz
	219	DO i = nxl, nxr
	220	DO j = nys, nyn
[164]	221	f_out(k,j,i) = f_inv(j,i,k)
	222	ENDDO
[1]	223	ENDDO
	224	ENDDO
[683]	225	!$OMP END PARALLEL
[1]	226
[164]	227	ENDIF
	228
	229
[1]	230	#endif
	231
	232	END SUBROUTINE transpose_xz
	233
	234
[164]	235	SUBROUTINE transpose_yx( f_in, work, f_out )
[1]	236
	237	!------------------------------------------------------------------------------!
	238	! Description:
	239	! ------------
	240	! Transposition of input array (f_in) from y to x. For the input array, all
	241	! elements along y reside on the same PE, while after transposition, all
	242	! elements along x reside on the same PE.
	243	!------------------------------------------------------------------------------!
	244
	245	USE cpulog
	246	USE indices
	247	USE interfaces
	248	USE pegrid
	249	USE transpose_indices
	250
	251	IMPLICIT NONE
	252
	253	INTEGER :: i, j, k, l, m, ys
	254
[1003]	255	REAL :: f_in(0:ny,nxl_y:nxr_y,nzb_y:nzt_y), &
	256	f_inv(nys_x:nyn_x,nzb_x:nzt_x,0:nx), &
	257	f_out(0:nx,nys_x:nyn_x,nzb_x:nzt_x), &
[164]	258	work(nnxnnynnz)
[1]	259
	260	#if defined( __parallel )
	261
	262	!
	263	!-- Reorder input array for transposition
[683]	264	!$OMP PARALLEL PRIVATE ( i, j, k, l, m, ys )
	265	!$OMP DO
[1]	266	DO l = 0, pdims(2) - 1
[1003]	267	m = l * ( nxr_y - nxl_y + 1 ) * ( nzt_y - nzb_y + 1 ) * &
	268	( nyn_x - nys_x + 1 )
	269	ys = 0 + l * ( nyn_x - nys_x + 1 )
	270	DO i = nxl_y, nxr_y
	271	DO k = nzb_y, nzt_y
	272	DO j = ys, ys + nyn_x - nys_x
[1]	273	m = m + 1
[164]	274	work(m) = f_in(j,i,k)
[1]	275	ENDDO
	276	ENDDO
	277	ENDDO
	278	ENDDO
[683]	279	!$OMP END PARALLEL
[1]	280
	281	!
	282	!-- Transpose array
	283	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
[622]	284	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
[164]	285	CALL MPI_ALLTOALL( work(1), sendrecvcount_xy, MPI_REAL, &
[1]	286	f_inv(nys_x,nzb_x,0), sendrecvcount_xy, MPI_REAL, &
	287	comm1dy, ierr )
	288	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
	289
	290	!
	291	!-- Reorder transposed array in a way that the x index is in first position
[683]	292	!$OMP PARALLEL PRIVATE ( i, j, k )
	293	!$OMP DO
[1003]	294	DO i = 0, nx
	295	DO k = nzb_x, nzt_x
	296	DO j = nys_x, nyn_x
[164]	297	f_out(i,j,k) = f_inv(j,k,i)
[1]	298	ENDDO
	299	ENDDO
	300	ENDDO
[683]	301	!$OMP END PARALLEL
[1]	302
	303	#endif
	304
	305	END SUBROUTINE transpose_yx
	306
	307
[164]	308	SUBROUTINE transpose_yxd( f_in, work, f_out )
[1]	309
	310	!------------------------------------------------------------------------------!
	311	! Description:
	312	! ------------
	313	! Transposition of input array (f_in) from y to x. For the input array, all
	314	! elements along y reside on the same PE, while after transposition, all
	315	! elements along x reside on the same PE.
	316	! This is a direct transposition for arrays with indices in regular order
	317	! (k,j,i) (cf. transpose_yx).
	318	!------------------------------------------------------------------------------!
	319
	320	USE cpulog
	321	USE indices
	322	USE interfaces
	323	USE pegrid
	324	USE transpose_indices
	325
	326	IMPLICIT NONE
	327
	328	INTEGER :: i, j, k, l, m, recvcount_yx, sendcount_yx, xs
	329
[1003]	330	REAL :: f_in(1:nz,nys:nyn,nxl:nxr), f_inv(nxl:nxr,1:nz,nys:nyn), &
	331	f_out(0:nx,nys_x:nyn_x,nzb_x:nzt_x), &
[164]	332	work(nnxnnynnz)
[1]	333
	334	#if defined( __parallel )
	335
	336	!
	337	!-- Rearrange indices of input array in order to make data to be send
	338	!-- by MPI contiguous
[1003]	339	DO k = 1, nz
	340	DO j = nys, nyn
	341	DO i = nxl, nxr
[164]	342	f_inv(i,k,j) = f_in(k,j,i)
[1]	343	ENDDO
	344	ENDDO
	345	ENDDO
	346
	347	!
	348	!-- Transpose array
	349	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
[622]	350	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
[1]	351	CALL MPI_ALLTOALL( f_inv(nxl,1,nys), sendrecvcount_xy, MPI_REAL, &
[164]	352	work(1), sendrecvcount_xy, MPI_REAL, &
[1]	353	comm1dx, ierr )
	354	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
	355
	356	!
	357	!-- Reorder transposed array
	358	m = 0
	359	DO l = 0, pdims(1) - 1
	360	xs = 0 + l * nnx
[1003]	361	DO j = nys_x, nyn_x
	362	DO k = 1, nz
[1]	363	DO i = xs, xs + nnx - 1
	364	m = m + 1
[164]	365	f_out(i,j,k) = work(m)
[1]	366	ENDDO
	367	ENDDO
	368	ENDDO
	369	ENDDO
	370
	371	#endif
	372
	373	END SUBROUTINE transpose_yxd
	374
	375
[164]	376	SUBROUTINE transpose_yz( f_in, work, f_out )
[1]	377
	378	!------------------------------------------------------------------------------!
	379	! Description:
	380	! ------------
	381	! Transposition of input array (f_in) from y to z. For the input array, all
	382	! elements along y reside on the same PE, while after transposition, all
	383	! elements along z reside on the same PE.
	384	!------------------------------------------------------------------------------!
	385
	386	USE cpulog
	387	USE indices
	388	USE interfaces
	389	USE pegrid
	390	USE transpose_indices
	391
	392	IMPLICIT NONE
	393
	394	INTEGER :: i, j, k, l, m, zs
	395
[1003]	396	REAL :: f_in(0:ny,nxl_y:nxr_y,nzb_y:nzt_y), &
	397	f_inv(nxl_y:nxr_y,nzb_y:nzt_y,0:ny), &
	398	f_out(nxl_z:nxr_z,nys_z:nyn_z,1:nz), &
[164]	399	work(nnxnnynnz)
[1]	400
	401	#if defined( __parallel )
	402
	403	!
	404	!-- Rearrange indices of input array in order to make data to be send
	405	!-- by MPI contiguous
[683]	406	!$OMP PARALLEL PRIVATE ( i, j, k )
	407	!$OMP DO
[1003]	408	DO j = 0, ny
	409	DO k = nzb_y, nzt_y
	410	DO i = nxl_y, nxr_y
[164]	411	f_inv(i,k,j) = f_in(j,i,k)
[1]	412	ENDDO
	413	ENDDO
	414	ENDDO
[683]	415	!$OMP END PARALLEL
[1]	416
	417	!
	418	!-- Move data to different array, because memory location of work1 is
	419	!-- needed further below (work1 = work2).
	420	!-- If the PE grid is one-dimensional along y, only local reordering
	421	!-- of the data is necessary and no transposition has to be done.
	422	IF ( pdims(1) == 1 ) THEN
[683]	423	!$OMP PARALLEL PRIVATE ( i, j, k )
	424	!$OMP DO
[1003]	425	DO j = 0, ny
	426	DO k = nzb_y, nzt_y
	427	DO i = nxl_y, nxr_y
[164]	428	f_out(i,j,k) = f_inv(i,k,j)
[1]	429	ENDDO
	430	ENDDO
	431	ENDDO
[683]	432	!$OMP END PARALLEL
[1]	433	RETURN
	434	ENDIF
	435
	436	!
	437	!-- Transpose array
	438	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
[622]	439	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
[1]	440	CALL MPI_ALLTOALL( f_inv(nxl_y,nzb_y,0), sendrecvcount_yz, MPI_REAL, &
[164]	441	work(1), sendrecvcount_yz, MPI_REAL, &
[1]	442	comm1dx, ierr )
	443	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
	444
	445	!
	446	!-- Reorder transposed array
[683]	447	!$OMP PARALLEL PRIVATE ( i, j, k, l, m, zs )
	448	!$OMP DO
[1]	449	DO l = 0, pdims(1) - 1
[1003]	450	m = l * ( nyn_z - nys_z + 1 ) * ( nzt_y - nzb_y + 1 ) * &
	451	( nxr_z - nxl_z + 1 )
	452	zs = 1 + l * ( nzt_y - nzb_y + 1 )
	453	DO j = nys_z, nyn_z
	454	DO k = zs, zs + nzt_y - nzb_y
	455	DO i = nxl_z, nxr_z
[1]	456	m = m + 1
[164]	457	f_out(i,j,k) = work(m)
[1]	458	ENDDO
	459	ENDDO
	460	ENDDO
	461	ENDDO
[683]	462	!$OMP END PARALLEL
[1]	463
	464	#endif
	465
	466	END SUBROUTINE transpose_yz
	467
	468
[164]	469	SUBROUTINE transpose_zx( f_in, work, f_out )
[1]	470
	471	!------------------------------------------------------------------------------!
	472	! Description:
	473	! ------------
	474	! Transposition of input array (f_in) from z to x. For the input array, all
	475	! elements along z reside on the same PE, while after transposition, all
	476	! elements along x reside on the same PE.
	477	!------------------------------------------------------------------------------!
	478
	479	USE cpulog
	480	USE indices
	481	USE interfaces
	482	USE pegrid
	483	USE transpose_indices
	484
	485	IMPLICIT NONE
	486
	487	INTEGER :: i, j, k, l, m, xs
	488
[1003]	489	REAL :: f_in(1:nz,nys:nyn,nxl:nxr), f_inv(nys:nyn,nxl:nxr,1:nz), &
	490	f_out(0:nx,nys_x:nyn_x,nzb_x:nzt_x), &
[164]	491	work(nnxnnynnz)
[1]	492
	493	#if defined( __parallel )
	494
	495	!
	496	!-- Rearrange indices of input array in order to make data to be send
	497	!-- by MPI contiguous
[683]	498	!$OMP PARALLEL PRIVATE ( i, j, k )
	499	!$OMP DO
[1003]	500	DO k = 1,nz
	501	DO i = nxl, nxr
	502	DO j = nys, nyn
[164]	503	f_inv(j,i,k) = f_in(k,j,i)
[1]	504	ENDDO
	505	ENDDO
	506	ENDDO
[683]	507	!$OMP END PARALLEL
[1]	508
	509	!
	510	!-- Move data to different array, because memory location of work1 is
	511	!-- needed further below (work1 = work2).
	512	!-- If the PE grid is one-dimensional along y, only local reordering
	513	!-- of the data is necessary and no transposition has to be done.
	514	IF ( pdims(1) == 1 ) THEN
[683]	515	!$OMP PARALLEL PRIVATE ( i, j, k )
	516	!$OMP DO
[1003]	517	DO k = 1, nz
	518	DO i = nxl, nxr
	519	DO j = nys, nyn
[164]	520	f_out(i,j,k) = f_inv(j,i,k)
[1]	521	ENDDO
	522	ENDDO
	523	ENDDO
[683]	524	!$OMP END PARALLEL
[1]	525	RETURN
	526	ENDIF
	527
	528	!
	529	!-- Transpose array
	530	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
[622]	531	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
[164]	532	CALL MPI_ALLTOALL( f_inv(nys,nxl,1), sendrecvcount_zx, MPI_REAL, &
	533	work(1), sendrecvcount_zx, MPI_REAL, &
[1]	534	comm1dx, ierr )
	535	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
	536
	537	!
	538	!-- Reorder transposed array
[683]	539	!$OMP PARALLEL PRIVATE ( i, j, k, l, m, xs )
	540	!$OMP DO
[1]	541	DO l = 0, pdims(1) - 1
[1003]	542	m = l * ( nzt_x - nzb_x + 1 ) * nnx * ( nyn_x - nys_x + 1 )
[1]	543	xs = 0 + l * nnx
[1003]	544	DO k = nzb_x, nzt_x
[164]	545	DO i = xs, xs + nnx - 1
[1003]	546	DO j = nys_x, nyn_x
[1]	547	m = m + 1
[164]	548	f_out(i,j,k) = work(m)
[1]	549	ENDDO
	550	ENDDO
	551	ENDDO
	552	ENDDO
[683]	553	!$OMP END PARALLEL
[1]	554
	555	#endif
	556
	557	END SUBROUTINE transpose_zx
	558
	559
[164]	560	SUBROUTINE transpose_zy( f_in, work, f_out )
[1]	561
	562	!------------------------------------------------------------------------------!
	563	! Description:
	564	! ------------
	565	! Transposition of input array (f_in) from z to y. For the input array, all
	566	! elements along z reside on the same PE, while after transposition, all
	567	! elements along y reside on the same PE.
	568	!------------------------------------------------------------------------------!
	569
	570	USE cpulog
	571	USE indices
	572	USE interfaces
	573	USE pegrid
	574	USE transpose_indices
	575
	576	IMPLICIT NONE
	577
	578	INTEGER :: i, j, k, l, m, zs
	579
[1003]	580	REAL :: f_in(nxl_z:nxr_z,nys_z:nyn_z,1:nz), &
	581	f_inv(nxl_y:nxr_y,nzb_y:nzt_y,0:ny), &
	582	f_out(0:ny,nxl_y:nxr_y,nzb_y:nzt_y), &
[164]	583	work(nnxnnynnz)
[1]	584
	585	#if defined( __parallel )
	586
	587	!
	588	!-- If the PE grid is one-dimensional along y, the array has only to be
	589	!-- reordered locally and therefore no transposition has to be done.
	590	IF ( pdims(1) /= 1 ) THEN
	591	!
	592	!-- Reorder input array for transposition
[683]	593	!$OMP PARALLEL PRIVATE ( i, j, k, l, m, zs )
	594	!$OMP DO
[1]	595	DO l = 0, pdims(1) - 1
[1003]	596	m = l * ( nyn_z - nys_z + 1 ) * ( nzt_y - nzb_y + 1 ) * &
	597	( nxr_z - nxl_z + 1 )
	598	zs = 1 + l * ( nzt_y - nzb_y + 1 )
	599	DO j = nys_z, nyn_z
	600	DO k = zs, zs + nzt_y - nzb_y
	601	DO i = nxl_z, nxr_z
[1]	602	m = m + 1
[164]	603	work(m) = f_in(i,j,k)
[1]	604	ENDDO
	605	ENDDO
	606	ENDDO
	607	ENDDO
[683]	608	!$OMP END PARALLEL
[1]	609
	610	!
	611	!-- Transpose array
	612	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
[622]	613	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
[164]	614	CALL MPI_ALLTOALL( work(1), sendrecvcount_yz, MPI_REAL, &
[1]	615	f_inv(nxl_y,nzb_y,0), sendrecvcount_yz, MPI_REAL, &
	616	comm1dx, ierr )
	617	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
	618
	619	!
	620	!-- Reorder transposed array in a way that the y index is in first position
[683]	621	!$OMP PARALLEL PRIVATE ( i, j, k )
	622	!$OMP DO
[1003]	623	DO j = 0, ny
	624	DO k = nzb_y, nzt_y
	625	DO i = nxl_y, nxr_y
[164]	626	f_out(j,i,k) = f_inv(i,k,j)
[1]	627	ENDDO
	628	ENDDO
	629	ENDDO
[683]	630	!$OMP END PARALLEL
[1]	631	ELSE
	632	!
	633	!-- Reorder the array in a way that the y index is in first position
[683]	634	!$OMP PARALLEL PRIVATE ( i, j, k )
	635	!$OMP DO
[1003]	636	DO k = nzb_y, nzt_y
	637	DO j = 0, ny
	638	DO i = nxl_y, nxr_y
[164]	639	f_inv(i,k,j) = f_in(i,j,k)
	640	ENDDO
	641	ENDDO
	642	ENDDO
[683]	643	!$OMP END PARALLEL
[164]	644	!
	645	!-- Move data to output array
[683]	646	!$OMP PARALLEL PRIVATE ( i, j, k )
	647	!$OMP DO
[1003]	648	DO k = nzb_y, nzt_y
	649	DO i = nxl_y, nxr_y
	650	DO j = 0, ny
[164]	651	f_out(j,i,k) = f_inv(i,k,j)
[1]	652	ENDDO
	653	ENDDO
	654	ENDDO
[683]	655	!$OMP END PARALLEL
[164]	656
[1]	657	ENDIF
	658
	659	#endif
	660
	661	END SUBROUTINE transpose_zy
	662
	663
[164]	664	SUBROUTINE transpose_zyd( f_in, work, f_out )
[1]	665
	666	!------------------------------------------------------------------------------!
	667	! Description:
	668	! ------------
	669	! Transposition of input array (f_in) from z to y. For the input array, all
	670	! elements along z reside on the same PE, while after transposition, all
	671	! elements along y reside on the same PE.
	672	! This is a direct transposition for arrays with indices in regular order
	673	! (k,j,i) (cf. transpose_zy).
	674	!------------------------------------------------------------------------------!
	675
	676	USE cpulog
	677	USE indices
	678	USE interfaces
	679	USE pegrid
	680	USE transpose_indices
	681
	682	IMPLICIT NONE
	683
	684	INTEGER :: i, j, k, l, m, ys
	685
[1003]	686	REAL :: f_in(1:nz,nys:nyn,nxl:nxr), f_inv(nys:nyn,nxl:nxr,1:nz), &
	687	f_out(0:ny,nxl_yd:nxr_yd,nzb_yd:nzt_yd), &
[164]	688	work(nnxnnynnz)
[1]	689
	690	#if defined( __parallel )
	691
	692	!
	693	!-- Rearrange indices of input array in order to make data to be send
	694	!-- by MPI contiguous
[1003]	695	DO i = nxl, nxr
	696	DO j = nys, nyn
	697	DO k = 1, nz
[164]	698	f_inv(j,i,k) = f_in(k,j,i)
[1]	699	ENDDO
	700	ENDDO
	701	ENDDO
	702
	703	!
	704	!-- Move data to different array, because memory location of work1 is
	705	!-- needed further below (work1 = work2).
	706	!-- If the PE grid is one-dimensional along x, only local reordering
	707	!-- of the data is necessary and no transposition has to be done.
	708	IF ( pdims(2) == 1 ) THEN
[1003]	709	DO k = 1, nz
	710	DO i = nxl, nxr
	711	DO j = nys, nyn
[164]	712	f_out(j,i,k) = f_inv(j,i,k)
[1]	713	ENDDO
	714	ENDDO
	715	ENDDO
	716	RETURN
	717	ENDIF
	718
	719	!
	720	!-- Transpose array
	721	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
[622]	722	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
[1]	723	CALL MPI_ALLTOALL( f_inv(nys,nxl,1), sendrecvcount_zyd, MPI_REAL, &
[164]	724	work(1), sendrecvcount_zyd, MPI_REAL, &
[1]	725	comm1dy, ierr )
	726	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
	727
	728	!
	729	!-- Reorder transposed array
	730	m = 0
	731	DO l = 0, pdims(2) - 1
	732	ys = 0 + l * nny
[1003]	733	DO k = nzb_yd, nzt_yd
	734	DO i = nxl_yd, nxr_yd
[1]	735	DO j = ys, ys + nny - 1
	736	m = m + 1
[164]	737	f_out(j,i,k) = work(m)
[1]	738	ENDDO
	739	ENDDO
	740	ENDDO
	741	ENDDO
	742
	743	#endif
	744
	745	END SUBROUTINE transpose_zyd

Note: See TracBrowser for help on using the repository browser.

Download in other formats:

| Impressum | ©Leibniz Universität Hannover |