Home

Context Navigation

source: palm/trunk/SOURCE/transpose.f90 @ 1099

Last change on this file since 1099 was 1093, checked in by raasch, 12 years ago
last commit documented
Property svn:keywords set to `Id`
File size: 20.7 KB

Rev	Line
[164]	1	SUBROUTINE transpose_xy( f_in, work, f_out )
[1]	2
[1036]	3	!--------------------------------------------------------------------------------!
	4	! This file is part of PALM.
	5	!
	6	! PALM is free software: you can redistribute it and/or modify it under the terms
	7	! of the GNU General Public License as published by the Free Software Foundation,
	8	! either version 3 of the License, or (at your option) any later version.
	9	!
	10	! PALM is distributed in the hope that it will be useful, but WITHOUT ANY
	11	! WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
	12	! A PARTICULAR PURPOSE. See the GNU General Public License for more details.
	13	!
	14	! You should have received a copy of the GNU General Public License along with
	15	! PALM. If not, see <http://www.gnu.org/licenses/>.
	16	!
	17	! Copyright 1997-2012 Leibniz University Hannover
	18	!--------------------------------------------------------------------------------!
	19	!
[484]	20	! Current revisions:
[1]	21	! -----------------
[1093]	22	!
[198]	23	!
	24	! Former revisions:
	25	! -----------------
	26	! $Id: transpose.f90 1093 2013-02-02 12:58:49Z raasch $
	27	!
[1093]	28	! 1092 2013-02-02 11:24:22Z raasch
	29	! unused variables removed
	30	!
[1037]	31	! 1036 2012-10-22 13:43:42Z raasch
	32	! code put under GPL (PALM 3.9)
	33	!
[1004]	34	! 1003 2012-09-14 14:35:53Z raasch
	35	! indices nxa, nya, etc. replaced by nx, ny, etc.
	36	!
[684]	37	! 683 2011-02-09 14:25:15Z raasch
	38	! openMP parallelization of transpositions for 2d-domain-decomposition
	39	!
[623]	40	! 622 2010-12-10 08:08:13Z raasch
	41	! optional barriers included in order to speed up collective operations
	42	!
[198]	43	! 164 2008-05-15 08:46:15Z raasch
[164]	44	! f_inv changed from subroutine argument to automatic array in order to do
	45	! re-ordering from f_in to f_inv in one step, one array work is needed instead
	46	! of work1 and work2
[1]	47	!
[198]	48	! February 2007
[3]	49	! RCS Log replace by Id keyword, revision history cleaned up
	50	!
[1]	51	! Revision 1.2 2004/04/30 13:12:17 raasch
	52	! Switched from mpi_alltoallv to the simpler mpi_alltoall,
	53	! all former transpose-routine files collected in this file, enlarged
	54	! transposition arrays introduced
	55	!
	56	! Revision 1.1 2004/04/30 13:08:16 raasch
	57	! Initial revision (collection of former routines transpose_xy, transpose_xz,
	58	! transpose_yx, transpose_yz, transpose_zx, transpose_zy)
	59	!
	60	! Revision 1.1 1997/07/24 11:25:18 raasch
	61	! Initial revision
	62	!
	63	!
	64	! Description:
	65	! ------------
	66	! Transposition of input array (f_in) from x to y. For the input array, all
	67	! elements along x reside on the same PE, while after transposition, all
	68	! elements along y reside on the same PE.
	69	!------------------------------------------------------------------------------!
	70
	71	USE cpulog
	72	USE indices
	73	USE interfaces
	74	USE pegrid
	75	USE transpose_indices
	76
	77	IMPLICIT NONE
	78
	79	INTEGER :: i, j, k, l, m, ys
	80
[1003]	81	REAL :: f_in(0:nx,nys_x:nyn_x,nzb_x:nzt_x), &
	82	f_inv(nys_x:nyn_x,nzb_x:nzt_x,0:nx), &
	83	f_out(0:ny,nxl_y:nxr_y,nzb_y:nzt_y), &
[164]	84	work(nnxnnynnz)
[1]	85
	86	#if defined( __parallel )
	87
	88	!
	89	!-- Rearrange indices of input array in order to make data to be send
	90	!-- by MPI contiguous
[683]	91	!$OMP PARALLEL PRIVATE ( i, j, k )
	92	!$OMP DO
[1003]	93	DO i = 0, nx
	94	DO k = nzb_x, nzt_x
	95	DO j = nys_x, nyn_x
[164]	96	f_inv(j,k,i) = f_in(i,j,k)
[1]	97	ENDDO
	98	ENDDO
	99	ENDDO
[683]	100	!$OMP END PARALLEL
[1]	101
	102	!
	103	!-- Transpose array
	104	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
[622]	105	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
[1]	106	CALL MPI_ALLTOALL( f_inv(nys_x,nzb_x,0), sendrecvcount_xy, MPI_REAL, &
[164]	107	work(1), sendrecvcount_xy, MPI_REAL, &
[1]	108	comm1dy, ierr )
	109	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
	110
	111	!
	112	!-- Reorder transposed array
[683]	113	!$OMP PARALLEL PRIVATE ( i, j, k, l, m, ys )
	114	!$OMP DO
[1]	115	DO l = 0, pdims(2) - 1
[1003]	116	m = l * ( nxr_y - nxl_y + 1 ) * ( nzt_y - nzb_y + 1 ) * &
	117	( nyn_x - nys_x + 1 )
	118	ys = 0 + l * ( nyn_x - nys_x + 1 )
	119	DO i = nxl_y, nxr_y
	120	DO k = nzb_y, nzt_y
	121	DO j = ys, ys + nyn_x - nys_x
[1]	122	m = m + 1
[164]	123	f_out(j,i,k) = work(m)
[1]	124	ENDDO
	125	ENDDO
	126	ENDDO
	127	ENDDO
[683]	128	!$OMP END PARALLEL
[1]	129
	130	#endif
	131
	132	END SUBROUTINE transpose_xy
	133
	134
[164]	135	SUBROUTINE transpose_xz( f_in, work, f_out )
[1]	136
	137	!------------------------------------------------------------------------------!
	138	! Description:
	139	! ------------
	140	! Transposition of input array (f_in) from x to z. For the input array, all
	141	! elements along x reside on the same PE, while after transposition, all
	142	! elements along z reside on the same PE.
	143	!------------------------------------------------------------------------------!
	144
	145	USE cpulog
	146	USE indices
	147	USE interfaces
	148	USE pegrid
	149	USE transpose_indices
	150
	151	IMPLICIT NONE
	152
	153	INTEGER :: i, j, k, l, m, xs
	154
[1003]	155	REAL :: f_in(0:nx,nys_x:nyn_x,nzb_x:nzt_x), &
	156	f_inv(nys:nyn,nxl:nxr,1:nz), &
	157	f_out(1:nz,nys:nyn,nxl:nxr), &
[164]	158	work(nnxnnynnz)
[1]	159
	160	#if defined( __parallel )
	161
	162	!
	163	!-- If the PE grid is one-dimensional along y, the array has only to be
	164	!-- reordered locally and therefore no transposition has to be done.
	165	IF ( pdims(1) /= 1 ) THEN
	166	!
	167	!-- Reorder input array for transposition
[683]	168	!$OMP PARALLEL PRIVATE ( i, j, k, l, m, xs )
	169	!$OMP DO
[1]	170	DO l = 0, pdims(1) - 1
[1003]	171	m = l * ( nzt_x - nzb_x + 1 ) * nnx * ( nyn_x - nys_x + 1 )
[1]	172	xs = 0 + l * nnx
[1003]	173	DO k = nzb_x, nzt_x
[164]	174	DO i = xs, xs + nnx - 1
[1003]	175	DO j = nys_x, nyn_x
[1]	176	m = m + 1
[164]	177	work(m) = f_in(i,j,k)
[1]	178	ENDDO
	179	ENDDO
	180	ENDDO
	181	ENDDO
[683]	182	!$OMP END PARALLEL
[1]	183
	184	!
	185	!-- Transpose array
	186	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
[622]	187	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
[164]	188	CALL MPI_ALLTOALL( work(1), sendrecvcount_zx, MPI_REAL, &
	189	f_inv(nys,nxl,1), sendrecvcount_zx, MPI_REAL, &
[1]	190	comm1dx, ierr )
	191	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
	192
	193	!
	194	!-- Reorder transposed array in a way that the z index is in first position
[683]	195	!$OMP PARALLEL PRIVATE ( i, j, k )
	196	!$OMP DO
[1003]	197	DO k = 1, nz
	198	DO i = nxl, nxr
	199	DO j = nys, nyn
[164]	200	f_out(k,j,i) = f_inv(j,i,k)
[1]	201	ENDDO
	202	ENDDO
	203	ENDDO
[683]	204	!$OMP END PARALLEL
[1]	205	ELSE
	206	!
	207	!-- Reorder the array in a way that the z index is in first position
[683]	208	!$OMP PARALLEL PRIVATE ( i, j, k )
	209	!$OMP DO
[1003]	210	DO i = nxl, nxr
	211	DO j = nys, nyn
	212	DO k = 1, nz
[164]	213	f_inv(j,i,k) = f_in(i,j,k)
[1]	214	ENDDO
	215	ENDDO
	216	ENDDO
[683]	217	!$OMP END PARALLEL
[1]	218
[683]	219	!$OMP PARALLEL PRIVATE ( i, j, k )
	220	!$OMP DO
[1003]	221	DO k = 1, nz
	222	DO i = nxl, nxr
	223	DO j = nys, nyn
[164]	224	f_out(k,j,i) = f_inv(j,i,k)
	225	ENDDO
[1]	226	ENDDO
	227	ENDDO
[683]	228	!$OMP END PARALLEL
[1]	229
[164]	230	ENDIF
	231
	232
[1]	233	#endif
	234
	235	END SUBROUTINE transpose_xz
	236
	237
[164]	238	SUBROUTINE transpose_yx( f_in, work, f_out )
[1]	239
	240	!------------------------------------------------------------------------------!
	241	! Description:
	242	! ------------
	243	! Transposition of input array (f_in) from y to x. For the input array, all
	244	! elements along y reside on the same PE, while after transposition, all
	245	! elements along x reside on the same PE.
	246	!------------------------------------------------------------------------------!
	247
	248	USE cpulog
	249	USE indices
	250	USE interfaces
	251	USE pegrid
	252	USE transpose_indices
	253
	254	IMPLICIT NONE
	255
	256	INTEGER :: i, j, k, l, m, ys
	257
[1003]	258	REAL :: f_in(0:ny,nxl_y:nxr_y,nzb_y:nzt_y), &
	259	f_inv(nys_x:nyn_x,nzb_x:nzt_x,0:nx), &
	260	f_out(0:nx,nys_x:nyn_x,nzb_x:nzt_x), &
[164]	261	work(nnxnnynnz)
[1]	262
	263	#if defined( __parallel )
	264
	265	!
	266	!-- Reorder input array for transposition
[683]	267	!$OMP PARALLEL PRIVATE ( i, j, k, l, m, ys )
	268	!$OMP DO
[1]	269	DO l = 0, pdims(2) - 1
[1003]	270	m = l * ( nxr_y - nxl_y + 1 ) * ( nzt_y - nzb_y + 1 ) * &
	271	( nyn_x - nys_x + 1 )
	272	ys = 0 + l * ( nyn_x - nys_x + 1 )
	273	DO i = nxl_y, nxr_y
	274	DO k = nzb_y, nzt_y
	275	DO j = ys, ys + nyn_x - nys_x
[1]	276	m = m + 1
[164]	277	work(m) = f_in(j,i,k)
[1]	278	ENDDO
	279	ENDDO
	280	ENDDO
	281	ENDDO
[683]	282	!$OMP END PARALLEL
[1]	283
	284	!
	285	!-- Transpose array
	286	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
[622]	287	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
[164]	288	CALL MPI_ALLTOALL( work(1), sendrecvcount_xy, MPI_REAL, &
[1]	289	f_inv(nys_x,nzb_x,0), sendrecvcount_xy, MPI_REAL, &
	290	comm1dy, ierr )
	291	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
	292
	293	!
	294	!-- Reorder transposed array in a way that the x index is in first position
[683]	295	!$OMP PARALLEL PRIVATE ( i, j, k )
	296	!$OMP DO
[1003]	297	DO i = 0, nx
	298	DO k = nzb_x, nzt_x
	299	DO j = nys_x, nyn_x
[164]	300	f_out(i,j,k) = f_inv(j,k,i)
[1]	301	ENDDO
	302	ENDDO
	303	ENDDO
[683]	304	!$OMP END PARALLEL
[1]	305
	306	#endif
	307
	308	END SUBROUTINE transpose_yx
	309
	310
[164]	311	SUBROUTINE transpose_yxd( f_in, work, f_out )
[1]	312
	313	!------------------------------------------------------------------------------!
	314	! Description:
	315	! ------------
	316	! Transposition of input array (f_in) from y to x. For the input array, all
	317	! elements along y reside on the same PE, while after transposition, all
	318	! elements along x reside on the same PE.
	319	! This is a direct transposition for arrays with indices in regular order
	320	! (k,j,i) (cf. transpose_yx).
	321	!------------------------------------------------------------------------------!
	322
	323	USE cpulog
	324	USE indices
	325	USE interfaces
	326	USE pegrid
	327	USE transpose_indices
	328
	329	IMPLICIT NONE
	330
[1092]	331	INTEGER :: i, j, k, l, m, xs
[1]	332
[1003]	333	REAL :: f_in(1:nz,nys:nyn,nxl:nxr), f_inv(nxl:nxr,1:nz,nys:nyn), &
	334	f_out(0:nx,nys_x:nyn_x,nzb_x:nzt_x), &
[164]	335	work(nnxnnynnz)
[1]	336
	337	#if defined( __parallel )
	338
	339	!
	340	!-- Rearrange indices of input array in order to make data to be send
	341	!-- by MPI contiguous
[1003]	342	DO k = 1, nz
	343	DO j = nys, nyn
	344	DO i = nxl, nxr
[164]	345	f_inv(i,k,j) = f_in(k,j,i)
[1]	346	ENDDO
	347	ENDDO
	348	ENDDO
	349
	350	!
	351	!-- Transpose array
	352	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
[622]	353	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
[1]	354	CALL MPI_ALLTOALL( f_inv(nxl,1,nys), sendrecvcount_xy, MPI_REAL, &
[164]	355	work(1), sendrecvcount_xy, MPI_REAL, &
[1]	356	comm1dx, ierr )
	357	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
	358
	359	!
	360	!-- Reorder transposed array
	361	m = 0
	362	DO l = 0, pdims(1) - 1
	363	xs = 0 + l * nnx
[1003]	364	DO j = nys_x, nyn_x
	365	DO k = 1, nz
[1]	366	DO i = xs, xs + nnx - 1
	367	m = m + 1
[164]	368	f_out(i,j,k) = work(m)
[1]	369	ENDDO
	370	ENDDO
	371	ENDDO
	372	ENDDO
	373
	374	#endif
	375
	376	END SUBROUTINE transpose_yxd
	377
	378
[164]	379	SUBROUTINE transpose_yz( f_in, work, f_out )
[1]	380
	381	!------------------------------------------------------------------------------!
	382	! Description:
	383	! ------------
	384	! Transposition of input array (f_in) from y to z. For the input array, all
	385	! elements along y reside on the same PE, while after transposition, all
	386	! elements along z reside on the same PE.
	387	!------------------------------------------------------------------------------!
	388
	389	USE cpulog
	390	USE indices
	391	USE interfaces
	392	USE pegrid
	393	USE transpose_indices
	394
	395	IMPLICIT NONE
	396
	397	INTEGER :: i, j, k, l, m, zs
	398
[1003]	399	REAL :: f_in(0:ny,nxl_y:nxr_y,nzb_y:nzt_y), &
	400	f_inv(nxl_y:nxr_y,nzb_y:nzt_y,0:ny), &
	401	f_out(nxl_z:nxr_z,nys_z:nyn_z,1:nz), &
[164]	402	work(nnxnnynnz)
[1]	403
	404	#if defined( __parallel )
	405
	406	!
	407	!-- Rearrange indices of input array in order to make data to be send
	408	!-- by MPI contiguous
[683]	409	!$OMP PARALLEL PRIVATE ( i, j, k )
	410	!$OMP DO
[1003]	411	DO j = 0, ny
	412	DO k = nzb_y, nzt_y
	413	DO i = nxl_y, nxr_y
[164]	414	f_inv(i,k,j) = f_in(j,i,k)
[1]	415	ENDDO
	416	ENDDO
	417	ENDDO
[683]	418	!$OMP END PARALLEL
[1]	419
	420	!
	421	!-- Move data to different array, because memory location of work1 is
	422	!-- needed further below (work1 = work2).
	423	!-- If the PE grid is one-dimensional along y, only local reordering
	424	!-- of the data is necessary and no transposition has to be done.
	425	IF ( pdims(1) == 1 ) THEN
[683]	426	!$OMP PARALLEL PRIVATE ( i, j, k )
	427	!$OMP DO
[1003]	428	DO j = 0, ny
	429	DO k = nzb_y, nzt_y
	430	DO i = nxl_y, nxr_y
[164]	431	f_out(i,j,k) = f_inv(i,k,j)
[1]	432	ENDDO
	433	ENDDO
	434	ENDDO
[683]	435	!$OMP END PARALLEL
[1]	436	RETURN
	437	ENDIF
	438
	439	!
	440	!-- Transpose array
	441	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
[622]	442	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
[1]	443	CALL MPI_ALLTOALL( f_inv(nxl_y,nzb_y,0), sendrecvcount_yz, MPI_REAL, &
[164]	444	work(1), sendrecvcount_yz, MPI_REAL, &
[1]	445	comm1dx, ierr )
	446	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
	447
	448	!
	449	!-- Reorder transposed array
[683]	450	!$OMP PARALLEL PRIVATE ( i, j, k, l, m, zs )
	451	!$OMP DO
[1]	452	DO l = 0, pdims(1) - 1
[1003]	453	m = l * ( nyn_z - nys_z + 1 ) * ( nzt_y - nzb_y + 1 ) * &
	454	( nxr_z - nxl_z + 1 )
	455	zs = 1 + l * ( nzt_y - nzb_y + 1 )
	456	DO j = nys_z, nyn_z
	457	DO k = zs, zs + nzt_y - nzb_y
	458	DO i = nxl_z, nxr_z
[1]	459	m = m + 1
[164]	460	f_out(i,j,k) = work(m)
[1]	461	ENDDO
	462	ENDDO
	463	ENDDO
	464	ENDDO
[683]	465	!$OMP END PARALLEL
[1]	466
	467	#endif
	468
	469	END SUBROUTINE transpose_yz
	470
	471
[164]	472	SUBROUTINE transpose_zx( f_in, work, f_out )
[1]	473
	474	!------------------------------------------------------------------------------!
	475	! Description:
	476	! ------------
	477	! Transposition of input array (f_in) from z to x. For the input array, all
	478	! elements along z reside on the same PE, while after transposition, all
	479	! elements along x reside on the same PE.
	480	!------------------------------------------------------------------------------!
	481
	482	USE cpulog
	483	USE indices
	484	USE interfaces
	485	USE pegrid
	486	USE transpose_indices
	487
	488	IMPLICIT NONE
	489
	490	INTEGER :: i, j, k, l, m, xs
	491
[1003]	492	REAL :: f_in(1:nz,nys:nyn,nxl:nxr), f_inv(nys:nyn,nxl:nxr,1:nz), &
	493	f_out(0:nx,nys_x:nyn_x,nzb_x:nzt_x), &
[164]	494	work(nnxnnynnz)
[1]	495
	496	#if defined( __parallel )
	497
	498	!
	499	!-- Rearrange indices of input array in order to make data to be send
	500	!-- by MPI contiguous
[683]	501	!$OMP PARALLEL PRIVATE ( i, j, k )
	502	!$OMP DO
[1003]	503	DO k = 1,nz
	504	DO i = nxl, nxr
	505	DO j = nys, nyn
[164]	506	f_inv(j,i,k) = f_in(k,j,i)
[1]	507	ENDDO
	508	ENDDO
	509	ENDDO
[683]	510	!$OMP END PARALLEL
[1]	511
	512	!
	513	!-- Move data to different array, because memory location of work1 is
	514	!-- needed further below (work1 = work2).
	515	!-- If the PE grid is one-dimensional along y, only local reordering
	516	!-- of the data is necessary and no transposition has to be done.
	517	IF ( pdims(1) == 1 ) THEN
[683]	518	!$OMP PARALLEL PRIVATE ( i, j, k )
	519	!$OMP DO
[1003]	520	DO k = 1, nz
	521	DO i = nxl, nxr
	522	DO j = nys, nyn
[164]	523	f_out(i,j,k) = f_inv(j,i,k)
[1]	524	ENDDO
	525	ENDDO
	526	ENDDO
[683]	527	!$OMP END PARALLEL
[1]	528	RETURN
	529	ENDIF
	530
	531	!
	532	!-- Transpose array
	533	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
[622]	534	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
[164]	535	CALL MPI_ALLTOALL( f_inv(nys,nxl,1), sendrecvcount_zx, MPI_REAL, &
	536	work(1), sendrecvcount_zx, MPI_REAL, &
[1]	537	comm1dx, ierr )
	538	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
	539
	540	!
	541	!-- Reorder transposed array
[683]	542	!$OMP PARALLEL PRIVATE ( i, j, k, l, m, xs )
	543	!$OMP DO
[1]	544	DO l = 0, pdims(1) - 1
[1003]	545	m = l * ( nzt_x - nzb_x + 1 ) * nnx * ( nyn_x - nys_x + 1 )
[1]	546	xs = 0 + l * nnx
[1003]	547	DO k = nzb_x, nzt_x
[164]	548	DO i = xs, xs + nnx - 1
[1003]	549	DO j = nys_x, nyn_x
[1]	550	m = m + 1
[164]	551	f_out(i,j,k) = work(m)
[1]	552	ENDDO
	553	ENDDO
	554	ENDDO
	555	ENDDO
[683]	556	!$OMP END PARALLEL
[1]	557
	558	#endif
	559
	560	END SUBROUTINE transpose_zx
	561
	562
[164]	563	SUBROUTINE transpose_zy( f_in, work, f_out )
[1]	564
	565	!------------------------------------------------------------------------------!
	566	! Description:
	567	! ------------
	568	! Transposition of input array (f_in) from z to y. For the input array, all
	569	! elements along z reside on the same PE, while after transposition, all
	570	! elements along y reside on the same PE.
	571	!------------------------------------------------------------------------------!
	572
	573	USE cpulog
	574	USE indices
	575	USE interfaces
	576	USE pegrid
	577	USE transpose_indices
	578
	579	IMPLICIT NONE
	580
	581	INTEGER :: i, j, k, l, m, zs
	582
[1003]	583	REAL :: f_in(nxl_z:nxr_z,nys_z:nyn_z,1:nz), &
	584	f_inv(nxl_y:nxr_y,nzb_y:nzt_y,0:ny), &
	585	f_out(0:ny,nxl_y:nxr_y,nzb_y:nzt_y), &
[164]	586	work(nnxnnynnz)
[1]	587
	588	#if defined( __parallel )
	589
	590	!
	591	!-- If the PE grid is one-dimensional along y, the array has only to be
	592	!-- reordered locally and therefore no transposition has to be done.
	593	IF ( pdims(1) /= 1 ) THEN
	594	!
	595	!-- Reorder input array for transposition
[683]	596	!$OMP PARALLEL PRIVATE ( i, j, k, l, m, zs )
	597	!$OMP DO
[1]	598	DO l = 0, pdims(1) - 1
[1003]	599	m = l * ( nyn_z - nys_z + 1 ) * ( nzt_y - nzb_y + 1 ) * &
	600	( nxr_z - nxl_z + 1 )
	601	zs = 1 + l * ( nzt_y - nzb_y + 1 )
	602	DO j = nys_z, nyn_z
	603	DO k = zs, zs + nzt_y - nzb_y
	604	DO i = nxl_z, nxr_z
[1]	605	m = m + 1
[164]	606	work(m) = f_in(i,j,k)
[1]	607	ENDDO
	608	ENDDO
	609	ENDDO
	610	ENDDO
[683]	611	!$OMP END PARALLEL
[1]	612
	613	!
	614	!-- Transpose array
	615	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
[622]	616	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
[164]	617	CALL MPI_ALLTOALL( work(1), sendrecvcount_yz, MPI_REAL, &
[1]	618	f_inv(nxl_y,nzb_y,0), sendrecvcount_yz, MPI_REAL, &
	619	comm1dx, ierr )
	620	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
	621
	622	!
	623	!-- Reorder transposed array in a way that the y index is in first position
[683]	624	!$OMP PARALLEL PRIVATE ( i, j, k )
	625	!$OMP DO
[1003]	626	DO j = 0, ny
	627	DO k = nzb_y, nzt_y
	628	DO i = nxl_y, nxr_y
[164]	629	f_out(j,i,k) = f_inv(i,k,j)
[1]	630	ENDDO
	631	ENDDO
	632	ENDDO
[683]	633	!$OMP END PARALLEL
[1]	634	ELSE
	635	!
	636	!-- Reorder the array in a way that the y index is in first position
[683]	637	!$OMP PARALLEL PRIVATE ( i, j, k )
	638	!$OMP DO
[1003]	639	DO k = nzb_y, nzt_y
	640	DO j = 0, ny
	641	DO i = nxl_y, nxr_y
[164]	642	f_inv(i,k,j) = f_in(i,j,k)
	643	ENDDO
	644	ENDDO
	645	ENDDO
[683]	646	!$OMP END PARALLEL
[164]	647	!
	648	!-- Move data to output array
[683]	649	!$OMP PARALLEL PRIVATE ( i, j, k )
	650	!$OMP DO
[1003]	651	DO k = nzb_y, nzt_y
	652	DO i = nxl_y, nxr_y
	653	DO j = 0, ny
[164]	654	f_out(j,i,k) = f_inv(i,k,j)
[1]	655	ENDDO
	656	ENDDO
	657	ENDDO
[683]	658	!$OMP END PARALLEL
[164]	659
[1]	660	ENDIF
	661
	662	#endif
	663
	664	END SUBROUTINE transpose_zy
	665
	666
[164]	667	SUBROUTINE transpose_zyd( f_in, work, f_out )
[1]	668
	669	!------------------------------------------------------------------------------!
	670	! Description:
	671	! ------------
	672	! Transposition of input array (f_in) from z to y. For the input array, all
	673	! elements along z reside on the same PE, while after transposition, all
	674	! elements along y reside on the same PE.
	675	! This is a direct transposition for arrays with indices in regular order
	676	! (k,j,i) (cf. transpose_zy).
	677	!------------------------------------------------------------------------------!
	678
	679	USE cpulog
	680	USE indices
	681	USE interfaces
	682	USE pegrid
	683	USE transpose_indices
	684
	685	IMPLICIT NONE
	686
	687	INTEGER :: i, j, k, l, m, ys
	688
[1003]	689	REAL :: f_in(1:nz,nys:nyn,nxl:nxr), f_inv(nys:nyn,nxl:nxr,1:nz), &
	690	f_out(0:ny,nxl_yd:nxr_yd,nzb_yd:nzt_yd), &
[164]	691	work(nnxnnynnz)
[1]	692
	693	#if defined( __parallel )
	694
	695	!
	696	!-- Rearrange indices of input array in order to make data to be send
	697	!-- by MPI contiguous
[1003]	698	DO i = nxl, nxr
	699	DO j = nys, nyn
	700	DO k = 1, nz
[164]	701	f_inv(j,i,k) = f_in(k,j,i)
[1]	702	ENDDO
	703	ENDDO
	704	ENDDO
	705
	706	!
	707	!-- Move data to different array, because memory location of work1 is
	708	!-- needed further below (work1 = work2).
	709	!-- If the PE grid is one-dimensional along x, only local reordering
	710	!-- of the data is necessary and no transposition has to be done.
	711	IF ( pdims(2) == 1 ) THEN
[1003]	712	DO k = 1, nz
	713	DO i = nxl, nxr
	714	DO j = nys, nyn
[164]	715	f_out(j,i,k) = f_inv(j,i,k)
[1]	716	ENDDO
	717	ENDDO
	718	ENDDO
	719	RETURN
	720	ENDIF
	721
	722	!
	723	!-- Transpose array
	724	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
[622]	725	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
[1]	726	CALL MPI_ALLTOALL( f_inv(nys,nxl,1), sendrecvcount_zyd, MPI_REAL, &
[164]	727	work(1), sendrecvcount_zyd, MPI_REAL, &
[1]	728	comm1dy, ierr )
	729	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
	730
	731	!
	732	!-- Reorder transposed array
	733	m = 0
	734	DO l = 0, pdims(2) - 1
	735	ys = 0 + l * nny
[1003]	736	DO k = nzb_yd, nzt_yd
	737	DO i = nxl_yd, nxr_yd
[1]	738	DO j = ys, ys + nny - 1
	739	m = m + 1
[164]	740	f_out(j,i,k) = work(m)
[1]	741	ENDDO
	742	ENDDO
	743	ENDDO
	744	ENDDO
	745
	746	#endif
	747
	748	END SUBROUTINE transpose_zyd

Note: See TracBrowser for help on using the repository browser.

Download in other formats:

| Impressum | ©Leibniz Universität Hannover |