Home

Context Navigation

source: palm/trunk/SOURCE/transpose.f90 @ 1004

Last change on this file since 1004 was 1004, checked in by raasch, 12 years ago
last commit documented
Property svn:keywords set to `Id`
File size: 19.9 KB

Rev	Line
[164]	1	SUBROUTINE transpose_xy( f_in, work, f_out )
[1]	2
	3	!------------------------------------------------------------------------------!
[484]	4	! Current revisions:
[1]	5	! -----------------
[198]	6	!
[1004]	7	!
[198]	8	! Former revisions:
	9	! -----------------
	10	! $Id: transpose.f90 1004 2012-09-14 14:56:50Z raasch $
	11	!
[1004]	12	! 1003 2012-09-14 14:35:53Z raasch
	13	! indices nxa, nya, etc. replaced by nx, ny, etc.
	14	!
[684]	15	! 683 2011-02-09 14:25:15Z raasch
	16	! openMP parallelization of transpositions for 2d-domain-decomposition
	17	!
[623]	18	! 622 2010-12-10 08:08:13Z raasch
	19	! optional barriers included in order to speed up collective operations
	20	!
[198]	21	! 164 2008-05-15 08:46:15Z raasch
[164]	22	! f_inv changed from subroutine argument to automatic array in order to do
	23	! re-ordering from f_in to f_inv in one step, one array work is needed instead
	24	! of work1 and work2
[1]	25	!
[198]	26	! February 2007
[3]	27	! RCS Log replace by Id keyword, revision history cleaned up
	28	!
[1]	29	! Revision 1.2 2004/04/30 13:12:17 raasch
	30	! Switched from mpi_alltoallv to the simpler mpi_alltoall,
	31	! all former transpose-routine files collected in this file, enlarged
	32	! transposition arrays introduced
	33	!
	34	! Revision 1.1 2004/04/30 13:08:16 raasch
	35	! Initial revision (collection of former routines transpose_xy, transpose_xz,
	36	! transpose_yx, transpose_yz, transpose_zx, transpose_zy)
	37	!
	38	! Revision 1.1 1997/07/24 11:25:18 raasch
	39	! Initial revision
	40	!
	41	!
	42	! Description:
	43	! ------------
	44	! Transposition of input array (f_in) from x to y. For the input array, all
	45	! elements along x reside on the same PE, while after transposition, all
	46	! elements along y reside on the same PE.
	47	!------------------------------------------------------------------------------!
	48
	49	USE cpulog
	50	USE indices
	51	USE interfaces
	52	USE pegrid
	53	USE transpose_indices
	54
	55	IMPLICIT NONE
	56
	57	INTEGER :: i, j, k, l, m, ys
	58
[1003]	59	REAL :: f_in(0:nx,nys_x:nyn_x,nzb_x:nzt_x), &
	60	f_inv(nys_x:nyn_x,nzb_x:nzt_x,0:nx), &
	61	f_out(0:ny,nxl_y:nxr_y,nzb_y:nzt_y), &
[164]	62	work(nnxnnynnz)
[1]	63
	64	#if defined( __parallel )
	65
	66	!
	67	!-- Rearrange indices of input array in order to make data to be send
	68	!-- by MPI contiguous
[683]	69	!$OMP PARALLEL PRIVATE ( i, j, k )
	70	!$OMP DO
[1003]	71	DO i = 0, nx
	72	DO k = nzb_x, nzt_x
	73	DO j = nys_x, nyn_x
[164]	74	f_inv(j,k,i) = f_in(i,j,k)
[1]	75	ENDDO
	76	ENDDO
	77	ENDDO
[683]	78	!$OMP END PARALLEL
[1]	79
	80	!
	81	!-- Transpose array
	82	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
[622]	83	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
[1]	84	CALL MPI_ALLTOALL( f_inv(nys_x,nzb_x,0), sendrecvcount_xy, MPI_REAL, &
[164]	85	work(1), sendrecvcount_xy, MPI_REAL, &
[1]	86	comm1dy, ierr )
	87	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
	88
	89	!
	90	!-- Reorder transposed array
[683]	91	!$OMP PARALLEL PRIVATE ( i, j, k, l, m, ys )
	92	!$OMP DO
[1]	93	DO l = 0, pdims(2) - 1
[1003]	94	m = l * ( nxr_y - nxl_y + 1 ) * ( nzt_y - nzb_y + 1 ) * &
	95	( nyn_x - nys_x + 1 )
	96	ys = 0 + l * ( nyn_x - nys_x + 1 )
	97	DO i = nxl_y, nxr_y
	98	DO k = nzb_y, nzt_y
	99	DO j = ys, ys + nyn_x - nys_x
[1]	100	m = m + 1
[164]	101	f_out(j,i,k) = work(m)
[1]	102	ENDDO
	103	ENDDO
	104	ENDDO
	105	ENDDO
[683]	106	!$OMP END PARALLEL
[1]	107
	108	#endif
	109
	110	END SUBROUTINE transpose_xy
	111
	112
[164]	113	SUBROUTINE transpose_xz( f_in, work, f_out )
[1]	114
	115	!------------------------------------------------------------------------------!
	116	! Description:
	117	! ------------
	118	! Transposition of input array (f_in) from x to z. For the input array, all
	119	! elements along x reside on the same PE, while after transposition, all
	120	! elements along z reside on the same PE.
	121	!------------------------------------------------------------------------------!
	122
	123	USE cpulog
	124	USE indices
	125	USE interfaces
	126	USE pegrid
	127	USE transpose_indices
	128
	129	IMPLICIT NONE
	130
	131	INTEGER :: i, j, k, l, m, xs
	132
[1003]	133	REAL :: f_in(0:nx,nys_x:nyn_x,nzb_x:nzt_x), &
	134	f_inv(nys:nyn,nxl:nxr,1:nz), &
	135	f_out(1:nz,nys:nyn,nxl:nxr), &
[164]	136	work(nnxnnynnz)
[1]	137
	138	#if defined( __parallel )
	139
	140	!
	141	!-- If the PE grid is one-dimensional along y, the array has only to be
	142	!-- reordered locally and therefore no transposition has to be done.
	143	IF ( pdims(1) /= 1 ) THEN
	144	!
	145	!-- Reorder input array for transposition
[683]	146	!$OMP PARALLEL PRIVATE ( i, j, k, l, m, xs )
	147	!$OMP DO
[1]	148	DO l = 0, pdims(1) - 1
[1003]	149	m = l * ( nzt_x - nzb_x + 1 ) * nnx * ( nyn_x - nys_x + 1 )
[1]	150	xs = 0 + l * nnx
[1003]	151	DO k = nzb_x, nzt_x
[164]	152	DO i = xs, xs + nnx - 1
[1003]	153	DO j = nys_x, nyn_x
[1]	154	m = m + 1
[164]	155	work(m) = f_in(i,j,k)
[1]	156	ENDDO
	157	ENDDO
	158	ENDDO
	159	ENDDO
[683]	160	!$OMP END PARALLEL
[1]	161
	162	!
	163	!-- Transpose array
	164	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
[622]	165	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
[164]	166	CALL MPI_ALLTOALL( work(1), sendrecvcount_zx, MPI_REAL, &
	167	f_inv(nys,nxl,1), sendrecvcount_zx, MPI_REAL, &
[1]	168	comm1dx, ierr )
	169	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
	170
	171	!
	172	!-- Reorder transposed array in a way that the z index is in first position
[683]	173	!$OMP PARALLEL PRIVATE ( i, j, k )
	174	!$OMP DO
[1003]	175	DO k = 1, nz
	176	DO i = nxl, nxr
	177	DO j = nys, nyn
[164]	178	f_out(k,j,i) = f_inv(j,i,k)
[1]	179	ENDDO
	180	ENDDO
	181	ENDDO
[683]	182	!$OMP END PARALLEL
[1]	183	ELSE
	184	!
	185	!-- Reorder the array in a way that the z index is in first position
[683]	186	!$OMP PARALLEL PRIVATE ( i, j, k )
	187	!$OMP DO
[1003]	188	DO i = nxl, nxr
	189	DO j = nys, nyn
	190	DO k = 1, nz
[164]	191	f_inv(j,i,k) = f_in(i,j,k)
[1]	192	ENDDO
	193	ENDDO
	194	ENDDO
[683]	195	!$OMP END PARALLEL
[1]	196
[683]	197	!$OMP PARALLEL PRIVATE ( i, j, k )
	198	!$OMP DO
[1003]	199	DO k = 1, nz
	200	DO i = nxl, nxr
	201	DO j = nys, nyn
[164]	202	f_out(k,j,i) = f_inv(j,i,k)
	203	ENDDO
[1]	204	ENDDO
	205	ENDDO
[683]	206	!$OMP END PARALLEL
[1]	207
[164]	208	ENDIF
	209
	210
[1]	211	#endif
	212
	213	END SUBROUTINE transpose_xz
	214
	215
[164]	216	SUBROUTINE transpose_yx( f_in, work, f_out )
[1]	217
	218	!------------------------------------------------------------------------------!
	219	! Description:
	220	! ------------
	221	! Transposition of input array (f_in) from y to x. For the input array, all
	222	! elements along y reside on the same PE, while after transposition, all
	223	! elements along x reside on the same PE.
	224	!------------------------------------------------------------------------------!
	225
	226	USE cpulog
	227	USE indices
	228	USE interfaces
	229	USE pegrid
	230	USE transpose_indices
	231
	232	IMPLICIT NONE
	233
	234	INTEGER :: i, j, k, l, m, ys
	235
[1003]	236	REAL :: f_in(0:ny,nxl_y:nxr_y,nzb_y:nzt_y), &
	237	f_inv(nys_x:nyn_x,nzb_x:nzt_x,0:nx), &
	238	f_out(0:nx,nys_x:nyn_x,nzb_x:nzt_x), &
[164]	239	work(nnxnnynnz)
[1]	240
	241	#if defined( __parallel )
	242
	243	!
	244	!-- Reorder input array for transposition
[683]	245	!$OMP PARALLEL PRIVATE ( i, j, k, l, m, ys )
	246	!$OMP DO
[1]	247	DO l = 0, pdims(2) - 1
[1003]	248	m = l * ( nxr_y - nxl_y + 1 ) * ( nzt_y - nzb_y + 1 ) * &
	249	( nyn_x - nys_x + 1 )
	250	ys = 0 + l * ( nyn_x - nys_x + 1 )
	251	DO i = nxl_y, nxr_y
	252	DO k = nzb_y, nzt_y
	253	DO j = ys, ys + nyn_x - nys_x
[1]	254	m = m + 1
[164]	255	work(m) = f_in(j,i,k)
[1]	256	ENDDO
	257	ENDDO
	258	ENDDO
	259	ENDDO
[683]	260	!$OMP END PARALLEL
[1]	261
	262	!
	263	!-- Transpose array
	264	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
[622]	265	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
[164]	266	CALL MPI_ALLTOALL( work(1), sendrecvcount_xy, MPI_REAL, &
[1]	267	f_inv(nys_x,nzb_x,0), sendrecvcount_xy, MPI_REAL, &
	268	comm1dy, ierr )
	269	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
	270
	271	!
	272	!-- Reorder transposed array in a way that the x index is in first position
[683]	273	!$OMP PARALLEL PRIVATE ( i, j, k )
	274	!$OMP DO
[1003]	275	DO i = 0, nx
	276	DO k = nzb_x, nzt_x
	277	DO j = nys_x, nyn_x
[164]	278	f_out(i,j,k) = f_inv(j,k,i)
[1]	279	ENDDO
	280	ENDDO
	281	ENDDO
[683]	282	!$OMP END PARALLEL
[1]	283
	284	#endif
	285
	286	END SUBROUTINE transpose_yx
	287
	288
[164]	289	SUBROUTINE transpose_yxd( f_in, work, f_out )
[1]	290
	291	!------------------------------------------------------------------------------!
	292	! Description:
	293	! ------------
	294	! Transposition of input array (f_in) from y to x. For the input array, all
	295	! elements along y reside on the same PE, while after transposition, all
	296	! elements along x reside on the same PE.
	297	! This is a direct transposition for arrays with indices in regular order
	298	! (k,j,i) (cf. transpose_yx).
	299	!------------------------------------------------------------------------------!
	300
	301	USE cpulog
	302	USE indices
	303	USE interfaces
	304	USE pegrid
	305	USE transpose_indices
	306
	307	IMPLICIT NONE
	308
	309	INTEGER :: i, j, k, l, m, recvcount_yx, sendcount_yx, xs
	310
[1003]	311	REAL :: f_in(1:nz,nys:nyn,nxl:nxr), f_inv(nxl:nxr,1:nz,nys:nyn), &
	312	f_out(0:nx,nys_x:nyn_x,nzb_x:nzt_x), &
[164]	313	work(nnxnnynnz)
[1]	314
	315	#if defined( __parallel )
	316
	317	!
	318	!-- Rearrange indices of input array in order to make data to be send
	319	!-- by MPI contiguous
[1003]	320	DO k = 1, nz
	321	DO j = nys, nyn
	322	DO i = nxl, nxr
[164]	323	f_inv(i,k,j) = f_in(k,j,i)
[1]	324	ENDDO
	325	ENDDO
	326	ENDDO
	327
	328	!
	329	!-- Transpose array
	330	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
[622]	331	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
[1]	332	CALL MPI_ALLTOALL( f_inv(nxl,1,nys), sendrecvcount_xy, MPI_REAL, &
[164]	333	work(1), sendrecvcount_xy, MPI_REAL, &
[1]	334	comm1dx, ierr )
	335	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
	336
	337	!
	338	!-- Reorder transposed array
	339	m = 0
	340	DO l = 0, pdims(1) - 1
	341	xs = 0 + l * nnx
[1003]	342	DO j = nys_x, nyn_x
	343	DO k = 1, nz
[1]	344	DO i = xs, xs + nnx - 1
	345	m = m + 1
[164]	346	f_out(i,j,k) = work(m)
[1]	347	ENDDO
	348	ENDDO
	349	ENDDO
	350	ENDDO
	351
	352	#endif
	353
	354	END SUBROUTINE transpose_yxd
	355
	356
[164]	357	SUBROUTINE transpose_yz( f_in, work, f_out )
[1]	358
	359	!------------------------------------------------------------------------------!
	360	! Description:
	361	! ------------
	362	! Transposition of input array (f_in) from y to z. For the input array, all
	363	! elements along y reside on the same PE, while after transposition, all
	364	! elements along z reside on the same PE.
	365	!------------------------------------------------------------------------------!
	366
	367	USE cpulog
	368	USE indices
	369	USE interfaces
	370	USE pegrid
	371	USE transpose_indices
	372
	373	IMPLICIT NONE
	374
	375	INTEGER :: i, j, k, l, m, zs
	376
[1003]	377	REAL :: f_in(0:ny,nxl_y:nxr_y,nzb_y:nzt_y), &
	378	f_inv(nxl_y:nxr_y,nzb_y:nzt_y,0:ny), &
	379	f_out(nxl_z:nxr_z,nys_z:nyn_z,1:nz), &
[164]	380	work(nnxnnynnz)
[1]	381
	382	#if defined( __parallel )
	383
	384	!
	385	!-- Rearrange indices of input array in order to make data to be send
	386	!-- by MPI contiguous
[683]	387	!$OMP PARALLEL PRIVATE ( i, j, k )
	388	!$OMP DO
[1003]	389	DO j = 0, ny
	390	DO k = nzb_y, nzt_y
	391	DO i = nxl_y, nxr_y
[164]	392	f_inv(i,k,j) = f_in(j,i,k)
[1]	393	ENDDO
	394	ENDDO
	395	ENDDO
[683]	396	!$OMP END PARALLEL
[1]	397
	398	!
	399	!-- Move data to different array, because memory location of work1 is
	400	!-- needed further below (work1 = work2).
	401	!-- If the PE grid is one-dimensional along y, only local reordering
	402	!-- of the data is necessary and no transposition has to be done.
	403	IF ( pdims(1) == 1 ) THEN
[683]	404	!$OMP PARALLEL PRIVATE ( i, j, k )
	405	!$OMP DO
[1003]	406	DO j = 0, ny
	407	DO k = nzb_y, nzt_y
	408	DO i = nxl_y, nxr_y
[164]	409	f_out(i,j,k) = f_inv(i,k,j)
[1]	410	ENDDO
	411	ENDDO
	412	ENDDO
[683]	413	!$OMP END PARALLEL
[1]	414	RETURN
	415	ENDIF
	416
	417	!
	418	!-- Transpose array
	419	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
[622]	420	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
[1]	421	CALL MPI_ALLTOALL( f_inv(nxl_y,nzb_y,0), sendrecvcount_yz, MPI_REAL, &
[164]	422	work(1), sendrecvcount_yz, MPI_REAL, &
[1]	423	comm1dx, ierr )
	424	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
	425
	426	!
	427	!-- Reorder transposed array
[683]	428	!$OMP PARALLEL PRIVATE ( i, j, k, l, m, zs )
	429	!$OMP DO
[1]	430	DO l = 0, pdims(1) - 1
[1003]	431	m = l * ( nyn_z - nys_z + 1 ) * ( nzt_y - nzb_y + 1 ) * &
	432	( nxr_z - nxl_z + 1 )
	433	zs = 1 + l * ( nzt_y - nzb_y + 1 )
	434	DO j = nys_z, nyn_z
	435	DO k = zs, zs + nzt_y - nzb_y
	436	DO i = nxl_z, nxr_z
[1]	437	m = m + 1
[164]	438	f_out(i,j,k) = work(m)
[1]	439	ENDDO
	440	ENDDO
	441	ENDDO
	442	ENDDO
[683]	443	!$OMP END PARALLEL
[1]	444
	445	#endif
	446
	447	END SUBROUTINE transpose_yz
	448
	449
[164]	450	SUBROUTINE transpose_zx( f_in, work, f_out )
[1]	451
	452	!------------------------------------------------------------------------------!
	453	! Description:
	454	! ------------
	455	! Transposition of input array (f_in) from z to x. For the input array, all
	456	! elements along z reside on the same PE, while after transposition, all
	457	! elements along x reside on the same PE.
	458	!------------------------------------------------------------------------------!
	459
	460	USE cpulog
	461	USE indices
	462	USE interfaces
	463	USE pegrid
	464	USE transpose_indices
	465
	466	IMPLICIT NONE
	467
	468	INTEGER :: i, j, k, l, m, xs
	469
[1003]	470	REAL :: f_in(1:nz,nys:nyn,nxl:nxr), f_inv(nys:nyn,nxl:nxr,1:nz), &
	471	f_out(0:nx,nys_x:nyn_x,nzb_x:nzt_x), &
[164]	472	work(nnxnnynnz)
[1]	473
	474	#if defined( __parallel )
	475
	476	!
	477	!-- Rearrange indices of input array in order to make data to be send
	478	!-- by MPI contiguous
[683]	479	!$OMP PARALLEL PRIVATE ( i, j, k )
	480	!$OMP DO
[1003]	481	DO k = 1,nz
	482	DO i = nxl, nxr
	483	DO j = nys, nyn
[164]	484	f_inv(j,i,k) = f_in(k,j,i)
[1]	485	ENDDO
	486	ENDDO
	487	ENDDO
[683]	488	!$OMP END PARALLEL
[1]	489
	490	!
	491	!-- Move data to different array, because memory location of work1 is
	492	!-- needed further below (work1 = work2).
	493	!-- If the PE grid is one-dimensional along y, only local reordering
	494	!-- of the data is necessary and no transposition has to be done.
	495	IF ( pdims(1) == 1 ) THEN
[683]	496	!$OMP PARALLEL PRIVATE ( i, j, k )
	497	!$OMP DO
[1003]	498	DO k = 1, nz
	499	DO i = nxl, nxr
	500	DO j = nys, nyn
[164]	501	f_out(i,j,k) = f_inv(j,i,k)
[1]	502	ENDDO
	503	ENDDO
	504	ENDDO
[683]	505	!$OMP END PARALLEL
[1]	506	RETURN
	507	ENDIF
	508
	509	!
	510	!-- Transpose array
	511	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
[622]	512	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
[164]	513	CALL MPI_ALLTOALL( f_inv(nys,nxl,1), sendrecvcount_zx, MPI_REAL, &
	514	work(1), sendrecvcount_zx, MPI_REAL, &
[1]	515	comm1dx, ierr )
	516	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
	517
	518	!
	519	!-- Reorder transposed array
[683]	520	!$OMP PARALLEL PRIVATE ( i, j, k, l, m, xs )
	521	!$OMP DO
[1]	522	DO l = 0, pdims(1) - 1
[1003]	523	m = l * ( nzt_x - nzb_x + 1 ) * nnx * ( nyn_x - nys_x + 1 )
[1]	524	xs = 0 + l * nnx
[1003]	525	DO k = nzb_x, nzt_x
[164]	526	DO i = xs, xs + nnx - 1
[1003]	527	DO j = nys_x, nyn_x
[1]	528	m = m + 1
[164]	529	f_out(i,j,k) = work(m)
[1]	530	ENDDO
	531	ENDDO
	532	ENDDO
	533	ENDDO
[683]	534	!$OMP END PARALLEL
[1]	535
	536	#endif
	537
	538	END SUBROUTINE transpose_zx
	539
	540
[164]	541	SUBROUTINE transpose_zy( f_in, work, f_out )
[1]	542
	543	!------------------------------------------------------------------------------!
	544	! Description:
	545	! ------------
	546	! Transposition of input array (f_in) from z to y. For the input array, all
	547	! elements along z reside on the same PE, while after transposition, all
	548	! elements along y reside on the same PE.
	549	!------------------------------------------------------------------------------!
	550
	551	USE cpulog
	552	USE indices
	553	USE interfaces
	554	USE pegrid
	555	USE transpose_indices
	556
	557	IMPLICIT NONE
	558
	559	INTEGER :: i, j, k, l, m, zs
	560
[1003]	561	REAL :: f_in(nxl_z:nxr_z,nys_z:nyn_z,1:nz), &
	562	f_inv(nxl_y:nxr_y,nzb_y:nzt_y,0:ny), &
	563	f_out(0:ny,nxl_y:nxr_y,nzb_y:nzt_y), &
[164]	564	work(nnxnnynnz)
[1]	565
	566	#if defined( __parallel )
	567
	568	!
	569	!-- If the PE grid is one-dimensional along y, the array has only to be
	570	!-- reordered locally and therefore no transposition has to be done.
	571	IF ( pdims(1) /= 1 ) THEN
	572	!
	573	!-- Reorder input array for transposition
[683]	574	!$OMP PARALLEL PRIVATE ( i, j, k, l, m, zs )
	575	!$OMP DO
[1]	576	DO l = 0, pdims(1) - 1
[1003]	577	m = l * ( nyn_z - nys_z + 1 ) * ( nzt_y - nzb_y + 1 ) * &
	578	( nxr_z - nxl_z + 1 )
	579	zs = 1 + l * ( nzt_y - nzb_y + 1 )
	580	DO j = nys_z, nyn_z
	581	DO k = zs, zs + nzt_y - nzb_y
	582	DO i = nxl_z, nxr_z
[1]	583	m = m + 1
[164]	584	work(m) = f_in(i,j,k)
[1]	585	ENDDO
	586	ENDDO
	587	ENDDO
	588	ENDDO
[683]	589	!$OMP END PARALLEL
[1]	590
	591	!
	592	!-- Transpose array
	593	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
[622]	594	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
[164]	595	CALL MPI_ALLTOALL( work(1), sendrecvcount_yz, MPI_REAL, &
[1]	596	f_inv(nxl_y,nzb_y,0), sendrecvcount_yz, MPI_REAL, &
	597	comm1dx, ierr )
	598	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
	599
	600	!
	601	!-- Reorder transposed array in a way that the y index is in first position
[683]	602	!$OMP PARALLEL PRIVATE ( i, j, k )
	603	!$OMP DO
[1003]	604	DO j = 0, ny
	605	DO k = nzb_y, nzt_y
	606	DO i = nxl_y, nxr_y
[164]	607	f_out(j,i,k) = f_inv(i,k,j)
[1]	608	ENDDO
	609	ENDDO
	610	ENDDO
[683]	611	!$OMP END PARALLEL
[1]	612	ELSE
	613	!
	614	!-- Reorder the array in a way that the y index is in first position
[683]	615	!$OMP PARALLEL PRIVATE ( i, j, k )
	616	!$OMP DO
[1003]	617	DO k = nzb_y, nzt_y
	618	DO j = 0, ny
	619	DO i = nxl_y, nxr_y
[164]	620	f_inv(i,k,j) = f_in(i,j,k)
	621	ENDDO
	622	ENDDO
	623	ENDDO
[683]	624	!$OMP END PARALLEL
[164]	625	!
	626	!-- Move data to output array
[683]	627	!$OMP PARALLEL PRIVATE ( i, j, k )
	628	!$OMP DO
[1003]	629	DO k = nzb_y, nzt_y
	630	DO i = nxl_y, nxr_y
	631	DO j = 0, ny
[164]	632	f_out(j,i,k) = f_inv(i,k,j)
[1]	633	ENDDO
	634	ENDDO
	635	ENDDO
[683]	636	!$OMP END PARALLEL
[164]	637
[1]	638	ENDIF
	639
	640	#endif
	641
	642	END SUBROUTINE transpose_zy
	643
	644
[164]	645	SUBROUTINE transpose_zyd( f_in, work, f_out )
[1]	646
	647	!------------------------------------------------------------------------------!
	648	! Description:
	649	! ------------
	650	! Transposition of input array (f_in) from z to y. For the input array, all
	651	! elements along z reside on the same PE, while after transposition, all
	652	! elements along y reside on the same PE.
	653	! This is a direct transposition for arrays with indices in regular order
	654	! (k,j,i) (cf. transpose_zy).
	655	!------------------------------------------------------------------------------!
	656
	657	USE cpulog
	658	USE indices
	659	USE interfaces
	660	USE pegrid
	661	USE transpose_indices
	662
	663	IMPLICIT NONE
	664
	665	INTEGER :: i, j, k, l, m, ys
	666
[1003]	667	REAL :: f_in(1:nz,nys:nyn,nxl:nxr), f_inv(nys:nyn,nxl:nxr,1:nz), &
	668	f_out(0:ny,nxl_yd:nxr_yd,nzb_yd:nzt_yd), &
[164]	669	work(nnxnnynnz)
[1]	670
	671	#if defined( __parallel )
	672
	673	!
	674	!-- Rearrange indices of input array in order to make data to be send
	675	!-- by MPI contiguous
[1003]	676	DO i = nxl, nxr
	677	DO j = nys, nyn
	678	DO k = 1, nz
[164]	679	f_inv(j,i,k) = f_in(k,j,i)
[1]	680	ENDDO
	681	ENDDO
	682	ENDDO
	683
	684	!
	685	!-- Move data to different array, because memory location of work1 is
	686	!-- needed further below (work1 = work2).
	687	!-- If the PE grid is one-dimensional along x, only local reordering
	688	!-- of the data is necessary and no transposition has to be done.
	689	IF ( pdims(2) == 1 ) THEN
[1003]	690	DO k = 1, nz
	691	DO i = nxl, nxr
	692	DO j = nys, nyn
[164]	693	f_out(j,i,k) = f_inv(j,i,k)
[1]	694	ENDDO
	695	ENDDO
	696	ENDDO
	697	RETURN
	698	ENDIF
	699
	700	!
	701	!-- Transpose array
	702	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
[622]	703	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
[1]	704	CALL MPI_ALLTOALL( f_inv(nys,nxl,1), sendrecvcount_zyd, MPI_REAL, &
[164]	705	work(1), sendrecvcount_zyd, MPI_REAL, &
[1]	706	comm1dy, ierr )
	707	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
	708
	709	!
	710	!-- Reorder transposed array
	711	m = 0
	712	DO l = 0, pdims(2) - 1
	713	ys = 0 + l * nny
[1003]	714	DO k = nzb_yd, nzt_yd
	715	DO i = nxl_yd, nxr_yd
[1]	716	DO j = ys, ys + nny - 1
	717	m = m + 1
[164]	718	f_out(j,i,k) = work(m)
[1]	719	ENDDO
	720	ENDDO
	721	ENDDO
	722	ENDDO
	723
	724	#endif
	725
	726	END SUBROUTINE transpose_zyd

Note: See TracBrowser for help on using the repository browser.

Download in other formats:

| Impressum | ©Leibniz Universität Hannover |