Home

Context Navigation

source: palm/trunk/SOURCE/transpose.f90 @ 759

Last change on this file since 759 was 684, checked in by raasch, 14 years ago
last commit documented
Property svn:keywords set to `Id`
File size: 20.2 KB

Rev	Line
[164]	1	SUBROUTINE transpose_xy( f_in, work, f_out )
[1]	2
	3	!------------------------------------------------------------------------------!
[484]	4	! Current revisions:
[1]	5	! -----------------
[684]	6	!
[198]	7	!
	8	! Former revisions:
	9	! -----------------
	10	! $Id: transpose.f90 684 2011-02-09 14:49:31Z raasch $
	11	!
[684]	12	! 683 2011-02-09 14:25:15Z raasch
	13	! openMP parallelization of transpositions for 2d-domain-decomposition
	14	!
[623]	15	! 622 2010-12-10 08:08:13Z raasch
	16	! optional barriers included in order to speed up collective operations
	17	!
[198]	18	! 164 2008-05-15 08:46:15Z raasch
[164]	19	! f_inv changed from subroutine argument to automatic array in order to do
	20	! re-ordering from f_in to f_inv in one step, one array work is needed instead
	21	! of work1 and work2
[1]	22	!
[198]	23	! February 2007
[3]	24	! RCS Log replace by Id keyword, revision history cleaned up
	25	!
[1]	26	! Revision 1.2 2004/04/30 13:12:17 raasch
	27	! Switched from mpi_alltoallv to the simpler mpi_alltoall,
	28	! all former transpose-routine files collected in this file, enlarged
	29	! transposition arrays introduced
	30	!
	31	! Revision 1.1 2004/04/30 13:08:16 raasch
	32	! Initial revision (collection of former routines transpose_xy, transpose_xz,
	33	! transpose_yx, transpose_yz, transpose_zx, transpose_zy)
	34	!
	35	! Revision 1.1 1997/07/24 11:25:18 raasch
	36	! Initial revision
	37	!
	38	!
	39	! Description:
	40	! ------------
	41	! Transposition of input array (f_in) from x to y. For the input array, all
	42	! elements along x reside on the same PE, while after transposition, all
	43	! elements along y reside on the same PE.
	44	!------------------------------------------------------------------------------!
	45
	46	USE cpulog
	47	USE indices
	48	USE interfaces
	49	USE pegrid
	50	USE transpose_indices
	51
	52	IMPLICIT NONE
	53
	54	INTEGER :: i, j, k, l, m, ys
	55
	56	REAL :: f_in(0:nxa,nys_x:nyn_xa,nzb_x:nzt_xa), &
	57	f_inv(nys_x:nyn_xa,nzb_x:nzt_xa,0:nxa), &
	58	f_out(0:nya,nxl_y:nxr_ya,nzb_y:nzt_ya), &
[164]	59	work(nnxnnynnz)
[1]	60
	61	#if defined( __parallel )
	62
	63	!
	64	!-- Rearrange indices of input array in order to make data to be send
	65	!-- by MPI contiguous
[683]	66	!$OMP PARALLEL PRIVATE ( i, j, k )
	67	!$OMP DO
[1]	68	DO i = 0, nxa
	69	DO k = nzb_x, nzt_xa
	70	DO j = nys_x, nyn_xa
[164]	71	f_inv(j,k,i) = f_in(i,j,k)
[1]	72	ENDDO
	73	ENDDO
	74	ENDDO
[683]	75	!$OMP END PARALLEL
[1]	76
	77	!
	78	!-- Transpose array
	79	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
[622]	80	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
[1]	81	CALL MPI_ALLTOALL( f_inv(nys_x,nzb_x,0), sendrecvcount_xy, MPI_REAL, &
[164]	82	work(1), sendrecvcount_xy, MPI_REAL, &
[1]	83	comm1dy, ierr )
	84	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
	85
	86	!
	87	!-- Reorder transposed array
[683]	88	!$OMP PARALLEL PRIVATE ( i, j, k, l, m, ys )
	89	!$OMP DO
[1]	90	DO l = 0, pdims(2) - 1
[683]	91	m = l * ( nxr_ya - nxl_y + 1 ) * ( nzt_ya - nzb_y + 1 ) * &
	92	( nyn_xa - nys_x + 1 )
[1]	93	ys = 0 + l * ( nyn_xa - nys_x + 1 )
	94	DO i = nxl_y, nxr_ya
	95	DO k = nzb_y, nzt_ya
	96	DO j = ys, ys + nyn_xa - nys_x
	97	m = m + 1
[164]	98	f_out(j,i,k) = work(m)
[1]	99	ENDDO
	100	ENDDO
	101	ENDDO
	102	ENDDO
[683]	103	!$OMP END PARALLEL
[1]	104
	105	#endif
	106
	107	END SUBROUTINE transpose_xy
	108
	109
[164]	110	SUBROUTINE transpose_xz( f_in, work, f_out )
[1]	111
	112	!------------------------------------------------------------------------------!
	113	! Description:
	114	! ------------
	115	! Transposition of input array (f_in) from x to z. For the input array, all
	116	! elements along x reside on the same PE, while after transposition, all
	117	! elements along z reside on the same PE.
	118	!------------------------------------------------------------------------------!
	119
	120	USE cpulog
	121	USE indices
	122	USE interfaces
	123	USE pegrid
	124	USE transpose_indices
	125
	126	IMPLICIT NONE
	127
	128	INTEGER :: i, j, k, l, m, xs
	129
	130	REAL :: f_in(0:nxa,nys_x:nyn_xa,nzb_x:nzt_xa), &
[164]	131	f_inv(nys:nyna,nxl:nxra,1:nza), &
[1]	132	f_out(1:nza,nys:nyna,nxl:nxra), &
[164]	133	work(nnxnnynnz)
[1]	134
	135	#if defined( __parallel )
	136
	137	!
	138	!-- If the PE grid is one-dimensional along y, the array has only to be
	139	!-- reordered locally and therefore no transposition has to be done.
	140	IF ( pdims(1) /= 1 ) THEN
	141	!
	142	!-- Reorder input array for transposition
[683]	143	!$OMP PARALLEL PRIVATE ( i, j, k, l, m, xs )
	144	!$OMP DO
[1]	145	DO l = 0, pdims(1) - 1
[683]	146	m = l * ( nzt_xa - nzb_x + 1 ) * nnx * ( nyn_xa - nys_x + 1 )
[1]	147	xs = 0 + l * nnx
	148	DO k = nzb_x, nzt_xa
[164]	149	DO i = xs, xs + nnx - 1
	150	DO j = nys_x, nyn_xa
[1]	151	m = m + 1
[164]	152	work(m) = f_in(i,j,k)
[1]	153	ENDDO
	154	ENDDO
	155	ENDDO
	156	ENDDO
[683]	157	!$OMP END PARALLEL
[1]	158
	159	!
	160	!-- Transpose array
	161	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
[622]	162	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
[164]	163	CALL MPI_ALLTOALL( work(1), sendrecvcount_zx, MPI_REAL, &
	164	f_inv(nys,nxl,1), sendrecvcount_zx, MPI_REAL, &
[1]	165	comm1dx, ierr )
	166	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
	167
	168	!
	169	!-- Reorder transposed array in a way that the z index is in first position
[683]	170	!$OMP PARALLEL PRIVATE ( i, j, k )
	171	!$OMP DO
[164]	172	DO k = 1, nza
	173	DO i = nxl, nxra
	174	DO j = nys, nyna
	175	f_out(k,j,i) = f_inv(j,i,k)
[1]	176	ENDDO
	177	ENDDO
	178	ENDDO
[683]	179	!$OMP END PARALLEL
[1]	180	ELSE
	181	!
	182	!-- Reorder the array in a way that the z index is in first position
[683]	183	!$OMP PARALLEL PRIVATE ( i, j, k )
	184	!$OMP DO
[1]	185	DO i = nxl, nxra
	186	DO j = nys, nyna
	187	DO k = 1, nza
[164]	188	f_inv(j,i,k) = f_in(i,j,k)
[1]	189	ENDDO
	190	ENDDO
	191	ENDDO
[683]	192	!$OMP END PARALLEL
[1]	193
[683]	194	!$OMP PARALLEL PRIVATE ( i, j, k )
	195	!$OMP DO
[164]	196	DO k = 1, nza
	197	DO i = nxl, nxra
	198	DO j = nys, nyna
	199	f_out(k,j,i) = f_inv(j,i,k)
	200	ENDDO
[1]	201	ENDDO
	202	ENDDO
[683]	203	!$OMP END PARALLEL
[1]	204
[164]	205	ENDIF
	206
	207
[1]	208	#endif
	209
	210	END SUBROUTINE transpose_xz
	211
	212
[164]	213	SUBROUTINE transpose_yx( f_in, work, f_out )
[1]	214
	215	!------------------------------------------------------------------------------!
	216	! Description:
	217	! ------------
	218	! Transposition of input array (f_in) from y to x. For the input array, all
	219	! elements along y reside on the same PE, while after transposition, all
	220	! elements along x reside on the same PE.
	221	!------------------------------------------------------------------------------!
	222
	223	USE cpulog
	224	USE indices
	225	USE interfaces
	226	USE pegrid
	227	USE transpose_indices
	228
	229	IMPLICIT NONE
	230
	231	INTEGER :: i, j, k, l, m, ys
	232
	233	REAL :: f_in(0:nya,nxl_y:nxr_ya,nzb_y:nzt_ya), &
	234	f_inv(nys_x:nyn_xa,nzb_x:nzt_xa,0:nxa), &
	235	f_out(0:nxa,nys_x:nyn_xa,nzb_x:nzt_xa), &
[164]	236	work(nnxnnynnz)
[1]	237
	238	#if defined( __parallel )
	239
	240	!
	241	!-- Reorder input array for transposition
[683]	242	!$OMP PARALLEL PRIVATE ( i, j, k, l, m, ys )
	243	!$OMP DO
[1]	244	DO l = 0, pdims(2) - 1
[683]	245	m = l * ( nxr_ya - nxl_y + 1 ) * ( nzt_ya - nzb_y + 1 ) * &
	246	( nyn_xa - nys_x + 1 )
[1]	247	ys = 0 + l * ( nyn_xa - nys_x + 1 )
	248	DO i = nxl_y, nxr_ya
	249	DO k = nzb_y, nzt_ya
	250	DO j = ys, ys + nyn_xa - nys_x
	251	m = m + 1
[164]	252	work(m) = f_in(j,i,k)
[1]	253	ENDDO
	254	ENDDO
	255	ENDDO
	256	ENDDO
[683]	257	!$OMP END PARALLEL
[1]	258
	259	!
	260	!-- Transpose array
	261	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
[622]	262	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
[164]	263	CALL MPI_ALLTOALL( work(1), sendrecvcount_xy, MPI_REAL, &
[1]	264	f_inv(nys_x,nzb_x,0), sendrecvcount_xy, MPI_REAL, &
	265	comm1dy, ierr )
	266	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
	267
	268	!
	269	!-- Reorder transposed array in a way that the x index is in first position
[683]	270	!$OMP PARALLEL PRIVATE ( i, j, k )
	271	!$OMP DO
[1]	272	DO i = 0, nxa
	273	DO k = nzb_x, nzt_xa
	274	DO j = nys_x, nyn_xa
[164]	275	f_out(i,j,k) = f_inv(j,k,i)
[1]	276	ENDDO
	277	ENDDO
	278	ENDDO
[683]	279	!$OMP END PARALLEL
[1]	280
	281	#endif
	282
	283	END SUBROUTINE transpose_yx
	284
	285
[164]	286	SUBROUTINE transpose_yxd( f_in, work, f_out )
[1]	287
	288	!------------------------------------------------------------------------------!
	289	! Description:
	290	! ------------
	291	! Transposition of input array (f_in) from y to x. For the input array, all
	292	! elements along y reside on the same PE, while after transposition, all
	293	! elements along x reside on the same PE.
	294	! This is a direct transposition for arrays with indices in regular order
	295	! (k,j,i) (cf. transpose_yx).
	296	!------------------------------------------------------------------------------!
	297
	298	USE cpulog
	299	USE indices
	300	USE interfaces
	301	USE pegrid
	302	USE transpose_indices
	303
	304	IMPLICIT NONE
	305
	306	INTEGER :: i, j, k, l, m, recvcount_yx, sendcount_yx, xs
	307
	308	REAL :: f_in(1:nza,nys:nyna,nxl:nxra), f_inv(nxl:nxra,1:nza,nys:nyna), &
	309	f_out(0:nxa,nys_x:nyn_xa,nzb_x:nzt_xa), &
[164]	310	work(nnxnnynnz)
[1]	311
	312	#if defined( __parallel )
	313
	314	!
	315	!-- Rearrange indices of input array in order to make data to be send
	316	!-- by MPI contiguous
	317	DO k = 1, nza
	318	DO j = nys, nyna
	319	DO i = nxl, nxra
[164]	320	f_inv(i,k,j) = f_in(k,j,i)
[1]	321	ENDDO
	322	ENDDO
	323	ENDDO
	324
	325	!
	326	!-- Transpose array
	327	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
[622]	328	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
[1]	329	CALL MPI_ALLTOALL( f_inv(nxl,1,nys), sendrecvcount_xy, MPI_REAL, &
[164]	330	work(1), sendrecvcount_xy, MPI_REAL, &
[1]	331	comm1dx, ierr )
	332	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
	333
	334	!
	335	!-- Reorder transposed array
	336	m = 0
	337	DO l = 0, pdims(1) - 1
	338	xs = 0 + l * nnx
	339	DO j = nys_x, nyn_xa
	340	DO k = 1, nza
	341	DO i = xs, xs + nnx - 1
	342	m = m + 1
[164]	343	f_out(i,j,k) = work(m)
[1]	344	ENDDO
	345	ENDDO
	346	ENDDO
	347	ENDDO
	348
	349	#endif
	350
	351	END SUBROUTINE transpose_yxd
	352
	353
[164]	354	SUBROUTINE transpose_yz( f_in, work, f_out )
[1]	355
	356	!------------------------------------------------------------------------------!
	357	! Description:
	358	! ------------
	359	! Transposition of input array (f_in) from y to z. For the input array, all
	360	! elements along y reside on the same PE, while after transposition, all
	361	! elements along z reside on the same PE.
	362	!------------------------------------------------------------------------------!
	363
	364	USE cpulog
	365	USE indices
	366	USE interfaces
	367	USE pegrid
	368	USE transpose_indices
	369
	370	IMPLICIT NONE
	371
	372	INTEGER :: i, j, k, l, m, zs
	373
	374	REAL :: f_in(0:nya,nxl_y:nxr_ya,nzb_y:nzt_ya), &
	375	f_inv(nxl_y:nxr_ya,nzb_y:nzt_ya,0:nya), &
	376	f_out(nxl_z:nxr_za,nys_z:nyn_za,1:nza), &
[164]	377	work(nnxnnynnz)
[1]	378
	379	#if defined( __parallel )
	380
	381	!
	382	!-- Rearrange indices of input array in order to make data to be send
	383	!-- by MPI contiguous
[683]	384	!$OMP PARALLEL PRIVATE ( i, j, k )
	385	!$OMP DO
[164]	386	DO j = 0, nya
	387	DO k = nzb_y, nzt_ya
	388	DO i = nxl_y, nxr_ya
	389	f_inv(i,k,j) = f_in(j,i,k)
[1]	390	ENDDO
	391	ENDDO
	392	ENDDO
[683]	393	!$OMP END PARALLEL
[1]	394
	395	!
	396	!-- Move data to different array, because memory location of work1 is
	397	!-- needed further below (work1 = work2).
	398	!-- If the PE grid is one-dimensional along y, only local reordering
	399	!-- of the data is necessary and no transposition has to be done.
	400	IF ( pdims(1) == 1 ) THEN
[683]	401	!$OMP PARALLEL PRIVATE ( i, j, k )
	402	!$OMP DO
[1]	403	DO j = 0, nya
	404	DO k = nzb_y, nzt_ya
	405	DO i = nxl_y, nxr_ya
[164]	406	f_out(i,j,k) = f_inv(i,k,j)
[1]	407	ENDDO
	408	ENDDO
	409	ENDDO
[683]	410	!$OMP END PARALLEL
[1]	411	RETURN
	412	ENDIF
	413
	414	!
	415	!-- Transpose array
	416	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
[622]	417	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
[1]	418	CALL MPI_ALLTOALL( f_inv(nxl_y,nzb_y,0), sendrecvcount_yz, MPI_REAL, &
[164]	419	work(1), sendrecvcount_yz, MPI_REAL, &
[1]	420	comm1dx, ierr )
	421	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
	422
	423	!
	424	!-- Reorder transposed array
[683]	425	!$OMP PARALLEL PRIVATE ( i, j, k, l, m, zs )
	426	!$OMP DO
[1]	427	DO l = 0, pdims(1) - 1
[683]	428	m = l * ( nyn_za - nys_z + 1 ) * ( nzt_ya - nzb_y + 1 ) * &
	429	( nxr_za - nxl_z + 1 )
[1]	430	zs = 1 + l * ( nzt_ya - nzb_y + 1 )
	431	DO j = nys_z, nyn_za
	432	DO k = zs, zs + nzt_ya - nzb_y
	433	DO i = nxl_z, nxr_za
	434	m = m + 1
[164]	435	f_out(i,j,k) = work(m)
[1]	436	ENDDO
	437	ENDDO
	438	ENDDO
	439	ENDDO
[683]	440	!$OMP END PARALLEL
[1]	441
	442	#endif
	443
	444	END SUBROUTINE transpose_yz
	445
	446
[164]	447	SUBROUTINE transpose_zx( f_in, work, f_out )
[1]	448
	449	!------------------------------------------------------------------------------!
	450	! Description:
	451	! ------------
	452	! Transposition of input array (f_in) from z to x. For the input array, all
	453	! elements along z reside on the same PE, while after transposition, all
	454	! elements along x reside on the same PE.
	455	!------------------------------------------------------------------------------!
	456
	457	USE cpulog
	458	USE indices
	459	USE interfaces
	460	USE pegrid
	461	USE transpose_indices
	462
	463	IMPLICIT NONE
	464
	465	INTEGER :: i, j, k, l, m, xs
	466
[164]	467	REAL :: f_in(1:nza,nys:nyna,nxl:nxra), f_inv(nys:nyna,nxl:nxra,1:nza), &
[1]	468	f_out(0:nxa,nys_x:nyn_xa,nzb_x:nzt_xa), &
[164]	469	work(nnxnnynnz)
[1]	470
	471	#if defined( __parallel )
	472
	473	!
	474	!-- Rearrange indices of input array in order to make data to be send
	475	!-- by MPI contiguous
[683]	476	!$OMP PARALLEL PRIVATE ( i, j, k )
	477	!$OMP DO
[164]	478	DO k = 1,nza
	479	DO i = nxl, nxra
	480	DO j = nys, nyna
	481	f_inv(j,i,k) = f_in(k,j,i)
[1]	482	ENDDO
	483	ENDDO
	484	ENDDO
[683]	485	!$OMP END PARALLEL
[1]	486
	487	!
	488	!-- Move data to different array, because memory location of work1 is
	489	!-- needed further below (work1 = work2).
	490	!-- If the PE grid is one-dimensional along y, only local reordering
	491	!-- of the data is necessary and no transposition has to be done.
	492	IF ( pdims(1) == 1 ) THEN
[683]	493	!$OMP PARALLEL PRIVATE ( i, j, k )
	494	!$OMP DO
[1]	495	DO k = 1, nza
[164]	496	DO i = nxl, nxra
	497	DO j = nys, nyna
	498	f_out(i,j,k) = f_inv(j,i,k)
[1]	499	ENDDO
	500	ENDDO
	501	ENDDO
[683]	502	!$OMP END PARALLEL
[1]	503	RETURN
	504	ENDIF
	505
	506	!
	507	!-- Transpose array
	508	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
[622]	509	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
[164]	510	CALL MPI_ALLTOALL( f_inv(nys,nxl,1), sendrecvcount_zx, MPI_REAL, &
	511	work(1), sendrecvcount_zx, MPI_REAL, &
[1]	512	comm1dx, ierr )
	513	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
	514
	515	!
	516	!-- Reorder transposed array
[683]	517	!$OMP PARALLEL PRIVATE ( i, j, k, l, m, xs )
	518	!$OMP DO
[1]	519	DO l = 0, pdims(1) - 1
[683]	520	m = l * ( nzt_xa - nzb_x + 1 ) * nnx * ( nyn_xa - nys_x + 1 )
[1]	521	xs = 0 + l * nnx
	522	DO k = nzb_x, nzt_xa
[164]	523	DO i = xs, xs + nnx - 1
	524	DO j = nys_x, nyn_xa
[1]	525	m = m + 1
[164]	526	f_out(i,j,k) = work(m)
[1]	527	ENDDO
	528	ENDDO
	529	ENDDO
	530	ENDDO
[683]	531	!$OMP END PARALLEL
[1]	532
	533	#endif
	534
	535	END SUBROUTINE transpose_zx
	536
	537
[164]	538	SUBROUTINE transpose_zy( f_in, work, f_out )
[1]	539
	540	!------------------------------------------------------------------------------!
	541	! Description:
	542	! ------------
	543	! Transposition of input array (f_in) from z to y. For the input array, all
	544	! elements along z reside on the same PE, while after transposition, all
	545	! elements along y reside on the same PE.
	546	!------------------------------------------------------------------------------!
	547
	548	USE cpulog
	549	USE indices
	550	USE interfaces
	551	USE pegrid
	552	USE transpose_indices
	553
	554	IMPLICIT NONE
	555
	556	INTEGER :: i, j, k, l, m, zs
	557
	558	REAL :: f_in(nxl_z:nxr_za,nys_z:nyn_za,1:nza), &
	559	f_inv(nxl_y:nxr_ya,nzb_y:nzt_ya,0:nya), &
	560	f_out(0:nya,nxl_y:nxr_ya,nzb_y:nzt_ya), &
[164]	561	work(nnxnnynnz)
[1]	562
	563	#if defined( __parallel )
	564
	565	!
	566	!-- If the PE grid is one-dimensional along y, the array has only to be
	567	!-- reordered locally and therefore no transposition has to be done.
	568	IF ( pdims(1) /= 1 ) THEN
	569	!
	570	!-- Reorder input array for transposition
[683]	571	!$OMP PARALLEL PRIVATE ( i, j, k, l, m, zs )
	572	!$OMP DO
[1]	573	DO l = 0, pdims(1) - 1
[683]	574	m = l * ( nyn_za - nys_z + 1 ) * ( nzt_ya - nzb_y + 1 ) * &
	575	( nxr_za - nxl_z + 1 )
[1]	576	zs = 1 + l * ( nzt_ya - nzb_y + 1 )
	577	DO j = nys_z, nyn_za
	578	DO k = zs, zs + nzt_ya - nzb_y
	579	DO i = nxl_z, nxr_za
	580	m = m + 1
[164]	581	work(m) = f_in(i,j,k)
[1]	582	ENDDO
	583	ENDDO
	584	ENDDO
	585	ENDDO
[683]	586	!$OMP END PARALLEL
[1]	587
	588	!
	589	!-- Transpose array
	590	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
[622]	591	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
[164]	592	CALL MPI_ALLTOALL( work(1), sendrecvcount_yz, MPI_REAL, &
[1]	593	f_inv(nxl_y,nzb_y,0), sendrecvcount_yz, MPI_REAL, &
	594	comm1dx, ierr )
	595	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
	596
	597	!
	598	!-- Reorder transposed array in a way that the y index is in first position
[683]	599	!$OMP PARALLEL PRIVATE ( i, j, k )
	600	!$OMP DO
[164]	601	DO j = 0, nya
	602	DO k = nzb_y, nzt_ya
	603	DO i = nxl_y, nxr_ya
	604	f_out(j,i,k) = f_inv(i,k,j)
[1]	605	ENDDO
	606	ENDDO
	607	ENDDO
[683]	608	!$OMP END PARALLEL
[1]	609	ELSE
	610	!
	611	!-- Reorder the array in a way that the y index is in first position
[683]	612	!$OMP PARALLEL PRIVATE ( i, j, k )
	613	!$OMP DO
[1]	614	DO k = nzb_y, nzt_ya
[164]	615	DO j = 0, nya
	616	DO i = nxl_y, nxr_ya
	617	f_inv(i,k,j) = f_in(i,j,k)
	618	ENDDO
	619	ENDDO
	620	ENDDO
[683]	621	!$OMP END PARALLEL
[164]	622	!
	623	!-- Move data to output array
[683]	624	!$OMP PARALLEL PRIVATE ( i, j, k )
	625	!$OMP DO
[164]	626	DO k = nzb_y, nzt_ya
[1]	627	DO i = nxl_y, nxr_ya
	628	DO j = 0, nya
[164]	629	f_out(j,i,k) = f_inv(i,k,j)
[1]	630	ENDDO
	631	ENDDO
	632	ENDDO
[683]	633	!$OMP END PARALLEL
[164]	634
[1]	635	ENDIF
	636
	637	#endif
	638
	639	END SUBROUTINE transpose_zy
	640
	641
[164]	642	SUBROUTINE transpose_zyd( f_in, work, f_out )
[1]	643
	644	!------------------------------------------------------------------------------!
	645	! Description:
	646	! ------------
	647	! Transposition of input array (f_in) from z to y. For the input array, all
	648	! elements along z reside on the same PE, while after transposition, all
	649	! elements along y reside on the same PE.
	650	! This is a direct transposition for arrays with indices in regular order
	651	! (k,j,i) (cf. transpose_zy).
	652	!------------------------------------------------------------------------------!
	653
	654	USE cpulog
	655	USE indices
	656	USE interfaces
	657	USE pegrid
	658	USE transpose_indices
	659
	660	IMPLICIT NONE
	661
	662	INTEGER :: i, j, k, l, m, ys
	663
	664	REAL :: f_in(1:nza,nys:nyna,nxl:nxra), f_inv(nys:nyna,nxl:nxra,1:nza), &
	665	f_out(0:nya,nxl_yd:nxr_yda,nzb_yd:nzt_yda), &
[164]	666	work(nnxnnynnz)
[1]	667
	668	#if defined( __parallel )
	669
	670	!
	671	!-- Rearrange indices of input array in order to make data to be send
	672	!-- by MPI contiguous
	673	DO i = nxl, nxra
	674	DO j = nys, nyna
	675	DO k = 1, nza
[164]	676	f_inv(j,i,k) = f_in(k,j,i)
[1]	677	ENDDO
	678	ENDDO
	679	ENDDO
	680
	681	!
	682	!-- Move data to different array, because memory location of work1 is
	683	!-- needed further below (work1 = work2).
	684	!-- If the PE grid is one-dimensional along x, only local reordering
	685	!-- of the data is necessary and no transposition has to be done.
	686	IF ( pdims(2) == 1 ) THEN
	687	DO k = 1, nza
	688	DO i = nxl, nxra
	689	DO j = nys, nyna
[164]	690	f_out(j,i,k) = f_inv(j,i,k)
[1]	691	ENDDO
	692	ENDDO
	693	ENDDO
	694	RETURN
	695	ENDIF
	696
	697	!
	698	!-- Transpose array
	699	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
[622]	700	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
[1]	701	CALL MPI_ALLTOALL( f_inv(nys,nxl,1), sendrecvcount_zyd, MPI_REAL, &
[164]	702	work(1), sendrecvcount_zyd, MPI_REAL, &
[1]	703	comm1dy, ierr )
	704	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
	705
	706	!
	707	!-- Reorder transposed array
	708	m = 0
	709	DO l = 0, pdims(2) - 1
	710	ys = 0 + l * nny
	711	DO k = nzb_yd, nzt_yda
	712	DO i = nxl_yd, nxr_yda
	713	DO j = ys, ys + nny - 1
	714	m = m + 1
[164]	715	f_out(j,i,k) = work(m)
[1]	716	ENDDO
	717	ENDDO
	718	ENDDO
	719	ENDDO
	720
	721	#endif
	722
	723	END SUBROUTINE transpose_zyd

Note: See TracBrowser for help on using the repository browser.

Download in other formats:

| Impressum | ©Leibniz Universität Hannover |