Home

Context Navigation

transpose.f90 @ 683

Last change on this file since 683 was 683, checked in by raasch, 14 years ago

New:
---

optional exchange of ghost points in synchronous mode via MPI_SENDRCV,
steered by d3par parameter synchronous_exchange
(cpu_statistics, exchange_horiz, modules, parin)

openMP-parallelization of pressure solver (fft-method) for 2d-domain-decomposition
(poisfft, transpose)

Changed:

Errors:

mpt bugfix for netCDF4 usage (mrun)

Property svn:keywords set to Id

File size: 20.2 KB

Rev	Line
[164]	1	SUBROUTINE transpose_xy( f_in, work, f_out )
[1]	2
	3	!------------------------------------------------------------------------------!
[484]	4	! Current revisions:
[1]	5	! -----------------
[683]	6	! openMP parallelization of transpositions for 2d-domain-decomposition
[198]	7	!
	8	! Former revisions:
	9	! -----------------
	10	! $Id: transpose.f90 683 2011-02-09 14:25:15Z raasch $
	11	!
[623]	12	! 622 2010-12-10 08:08:13Z raasch
	13	! optional barriers included in order to speed up collective operations
	14	!
[198]	15	! 164 2008-05-15 08:46:15Z raasch
[164]	16	! f_inv changed from subroutine argument to automatic array in order to do
	17	! re-ordering from f_in to f_inv in one step, one array work is needed instead
	18	! of work1 and work2
[1]	19	!
[198]	20	! February 2007
[3]	21	! RCS Log replace by Id keyword, revision history cleaned up
	22	!
[1]	23	! Revision 1.2 2004/04/30 13:12:17 raasch
	24	! Switched from mpi_alltoallv to the simpler mpi_alltoall,
	25	! all former transpose-routine files collected in this file, enlarged
	26	! transposition arrays introduced
	27	!
	28	! Revision 1.1 2004/04/30 13:08:16 raasch
	29	! Initial revision (collection of former routines transpose_xy, transpose_xz,
	30	! transpose_yx, transpose_yz, transpose_zx, transpose_zy)
	31	!
	32	! Revision 1.1 1997/07/24 11:25:18 raasch
	33	! Initial revision
	34	!
	35	!
	36	! Description:
	37	! ------------
	38	! Transposition of input array (f_in) from x to y. For the input array, all
	39	! elements along x reside on the same PE, while after transposition, all
	40	! elements along y reside on the same PE.
	41	!------------------------------------------------------------------------------!
	42
	43	USE cpulog
	44	USE indices
	45	USE interfaces
	46	USE pegrid
	47	USE transpose_indices
	48
	49	IMPLICIT NONE
	50
	51	INTEGER :: i, j, k, l, m, ys
	52
	53	REAL :: f_in(0:nxa,nys_x:nyn_xa,nzb_x:nzt_xa), &
	54	f_inv(nys_x:nyn_xa,nzb_x:nzt_xa,0:nxa), &
	55	f_out(0:nya,nxl_y:nxr_ya,nzb_y:nzt_ya), &
[164]	56	work(nnxnnynnz)
[1]	57
	58	#if defined( __parallel )
	59
	60	!
	61	!-- Rearrange indices of input array in order to make data to be send
	62	!-- by MPI contiguous
[683]	63	!$OMP PARALLEL PRIVATE ( i, j, k )
	64	!$OMP DO
[1]	65	DO i = 0, nxa
	66	DO k = nzb_x, nzt_xa
	67	DO j = nys_x, nyn_xa
[164]	68	f_inv(j,k,i) = f_in(i,j,k)
[1]	69	ENDDO
	70	ENDDO
	71	ENDDO
[683]	72	!$OMP END PARALLEL
[1]	73
	74	!
	75	!-- Transpose array
	76	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
[622]	77	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
[1]	78	CALL MPI_ALLTOALL( f_inv(nys_x,nzb_x,0), sendrecvcount_xy, MPI_REAL, &
[164]	79	work(1), sendrecvcount_xy, MPI_REAL, &
[1]	80	comm1dy, ierr )
	81	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
	82
	83	!
	84	!-- Reorder transposed array
[683]	85	!$OMP PARALLEL PRIVATE ( i, j, k, l, m, ys )
	86	!$OMP DO
[1]	87	DO l = 0, pdims(2) - 1
[683]	88	m = l * ( nxr_ya - nxl_y + 1 ) * ( nzt_ya - nzb_y + 1 ) * &
	89	( nyn_xa - nys_x + 1 )
[1]	90	ys = 0 + l * ( nyn_xa - nys_x + 1 )
	91	DO i = nxl_y, nxr_ya
	92	DO k = nzb_y, nzt_ya
	93	DO j = ys, ys + nyn_xa - nys_x
	94	m = m + 1
[164]	95	f_out(j,i,k) = work(m)
[1]	96	ENDDO
	97	ENDDO
	98	ENDDO
	99	ENDDO
[683]	100	!$OMP END PARALLEL
[1]	101
	102	#endif
	103
	104	END SUBROUTINE transpose_xy
	105
	106
[164]	107	SUBROUTINE transpose_xz( f_in, work, f_out )
[1]	108
	109	!------------------------------------------------------------------------------!
	110	! Description:
	111	! ------------
	112	! Transposition of input array (f_in) from x to z. For the input array, all
	113	! elements along x reside on the same PE, while after transposition, all
	114	! elements along z reside on the same PE.
	115	!------------------------------------------------------------------------------!
	116
	117	USE cpulog
	118	USE indices
	119	USE interfaces
	120	USE pegrid
	121	USE transpose_indices
	122
	123	IMPLICIT NONE
	124
	125	INTEGER :: i, j, k, l, m, xs
	126
	127	REAL :: f_in(0:nxa,nys_x:nyn_xa,nzb_x:nzt_xa), &
[164]	128	f_inv(nys:nyna,nxl:nxra,1:nza), &
[1]	129	f_out(1:nza,nys:nyna,nxl:nxra), &
[164]	130	work(nnxnnynnz)
[1]	131
	132	#if defined( __parallel )
	133
	134	!
	135	!-- If the PE grid is one-dimensional along y, the array has only to be
	136	!-- reordered locally and therefore no transposition has to be done.
	137	IF ( pdims(1) /= 1 ) THEN
	138	!
	139	!-- Reorder input array for transposition
[683]	140	!$OMP PARALLEL PRIVATE ( i, j, k, l, m, xs )
	141	!$OMP DO
[1]	142	DO l = 0, pdims(1) - 1
[683]	143	m = l * ( nzt_xa - nzb_x + 1 ) * nnx * ( nyn_xa - nys_x + 1 )
[1]	144	xs = 0 + l * nnx
	145	DO k = nzb_x, nzt_xa
[164]	146	DO i = xs, xs + nnx - 1
	147	DO j = nys_x, nyn_xa
[1]	148	m = m + 1
[164]	149	work(m) = f_in(i,j,k)
[1]	150	ENDDO
	151	ENDDO
	152	ENDDO
	153	ENDDO
[683]	154	!$OMP END PARALLEL
[1]	155
	156	!
	157	!-- Transpose array
	158	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
[622]	159	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
[164]	160	CALL MPI_ALLTOALL( work(1), sendrecvcount_zx, MPI_REAL, &
	161	f_inv(nys,nxl,1), sendrecvcount_zx, MPI_REAL, &
[1]	162	comm1dx, ierr )
	163	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
	164
	165	!
	166	!-- Reorder transposed array in a way that the z index is in first position
[683]	167	!$OMP PARALLEL PRIVATE ( i, j, k )
	168	!$OMP DO
[164]	169	DO k = 1, nza
	170	DO i = nxl, nxra
	171	DO j = nys, nyna
	172	f_out(k,j,i) = f_inv(j,i,k)
[1]	173	ENDDO
	174	ENDDO
	175	ENDDO
[683]	176	!$OMP END PARALLEL
[1]	177	ELSE
	178	!
	179	!-- Reorder the array in a way that the z index is in first position
[683]	180	!$OMP PARALLEL PRIVATE ( i, j, k )
	181	!$OMP DO
[1]	182	DO i = nxl, nxra
	183	DO j = nys, nyna
	184	DO k = 1, nza
[164]	185	f_inv(j,i,k) = f_in(i,j,k)
[1]	186	ENDDO
	187	ENDDO
	188	ENDDO
[683]	189	!$OMP END PARALLEL
[1]	190
[683]	191	!$OMP PARALLEL PRIVATE ( i, j, k )
	192	!$OMP DO
[164]	193	DO k = 1, nza
	194	DO i = nxl, nxra
	195	DO j = nys, nyna
	196	f_out(k,j,i) = f_inv(j,i,k)
	197	ENDDO
[1]	198	ENDDO
	199	ENDDO
[683]	200	!$OMP END PARALLEL
[1]	201
[164]	202	ENDIF
	203
	204
[1]	205	#endif
	206
	207	END SUBROUTINE transpose_xz
	208
	209
[164]	210	SUBROUTINE transpose_yx( f_in, work, f_out )
[1]	211
	212	!------------------------------------------------------------------------------!
	213	! Description:
	214	! ------------
	215	! Transposition of input array (f_in) from y to x. For the input array, all
	216	! elements along y reside on the same PE, while after transposition, all
	217	! elements along x reside on the same PE.
	218	!------------------------------------------------------------------------------!
	219
	220	USE cpulog
	221	USE indices
	222	USE interfaces
	223	USE pegrid
	224	USE transpose_indices
	225
	226	IMPLICIT NONE
	227
	228	INTEGER :: i, j, k, l, m, ys
	229
	230	REAL :: f_in(0:nya,nxl_y:nxr_ya,nzb_y:nzt_ya), &
	231	f_inv(nys_x:nyn_xa,nzb_x:nzt_xa,0:nxa), &
	232	f_out(0:nxa,nys_x:nyn_xa,nzb_x:nzt_xa), &
[164]	233	work(nnxnnynnz)
[1]	234
	235	#if defined( __parallel )
	236
	237	!
	238	!-- Reorder input array for transposition
[683]	239	!$OMP PARALLEL PRIVATE ( i, j, k, l, m, ys )
	240	!$OMP DO
[1]	241	DO l = 0, pdims(2) - 1
[683]	242	m = l * ( nxr_ya - nxl_y + 1 ) * ( nzt_ya - nzb_y + 1 ) * &
	243	( nyn_xa - nys_x + 1 )
[1]	244	ys = 0 + l * ( nyn_xa - nys_x + 1 )
	245	DO i = nxl_y, nxr_ya
	246	DO k = nzb_y, nzt_ya
	247	DO j = ys, ys + nyn_xa - nys_x
	248	m = m + 1
[164]	249	work(m) = f_in(j,i,k)
[1]	250	ENDDO
	251	ENDDO
	252	ENDDO
	253	ENDDO
[683]	254	!$OMP END PARALLEL
[1]	255
	256	!
	257	!-- Transpose array
	258	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
[622]	259	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
[164]	260	CALL MPI_ALLTOALL( work(1), sendrecvcount_xy, MPI_REAL, &
[1]	261	f_inv(nys_x,nzb_x,0), sendrecvcount_xy, MPI_REAL, &
	262	comm1dy, ierr )
	263	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
	264
	265	!
	266	!-- Reorder transposed array in a way that the x index is in first position
[683]	267	!$OMP PARALLEL PRIVATE ( i, j, k )
	268	!$OMP DO
[1]	269	DO i = 0, nxa
	270	DO k = nzb_x, nzt_xa
	271	DO j = nys_x, nyn_xa
[164]	272	f_out(i,j,k) = f_inv(j,k,i)
[1]	273	ENDDO
	274	ENDDO
	275	ENDDO
[683]	276	!$OMP END PARALLEL
[1]	277
	278	#endif
	279
	280	END SUBROUTINE transpose_yx
	281
	282
[164]	283	SUBROUTINE transpose_yxd( f_in, work, f_out )
[1]	284
	285	!------------------------------------------------------------------------------!
	286	! Description:
	287	! ------------
	288	! Transposition of input array (f_in) from y to x. For the input array, all
	289	! elements along y reside on the same PE, while after transposition, all
	290	! elements along x reside on the same PE.
	291	! This is a direct transposition for arrays with indices in regular order
	292	! (k,j,i) (cf. transpose_yx).
	293	!------------------------------------------------------------------------------!
	294
	295	USE cpulog
	296	USE indices
	297	USE interfaces
	298	USE pegrid
	299	USE transpose_indices
	300
	301	IMPLICIT NONE
	302
	303	INTEGER :: i, j, k, l, m, recvcount_yx, sendcount_yx, xs
	304
	305	REAL :: f_in(1:nza,nys:nyna,nxl:nxra), f_inv(nxl:nxra,1:nza,nys:nyna), &
	306	f_out(0:nxa,nys_x:nyn_xa,nzb_x:nzt_xa), &
[164]	307	work(nnxnnynnz)
[1]	308
	309	#if defined( __parallel )
	310
	311	!
	312	!-- Rearrange indices of input array in order to make data to be send
	313	!-- by MPI contiguous
	314	DO k = 1, nza
	315	DO j = nys, nyna
	316	DO i = nxl, nxra
[164]	317	f_inv(i,k,j) = f_in(k,j,i)
[1]	318	ENDDO
	319	ENDDO
	320	ENDDO
	321
	322	!
	323	!-- Transpose array
	324	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
[622]	325	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
[1]	326	CALL MPI_ALLTOALL( f_inv(nxl,1,nys), sendrecvcount_xy, MPI_REAL, &
[164]	327	work(1), sendrecvcount_xy, MPI_REAL, &
[1]	328	comm1dx, ierr )
	329	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
	330
	331	!
	332	!-- Reorder transposed array
	333	m = 0
	334	DO l = 0, pdims(1) - 1
	335	xs = 0 + l * nnx
	336	DO j = nys_x, nyn_xa
	337	DO k = 1, nza
	338	DO i = xs, xs + nnx - 1
	339	m = m + 1
[164]	340	f_out(i,j,k) = work(m)
[1]	341	ENDDO
	342	ENDDO
	343	ENDDO
	344	ENDDO
	345
	346	#endif
	347
	348	END SUBROUTINE transpose_yxd
	349
	350
[164]	351	SUBROUTINE transpose_yz( f_in, work, f_out )
[1]	352
	353	!------------------------------------------------------------------------------!
	354	! Description:
	355	! ------------
	356	! Transposition of input array (f_in) from y to z. For the input array, all
	357	! elements along y reside on the same PE, while after transposition, all
	358	! elements along z reside on the same PE.
	359	!------------------------------------------------------------------------------!
	360
	361	USE cpulog
	362	USE indices
	363	USE interfaces
	364	USE pegrid
	365	USE transpose_indices
	366
	367	IMPLICIT NONE
	368
	369	INTEGER :: i, j, k, l, m, zs
	370
	371	REAL :: f_in(0:nya,nxl_y:nxr_ya,nzb_y:nzt_ya), &
	372	f_inv(nxl_y:nxr_ya,nzb_y:nzt_ya,0:nya), &
	373	f_out(nxl_z:nxr_za,nys_z:nyn_za,1:nza), &
[164]	374	work(nnxnnynnz)
[1]	375
	376	#if defined( __parallel )
	377
	378	!
	379	!-- Rearrange indices of input array in order to make data to be send
	380	!-- by MPI contiguous
[683]	381	!$OMP PARALLEL PRIVATE ( i, j, k )
	382	!$OMP DO
[164]	383	DO j = 0, nya
	384	DO k = nzb_y, nzt_ya
	385	DO i = nxl_y, nxr_ya
	386	f_inv(i,k,j) = f_in(j,i,k)
[1]	387	ENDDO
	388	ENDDO
	389	ENDDO
[683]	390	!$OMP END PARALLEL
[1]	391
	392	!
	393	!-- Move data to different array, because memory location of work1 is
	394	!-- needed further below (work1 = work2).
	395	!-- If the PE grid is one-dimensional along y, only local reordering
	396	!-- of the data is necessary and no transposition has to be done.
	397	IF ( pdims(1) == 1 ) THEN
[683]	398	!$OMP PARALLEL PRIVATE ( i, j, k )
	399	!$OMP DO
[1]	400	DO j = 0, nya
	401	DO k = nzb_y, nzt_ya
	402	DO i = nxl_y, nxr_ya
[164]	403	f_out(i,j,k) = f_inv(i,k,j)
[1]	404	ENDDO
	405	ENDDO
	406	ENDDO
[683]	407	!$OMP END PARALLEL
[1]	408	RETURN
	409	ENDIF
	410
	411	!
	412	!-- Transpose array
	413	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
[622]	414	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
[1]	415	CALL MPI_ALLTOALL( f_inv(nxl_y,nzb_y,0), sendrecvcount_yz, MPI_REAL, &
[164]	416	work(1), sendrecvcount_yz, MPI_REAL, &
[1]	417	comm1dx, ierr )
	418	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
	419
	420	!
	421	!-- Reorder transposed array
[683]	422	!$OMP PARALLEL PRIVATE ( i, j, k, l, m, zs )
	423	!$OMP DO
[1]	424	DO l = 0, pdims(1) - 1
[683]	425	m = l * ( nyn_za - nys_z + 1 ) * ( nzt_ya - nzb_y + 1 ) * &
	426	( nxr_za - nxl_z + 1 )
[1]	427	zs = 1 + l * ( nzt_ya - nzb_y + 1 )
	428	DO j = nys_z, nyn_za
	429	DO k = zs, zs + nzt_ya - nzb_y
	430	DO i = nxl_z, nxr_za
	431	m = m + 1
[164]	432	f_out(i,j,k) = work(m)
[1]	433	ENDDO
	434	ENDDO
	435	ENDDO
	436	ENDDO
[683]	437	!$OMP END PARALLEL
[1]	438
	439	#endif
	440
	441	END SUBROUTINE transpose_yz
	442
	443
[164]	444	SUBROUTINE transpose_zx( f_in, work, f_out )
[1]	445
	446	!------------------------------------------------------------------------------!
	447	! Description:
	448	! ------------
	449	! Transposition of input array (f_in) from z to x. For the input array, all
	450	! elements along z reside on the same PE, while after transposition, all
	451	! elements along x reside on the same PE.
	452	!------------------------------------------------------------------------------!
	453
	454	USE cpulog
	455	USE indices
	456	USE interfaces
	457	USE pegrid
	458	USE transpose_indices
	459
	460	IMPLICIT NONE
	461
	462	INTEGER :: i, j, k, l, m, xs
	463
[164]	464	REAL :: f_in(1:nza,nys:nyna,nxl:nxra), f_inv(nys:nyna,nxl:nxra,1:nza), &
[1]	465	f_out(0:nxa,nys_x:nyn_xa,nzb_x:nzt_xa), &
[164]	466	work(nnxnnynnz)
[1]	467
	468	#if defined( __parallel )
	469
	470	!
	471	!-- Rearrange indices of input array in order to make data to be send
	472	!-- by MPI contiguous
[683]	473	!$OMP PARALLEL PRIVATE ( i, j, k )
	474	!$OMP DO
[164]	475	DO k = 1,nza
	476	DO i = nxl, nxra
	477	DO j = nys, nyna
	478	f_inv(j,i,k) = f_in(k,j,i)
[1]	479	ENDDO
	480	ENDDO
	481	ENDDO
[683]	482	!$OMP END PARALLEL
[1]	483
	484	!
	485	!-- Move data to different array, because memory location of work1 is
	486	!-- needed further below (work1 = work2).
	487	!-- If the PE grid is one-dimensional along y, only local reordering
	488	!-- of the data is necessary and no transposition has to be done.
	489	IF ( pdims(1) == 1 ) THEN
[683]	490	!$OMP PARALLEL PRIVATE ( i, j, k )
	491	!$OMP DO
[1]	492	DO k = 1, nza
[164]	493	DO i = nxl, nxra
	494	DO j = nys, nyna
	495	f_out(i,j,k) = f_inv(j,i,k)
[1]	496	ENDDO
	497	ENDDO
	498	ENDDO
[683]	499	!$OMP END PARALLEL
[1]	500	RETURN
	501	ENDIF
	502
	503	!
	504	!-- Transpose array
	505	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
[622]	506	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
[164]	507	CALL MPI_ALLTOALL( f_inv(nys,nxl,1), sendrecvcount_zx, MPI_REAL, &
	508	work(1), sendrecvcount_zx, MPI_REAL, &
[1]	509	comm1dx, ierr )
	510	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
	511
	512	!
	513	!-- Reorder transposed array
[683]	514	!$OMP PARALLEL PRIVATE ( i, j, k, l, m, xs )
	515	!$OMP DO
[1]	516	DO l = 0, pdims(1) - 1
[683]	517	m = l * ( nzt_xa - nzb_x + 1 ) * nnx * ( nyn_xa - nys_x + 1 )
[1]	518	xs = 0 + l * nnx
	519	DO k = nzb_x, nzt_xa
[164]	520	DO i = xs, xs + nnx - 1
	521	DO j = nys_x, nyn_xa
[1]	522	m = m + 1
[164]	523	f_out(i,j,k) = work(m)
[1]	524	ENDDO
	525	ENDDO
	526	ENDDO
	527	ENDDO
[683]	528	!$OMP END PARALLEL
[1]	529
	530	#endif
	531
	532	END SUBROUTINE transpose_zx
	533
	534
[164]	535	SUBROUTINE transpose_zy( f_in, work, f_out )
[1]	536
	537	!------------------------------------------------------------------------------!
	538	! Description:
	539	! ------------
	540	! Transposition of input array (f_in) from z to y. For the input array, all
	541	! elements along z reside on the same PE, while after transposition, all
	542	! elements along y reside on the same PE.
	543	!------------------------------------------------------------------------------!
	544
	545	USE cpulog
	546	USE indices
	547	USE interfaces
	548	USE pegrid
	549	USE transpose_indices
	550
	551	IMPLICIT NONE
	552
	553	INTEGER :: i, j, k, l, m, zs
	554
	555	REAL :: f_in(nxl_z:nxr_za,nys_z:nyn_za,1:nza), &
	556	f_inv(nxl_y:nxr_ya,nzb_y:nzt_ya,0:nya), &
	557	f_out(0:nya,nxl_y:nxr_ya,nzb_y:nzt_ya), &
[164]	558	work(nnxnnynnz)
[1]	559
	560	#if defined( __parallel )
	561
	562	!
	563	!-- If the PE grid is one-dimensional along y, the array has only to be
	564	!-- reordered locally and therefore no transposition has to be done.
	565	IF ( pdims(1) /= 1 ) THEN
	566	!
	567	!-- Reorder input array for transposition
[683]	568	!$OMP PARALLEL PRIVATE ( i, j, k, l, m, zs )
	569	!$OMP DO
[1]	570	DO l = 0, pdims(1) - 1
[683]	571	m = l * ( nyn_za - nys_z + 1 ) * ( nzt_ya - nzb_y + 1 ) * &
	572	( nxr_za - nxl_z + 1 )
[1]	573	zs = 1 + l * ( nzt_ya - nzb_y + 1 )
	574	DO j = nys_z, nyn_za
	575	DO k = zs, zs + nzt_ya - nzb_y
	576	DO i = nxl_z, nxr_za
	577	m = m + 1
[164]	578	work(m) = f_in(i,j,k)
[1]	579	ENDDO
	580	ENDDO
	581	ENDDO
	582	ENDDO
[683]	583	!$OMP END PARALLEL
[1]	584
	585	!
	586	!-- Transpose array
	587	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
[622]	588	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
[164]	589	CALL MPI_ALLTOALL( work(1), sendrecvcount_yz, MPI_REAL, &
[1]	590	f_inv(nxl_y,nzb_y,0), sendrecvcount_yz, MPI_REAL, &
	591	comm1dx, ierr )
	592	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
	593
	594	!
	595	!-- Reorder transposed array in a way that the y index is in first position
[683]	596	!$OMP PARALLEL PRIVATE ( i, j, k )
	597	!$OMP DO
[164]	598	DO j = 0, nya
	599	DO k = nzb_y, nzt_ya
	600	DO i = nxl_y, nxr_ya
	601	f_out(j,i,k) = f_inv(i,k,j)
[1]	602	ENDDO
	603	ENDDO
	604	ENDDO
[683]	605	!$OMP END PARALLEL
[1]	606	ELSE
	607	!
	608	!-- Reorder the array in a way that the y index is in first position
[683]	609	!$OMP PARALLEL PRIVATE ( i, j, k )
	610	!$OMP DO
[1]	611	DO k = nzb_y, nzt_ya
[164]	612	DO j = 0, nya
	613	DO i = nxl_y, nxr_ya
	614	f_inv(i,k,j) = f_in(i,j,k)
	615	ENDDO
	616	ENDDO
	617	ENDDO
[683]	618	!$OMP END PARALLEL
[164]	619	!
	620	!-- Move data to output array
[683]	621	!$OMP PARALLEL PRIVATE ( i, j, k )
	622	!$OMP DO
[164]	623	DO k = nzb_y, nzt_ya
[1]	624	DO i = nxl_y, nxr_ya
	625	DO j = 0, nya
[164]	626	f_out(j,i,k) = f_inv(i,k,j)
[1]	627	ENDDO
	628	ENDDO
	629	ENDDO
[683]	630	!$OMP END PARALLEL
[164]	631
[1]	632	ENDIF
	633
	634	#endif
	635
	636	END SUBROUTINE transpose_zy
	637
	638
[164]	639	SUBROUTINE transpose_zyd( f_in, work, f_out )
[1]	640
	641	!------------------------------------------------------------------------------!
	642	! Description:
	643	! ------------
	644	! Transposition of input array (f_in) from z to y. For the input array, all
	645	! elements along z reside on the same PE, while after transposition, all
	646	! elements along y reside on the same PE.
	647	! This is a direct transposition for arrays with indices in regular order
	648	! (k,j,i) (cf. transpose_zy).
	649	!------------------------------------------------------------------------------!
	650
	651	USE cpulog
	652	USE indices
	653	USE interfaces
	654	USE pegrid
	655	USE transpose_indices
	656
	657	IMPLICIT NONE
	658
	659	INTEGER :: i, j, k, l, m, ys
	660
	661	REAL :: f_in(1:nza,nys:nyna,nxl:nxra), f_inv(nys:nyna,nxl:nxra,1:nza), &
	662	f_out(0:nya,nxl_yd:nxr_yda,nzb_yd:nzt_yda), &
[164]	663	work(nnxnnynnz)
[1]	664
	665	#if defined( __parallel )
	666
	667	!
	668	!-- Rearrange indices of input array in order to make data to be send
	669	!-- by MPI contiguous
	670	DO i = nxl, nxra
	671	DO j = nys, nyna
	672	DO k = 1, nza
[164]	673	f_inv(j,i,k) = f_in(k,j,i)
[1]	674	ENDDO
	675	ENDDO
	676	ENDDO
	677
	678	!
	679	!-- Move data to different array, because memory location of work1 is
	680	!-- needed further below (work1 = work2).
	681	!-- If the PE grid is one-dimensional along x, only local reordering
	682	!-- of the data is necessary and no transposition has to be done.
	683	IF ( pdims(2) == 1 ) THEN
	684	DO k = 1, nza
	685	DO i = nxl, nxra
	686	DO j = nys, nyna
[164]	687	f_out(j,i,k) = f_inv(j,i,k)
[1]	688	ENDDO
	689	ENDDO
	690	ENDDO
	691	RETURN
	692	ENDIF
	693
	694	!
	695	!-- Transpose array
	696	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
[622]	697	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
[1]	698	CALL MPI_ALLTOALL( f_inv(nys,nxl,1), sendrecvcount_zyd, MPI_REAL, &
[164]	699	work(1), sendrecvcount_zyd, MPI_REAL, &
[1]	700	comm1dy, ierr )
	701	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
	702
	703	!
	704	!-- Reorder transposed array
	705	m = 0
	706	DO l = 0, pdims(2) - 1
	707	ys = 0 + l * nny
	708	DO k = nzb_yd, nzt_yda
	709	DO i = nxl_yd, nxr_yda
	710	DO j = ys, ys + nny - 1
	711	m = m + 1
[164]	712	f_out(j,i,k) = work(m)
[1]	713	ENDDO
	714	ENDDO
	715	ENDDO
	716	ENDDO
	717
	718	#endif
	719
	720	END SUBROUTINE transpose_zyd

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: palm/trunk/SOURCE/transpose.f90 @ 683

Download in other formats: