Home

Context Navigation

transpose.f90 @ 1106

Last change on this file since 1106 was 1106, checked in by raasch, 11 years ago

New:
---

Porting of FFT-solver for serial runs to GPU using CUDA FFT,
preprocessor lines in transpose routines rearranged, so that routines can also
be used in serial (non-parallel) mode,
transpositions also carried out in serial mode, routines fftx, fftxp replaced
by calls of fft_x, fft_x replaced by fft_x_1d in the 1D-decomposition routines
(Makefile, Makefile_check, fft_xy, poisfft, poisfft_hybrid, transpose, new: cuda_fft_interfaces)

--stdin argument for mpiexec on lckyuh, -y and -Y settings output to header (mrun)

Changed:

Module array_kind renamed precision_kind
(check_open, data_output_3d, fft_xy, modules, user_data_output_3d)

some format changes for coupled atmosphere-ocean runs (header)
small changes in code formatting (microphysics, prognostic_equations)

Errors:

bugfix: default value (0) assigned to coupling_start_time (modules)
bugfix: initial time for preruns of coupled runs is output as -coupling_start_time (data_output_profiles)

Property svn:keywords set to Id

File size: 21.6 KB

Rev	Line
[164]	1	SUBROUTINE transpose_xy( f_in, work, f_out )
[1]	2
[1036]	3	!--------------------------------------------------------------------------------!
	4	! This file is part of PALM.
	5	!
	6	! PALM is free software: you can redistribute it and/or modify it under the terms
	7	! of the GNU General Public License as published by the Free Software Foundation,
	8	! either version 3 of the License, or (at your option) any later version.
	9	!
	10	! PALM is distributed in the hope that it will be useful, but WITHOUT ANY
	11	! WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
	12	! A PARTICULAR PURPOSE. See the GNU General Public License for more details.
	13	!
	14	! You should have received a copy of the GNU General Public License along with
	15	! PALM. If not, see <http://www.gnu.org/licenses/>.
	16	!
	17	! Copyright 1997-2012 Leibniz University Hannover
	18	!--------------------------------------------------------------------------------!
	19	!
[484]	20	! Current revisions:
[1]	21	! -----------------
[1106]	22	! preprocessor lines rearranged so that routines can also be used in serial
	23	! (non-parallel) mode
[198]	24	!
	25	! Former revisions:
	26	! -----------------
	27	! $Id: transpose.f90 1106 2013-03-04 05:31:38Z raasch $
	28	!
[1093]	29	! 1092 2013-02-02 11:24:22Z raasch
	30	! unused variables removed
	31	!
[1037]	32	! 1036 2012-10-22 13:43:42Z raasch
	33	! code put under GPL (PALM 3.9)
	34	!
[1004]	35	! 1003 2012-09-14 14:35:53Z raasch
	36	! indices nxa, nya, etc. replaced by nx, ny, etc.
	37	!
[684]	38	! 683 2011-02-09 14:25:15Z raasch
	39	! openMP parallelization of transpositions for 2d-domain-decomposition
	40	!
[623]	41	! 622 2010-12-10 08:08:13Z raasch
	42	! optional barriers included in order to speed up collective operations
	43	!
[198]	44	! 164 2008-05-15 08:46:15Z raasch
[164]	45	! f_inv changed from subroutine argument to automatic array in order to do
	46	! re-ordering from f_in to f_inv in one step, one array work is needed instead
	47	! of work1 and work2
[1]	48	!
[198]	49	! February 2007
[3]	50	! RCS Log replace by Id keyword, revision history cleaned up
	51	!
[1]	52	! Revision 1.2 2004/04/30 13:12:17 raasch
	53	! Switched from mpi_alltoallv to the simpler mpi_alltoall,
	54	! all former transpose-routine files collected in this file, enlarged
	55	! transposition arrays introduced
	56	!
	57	! Revision 1.1 2004/04/30 13:08:16 raasch
	58	! Initial revision (collection of former routines transpose_xy, transpose_xz,
	59	! transpose_yx, transpose_yz, transpose_zx, transpose_zy)
	60	!
	61	! Revision 1.1 1997/07/24 11:25:18 raasch
	62	! Initial revision
	63	!
	64	!
	65	! Description:
	66	! ------------
	67	! Transposition of input array (f_in) from x to y. For the input array, all
	68	! elements along x reside on the same PE, while after transposition, all
	69	! elements along y reside on the same PE.
	70	!------------------------------------------------------------------------------!
	71
	72	USE cpulog
	73	USE indices
	74	USE interfaces
	75	USE pegrid
	76	USE transpose_indices
	77
	78	IMPLICIT NONE
	79
	80	INTEGER :: i, j, k, l, m, ys
	81
[1003]	82	REAL :: f_in(0:nx,nys_x:nyn_x,nzb_x:nzt_x), &
	83	f_inv(nys_x:nyn_x,nzb_x:nzt_x,0:nx), &
	84	f_out(0:ny,nxl_y:nxr_y,nzb_y:nzt_y), &
[164]	85	work(nnxnnynnz)
[1]	86
	87	!
	88	!-- Rearrange indices of input array in order to make data to be send
	89	!-- by MPI contiguous
[683]	90	!$OMP PARALLEL PRIVATE ( i, j, k )
	91	!$OMP DO
[1003]	92	DO i = 0, nx
	93	DO k = nzb_x, nzt_x
	94	DO j = nys_x, nyn_x
[164]	95	f_inv(j,k,i) = f_in(i,j,k)
[1]	96	ENDDO
	97	ENDDO
	98	ENDDO
[683]	99	!$OMP END PARALLEL
[1]	100
[1106]	101	IF ( numprocs /= 1 ) THEN
	102
	103	#if defined( __parallel )
[1]	104	!
[1106]	105	!-- Transpose array
	106	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
	107	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
	108	CALL MPI_ALLTOALL( f_inv(nys_x,nzb_x,0), sendrecvcount_xy, MPI_REAL, &
	109	work(1), sendrecvcount_xy, MPI_REAL, &
	110	comm1dy, ierr )
	111	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
[1]	112
	113	!
[1106]	114	!-- Reorder transposed array
[683]	115	!$OMP PARALLEL PRIVATE ( i, j, k, l, m, ys )
	116	!$OMP DO
[1106]	117	DO l = 0, pdims(2) - 1
	118	m = l * ( nxr_y - nxl_y + 1 ) * ( nzt_y - nzb_y + 1 ) * &
	119	( nyn_x - nys_x + 1 )
	120	ys = 0 + l * ( nyn_x - nys_x + 1 )
	121	DO i = nxl_y, nxr_y
	122	DO k = nzb_y, nzt_y
	123	DO j = ys, ys + nyn_x - nys_x
	124	m = m + 1
	125	f_out(j,i,k) = work(m)
	126	ENDDO
[1]	127	ENDDO
	128	ENDDO
	129	ENDDO
[683]	130	!$OMP END PARALLEL
[1]	131	#endif
	132
[1106]	133	ELSE
	134
	135	!
	136	!-- Reorder transposed array
	137	!$OMP PARALLEL PRIVATE ( i, j, k )
	138	!$OMP DO
	139	DO k = nzb_y, nzt_y
	140	DO i = nxl_y, nxr_y
	141	DO j = 0, ny
	142	f_out(j,i,k) = f_inv(j,k,i)
	143	ENDDO
	144	ENDDO
	145	ENDDO
	146	!$OMP END PARALLEL
	147
	148	ENDIF
	149
[1]	150	END SUBROUTINE transpose_xy
	151
	152
[164]	153	SUBROUTINE transpose_xz( f_in, work, f_out )
[1]	154
	155	!------------------------------------------------------------------------------!
	156	! Description:
	157	! ------------
	158	! Transposition of input array (f_in) from x to z. For the input array, all
	159	! elements along x reside on the same PE, while after transposition, all
	160	! elements along z reside on the same PE.
	161	!------------------------------------------------------------------------------!
	162
	163	USE cpulog
	164	USE indices
	165	USE interfaces
	166	USE pegrid
	167	USE transpose_indices
	168
	169	IMPLICIT NONE
	170
	171	INTEGER :: i, j, k, l, m, xs
	172
[1003]	173	REAL :: f_in(0:nx,nys_x:nyn_x,nzb_x:nzt_x), &
	174	f_inv(nys:nyn,nxl:nxr,1:nz), &
	175	f_out(1:nz,nys:nyn,nxl:nxr), &
[164]	176	work(nnxnnynnz)
[1]	177
	178
	179	!
	180	!-- If the PE grid is one-dimensional along y, the array has only to be
	181	!-- reordered locally and therefore no transposition has to be done.
	182	IF ( pdims(1) /= 1 ) THEN
[1106]	183
	184	#if defined( __parallel )
[1]	185	!
	186	!-- Reorder input array for transposition
[683]	187	!$OMP PARALLEL PRIVATE ( i, j, k, l, m, xs )
	188	!$OMP DO
[1]	189	DO l = 0, pdims(1) - 1
[1003]	190	m = l * ( nzt_x - nzb_x + 1 ) * nnx * ( nyn_x - nys_x + 1 )
[1]	191	xs = 0 + l * nnx
[1003]	192	DO k = nzb_x, nzt_x
[164]	193	DO i = xs, xs + nnx - 1
[1003]	194	DO j = nys_x, nyn_x
[1]	195	m = m + 1
[164]	196	work(m) = f_in(i,j,k)
[1]	197	ENDDO
	198	ENDDO
	199	ENDDO
	200	ENDDO
[683]	201	!$OMP END PARALLEL
[1]	202
	203	!
	204	!-- Transpose array
	205	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
[622]	206	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
[164]	207	CALL MPI_ALLTOALL( work(1), sendrecvcount_zx, MPI_REAL, &
	208	f_inv(nys,nxl,1), sendrecvcount_zx, MPI_REAL, &
[1]	209	comm1dx, ierr )
	210	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
	211
	212	!
	213	!-- Reorder transposed array in a way that the z index is in first position
[683]	214	!$OMP PARALLEL PRIVATE ( i, j, k )
	215	!$OMP DO
[1003]	216	DO k = 1, nz
	217	DO i = nxl, nxr
	218	DO j = nys, nyn
[164]	219	f_out(k,j,i) = f_inv(j,i,k)
[1]	220	ENDDO
	221	ENDDO
	222	ENDDO
[683]	223	!$OMP END PARALLEL
[1106]	224	#endif
	225
[1]	226	ELSE
[1106]	227
[1]	228	!
	229	!-- Reorder the array in a way that the z index is in first position
[683]	230	!$OMP PARALLEL PRIVATE ( i, j, k )
	231	!$OMP DO
[1003]	232	DO i = nxl, nxr
	233	DO j = nys, nyn
	234	DO k = 1, nz
[164]	235	f_inv(j,i,k) = f_in(i,j,k)
[1]	236	ENDDO
	237	ENDDO
	238	ENDDO
[683]	239	!$OMP END PARALLEL
[1]	240
[683]	241	!$OMP PARALLEL PRIVATE ( i, j, k )
	242	!$OMP DO
[1003]	243	DO k = 1, nz
	244	DO i = nxl, nxr
	245	DO j = nys, nyn
[164]	246	f_out(k,j,i) = f_inv(j,i,k)
	247	ENDDO
[1]	248	ENDDO
	249	ENDDO
[683]	250	!$OMP END PARALLEL
[1]	251
[164]	252	ENDIF
	253
[1]	254	END SUBROUTINE transpose_xz
	255
	256
[164]	257	SUBROUTINE transpose_yx( f_in, work, f_out )
[1]	258
	259	!------------------------------------------------------------------------------!
	260	! Description:
	261	! ------------
	262	! Transposition of input array (f_in) from y to x. For the input array, all
	263	! elements along y reside on the same PE, while after transposition, all
	264	! elements along x reside on the same PE.
	265	!------------------------------------------------------------------------------!
	266
	267	USE cpulog
	268	USE indices
	269	USE interfaces
	270	USE pegrid
	271	USE transpose_indices
	272
	273	IMPLICIT NONE
	274
	275	INTEGER :: i, j, k, l, m, ys
	276
[1003]	277	REAL :: f_in(0:ny,nxl_y:nxr_y,nzb_y:nzt_y), &
	278	f_inv(nys_x:nyn_x,nzb_x:nzt_x,0:nx), &
	279	f_out(0:nx,nys_x:nyn_x,nzb_x:nzt_x), &
[164]	280	work(nnxnnynnz)
[1]	281
[1106]	282	IF ( numprocs /= 1 ) THEN
	283
[1]	284	#if defined( __parallel )
	285	!
[1106]	286	!-- Reorder input array for transposition
[683]	287	!$OMP PARALLEL PRIVATE ( i, j, k, l, m, ys )
	288	!$OMP DO
[1106]	289	DO l = 0, pdims(2) - 1
	290	m = l * ( nxr_y - nxl_y + 1 ) * ( nzt_y - nzb_y + 1 ) * &
	291	( nyn_x - nys_x + 1 )
	292	ys = 0 + l * ( nyn_x - nys_x + 1 )
	293	DO i = nxl_y, nxr_y
	294	DO k = nzb_y, nzt_y
	295	DO j = ys, ys + nyn_x - nys_x
	296	m = m + 1
	297	work(m) = f_in(j,i,k)
	298	ENDDO
	299	ENDDO
	300	ENDDO
	301	ENDDO
	302	!$OMP END PARALLEL
	303
	304	!
	305	!-- Transpose array
	306	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
	307	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
	308	CALL MPI_ALLTOALL( work(1), sendrecvcount_xy, MPI_REAL, &
	309	f_inv(nys_x,nzb_x,0), sendrecvcount_xy, MPI_REAL, &
	310	comm1dy, ierr )
	311	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
	312	#endif
	313
	314	ELSE
	315
	316	!
	317	!-- Reorder array f_in the same way as ALLTOALL did it
	318	!$OMP PARALLEL PRIVATE ( i, j, k )
	319	!$OMP DO
[1003]	320	DO i = nxl_y, nxr_y
	321	DO k = nzb_y, nzt_y
[1106]	322	DO j = 0, ny
	323	f_inv(j,k,i) = f_in(j,i,k)
[1]	324	ENDDO
	325	ENDDO
	326	ENDDO
[683]	327	!$OMP END PARALLEL
[1]	328
[1106]	329	ENDIF
[1]	330
	331	!
	332	!-- Reorder transposed array in a way that the x index is in first position
[683]	333	!$OMP PARALLEL PRIVATE ( i, j, k )
	334	!$OMP DO
[1003]	335	DO i = 0, nx
	336	DO k = nzb_x, nzt_x
	337	DO j = nys_x, nyn_x
[164]	338	f_out(i,j,k) = f_inv(j,k,i)
[1]	339	ENDDO
	340	ENDDO
	341	ENDDO
[683]	342	!$OMP END PARALLEL
[1]	343
	344	END SUBROUTINE transpose_yx
	345
	346
[164]	347	SUBROUTINE transpose_yxd( f_in, work, f_out )
[1]	348
	349	!------------------------------------------------------------------------------!
	350	! Description:
	351	! ------------
	352	! Transposition of input array (f_in) from y to x. For the input array, all
	353	! elements along y reside on the same PE, while after transposition, all
	354	! elements along x reside on the same PE.
	355	! This is a direct transposition for arrays with indices in regular order
	356	! (k,j,i) (cf. transpose_yx).
	357	!------------------------------------------------------------------------------!
	358
	359	USE cpulog
	360	USE indices
	361	USE interfaces
	362	USE pegrid
	363	USE transpose_indices
	364
	365	IMPLICIT NONE
	366
[1092]	367	INTEGER :: i, j, k, l, m, xs
[1]	368
[1003]	369	REAL :: f_in(1:nz,nys:nyn,nxl:nxr), f_inv(nxl:nxr,1:nz,nys:nyn), &
	370	f_out(0:nx,nys_x:nyn_x,nzb_x:nzt_x), &
[164]	371	work(nnxnnynnz)
[1]	372
	373	#if defined( __parallel )
	374
	375	!
	376	!-- Rearrange indices of input array in order to make data to be send
	377	!-- by MPI contiguous
[1003]	378	DO k = 1, nz
	379	DO j = nys, nyn
	380	DO i = nxl, nxr
[164]	381	f_inv(i,k,j) = f_in(k,j,i)
[1]	382	ENDDO
	383	ENDDO
	384	ENDDO
	385
	386	!
	387	!-- Transpose array
	388	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
[622]	389	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
[1]	390	CALL MPI_ALLTOALL( f_inv(nxl,1,nys), sendrecvcount_xy, MPI_REAL, &
[164]	391	work(1), sendrecvcount_xy, MPI_REAL, &
[1]	392	comm1dx, ierr )
	393	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
	394
	395	!
	396	!-- Reorder transposed array
	397	m = 0
	398	DO l = 0, pdims(1) - 1
	399	xs = 0 + l * nnx
[1003]	400	DO j = nys_x, nyn_x
	401	DO k = 1, nz
[1]	402	DO i = xs, xs + nnx - 1
	403	m = m + 1
[164]	404	f_out(i,j,k) = work(m)
[1]	405	ENDDO
	406	ENDDO
	407	ENDDO
	408	ENDDO
	409
	410	#endif
	411
	412	END SUBROUTINE transpose_yxd
	413
	414
[164]	415	SUBROUTINE transpose_yz( f_in, work, f_out )
[1]	416
	417	!------------------------------------------------------------------------------!
	418	! Description:
	419	! ------------
	420	! Transposition of input array (f_in) from y to z. For the input array, all
	421	! elements along y reside on the same PE, while after transposition, all
	422	! elements along z reside on the same PE.
	423	!------------------------------------------------------------------------------!
	424
	425	USE cpulog
	426	USE indices
	427	USE interfaces
	428	USE pegrid
	429	USE transpose_indices
	430
	431	IMPLICIT NONE
	432
	433	INTEGER :: i, j, k, l, m, zs
	434
[1003]	435	REAL :: f_in(0:ny,nxl_y:nxr_y,nzb_y:nzt_y), &
	436	f_inv(nxl_y:nxr_y,nzb_y:nzt_y,0:ny), &
	437	f_out(nxl_z:nxr_z,nys_z:nyn_z,1:nz), &
[164]	438	work(nnxnnynnz)
[1]	439
	440	!
	441	!-- Rearrange indices of input array in order to make data to be send
	442	!-- by MPI contiguous
[683]	443	!$OMP PARALLEL PRIVATE ( i, j, k )
	444	!$OMP DO
[1003]	445	DO j = 0, ny
	446	DO k = nzb_y, nzt_y
	447	DO i = nxl_y, nxr_y
[164]	448	f_inv(i,k,j) = f_in(j,i,k)
[1]	449	ENDDO
	450	ENDDO
	451	ENDDO
[683]	452	!$OMP END PARALLEL
[1]	453
	454	!
	455	!-- Move data to different array, because memory location of work1 is
	456	!-- needed further below (work1 = work2).
	457	!-- If the PE grid is one-dimensional along y, only local reordering
	458	!-- of the data is necessary and no transposition has to be done.
	459	IF ( pdims(1) == 1 ) THEN
[1106]	460
[683]	461	!$OMP PARALLEL PRIVATE ( i, j, k )
	462	!$OMP DO
[1003]	463	DO j = 0, ny
	464	DO k = nzb_y, nzt_y
	465	DO i = nxl_y, nxr_y
[164]	466	f_out(i,j,k) = f_inv(i,k,j)
[1]	467	ENDDO
	468	ENDDO
	469	ENDDO
[683]	470	!$OMP END PARALLEL
[1]	471
[1106]	472	ELSE
	473
	474	#if defined( __parallel )
[1]	475	!
[1106]	476	!-- Transpose array
	477	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
	478	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
	479	CALL MPI_ALLTOALL( f_inv(nxl_y,nzb_y,0), sendrecvcount_yz, MPI_REAL, &
	480	work(1), sendrecvcount_yz, MPI_REAL, &
	481	comm1dx, ierr )
	482	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
[1]	483
	484	!
[1106]	485	!-- Reorder transposed array
[683]	486	!$OMP PARALLEL PRIVATE ( i, j, k, l, m, zs )
	487	!$OMP DO
[1106]	488	DO l = 0, pdims(1) - 1
	489	m = l * ( nyn_z - nys_z + 1 ) * ( nzt_y - nzb_y + 1 ) * &
	490	( nxr_z - nxl_z + 1 )
	491	zs = 1 + l * ( nzt_y - nzb_y + 1 )
	492	DO j = nys_z, nyn_z
	493	DO k = zs, zs + nzt_y - nzb_y
	494	DO i = nxl_z, nxr_z
	495	m = m + 1
	496	f_out(i,j,k) = work(m)
	497	ENDDO
[1]	498	ENDDO
	499	ENDDO
	500	ENDDO
[683]	501	!$OMP END PARALLEL
[1]	502	#endif
	503
[1106]	504	ENDIF
	505
[1]	506	END SUBROUTINE transpose_yz
	507
	508
[164]	509	SUBROUTINE transpose_zx( f_in, work, f_out )
[1]	510
	511	!------------------------------------------------------------------------------!
	512	! Description:
	513	! ------------
	514	! Transposition of input array (f_in) from z to x. For the input array, all
	515	! elements along z reside on the same PE, while after transposition, all
	516	! elements along x reside on the same PE.
	517	!------------------------------------------------------------------------------!
	518
	519	USE cpulog
	520	USE indices
	521	USE interfaces
	522	USE pegrid
	523	USE transpose_indices
	524
	525	IMPLICIT NONE
	526
	527	INTEGER :: i, j, k, l, m, xs
	528
[1106]	529	REAL :: f_in(1:nz,nys:nyn,nxl:nxr), f_out(0:nx,nys_x:nyn_x,nzb_x:nzt_x), &
[164]	530	work(nnxnnynnz)
[1]	531
[1106]	532	!$acc declare create ( f_inv )
	533	REAL :: f_inv(nys:nyn,nxl:nxr,1:nz)
[1]	534
[1106]	535
[1]	536	!
	537	!-- Rearrange indices of input array in order to make data to be send
	538	!-- by MPI contiguous
[683]	539	!$OMP PARALLEL PRIVATE ( i, j, k )
	540	!$OMP DO
[1106]	541	!$acc kernels present( f_in )
	542	!$acc loop
[1003]	543	DO k = 1,nz
	544	DO i = nxl, nxr
[1106]	545	!$acc loop vector( 32 )
[1003]	546	DO j = nys, nyn
[164]	547	f_inv(j,i,k) = f_in(k,j,i)
[1]	548	ENDDO
	549	ENDDO
	550	ENDDO
[683]	551	!$OMP END PARALLEL
[1]	552
	553	!
	554	!-- Move data to different array, because memory location of work1 is
	555	!-- needed further below (work1 = work2).
	556	!-- If the PE grid is one-dimensional along y, only local reordering
	557	!-- of the data is necessary and no transposition has to be done.
	558	IF ( pdims(1) == 1 ) THEN
[1106]	559
[683]	560	!$OMP PARALLEL PRIVATE ( i, j, k )
	561	!$OMP DO
[1106]	562	!$acc kernels present( f_out )
	563	!$acc loop
[1003]	564	DO k = 1, nz
	565	DO i = nxl, nxr
[1106]	566	!$acc loop vector( 32 )
[1003]	567	DO j = nys, nyn
[164]	568	f_out(i,j,k) = f_inv(j,i,k)
[1]	569	ENDDO
	570	ENDDO
	571	ENDDO
[683]	572	!$OMP END PARALLEL
[1]	573
[1106]	574	ELSE
	575
	576	#if defined( __parallel )
[1]	577	!
[1106]	578	!-- Transpose array
	579	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
	580	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
	581	CALL MPI_ALLTOALL( f_inv(nys,nxl,1), sendrecvcount_zx, MPI_REAL, &
	582	work(1), sendrecvcount_zx, MPI_REAL, &
	583	comm1dx, ierr )
	584	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
[1]	585
	586	!
[1106]	587	!-- Reorder transposed array
[683]	588	!$OMP PARALLEL PRIVATE ( i, j, k, l, m, xs )
	589	!$OMP DO
[1106]	590	DO l = 0, pdims(1) - 1
	591	m = l * ( nzt_x - nzb_x + 1 ) * nnx * ( nyn_x - nys_x + 1 )
	592	xs = 0 + l * nnx
	593	DO k = nzb_x, nzt_x
	594	DO i = xs, xs + nnx - 1
	595	DO j = nys_x, nyn_x
	596	m = m + 1
	597	f_out(i,j,k) = work(m)
	598	ENDDO
[1]	599	ENDDO
	600	ENDDO
	601	ENDDO
[683]	602	!$OMP END PARALLEL
[1]	603	#endif
	604
[1106]	605	ENDIF
	606
[1]	607	END SUBROUTINE transpose_zx
	608
	609
[164]	610	SUBROUTINE transpose_zy( f_in, work, f_out )
[1]	611
	612	!------------------------------------------------------------------------------!
	613	! Description:
	614	! ------------
	615	! Transposition of input array (f_in) from z to y. For the input array, all
	616	! elements along z reside on the same PE, while after transposition, all
	617	! elements along y reside on the same PE.
	618	!------------------------------------------------------------------------------!
	619
	620	USE cpulog
	621	USE indices
	622	USE interfaces
	623	USE pegrid
	624	USE transpose_indices
	625
	626	IMPLICIT NONE
	627
	628	INTEGER :: i, j, k, l, m, zs
	629
[1003]	630	REAL :: f_in(nxl_z:nxr_z,nys_z:nyn_z,1:nz), &
	631	f_inv(nxl_y:nxr_y,nzb_y:nzt_y,0:ny), &
	632	f_out(0:ny,nxl_y:nxr_y,nzb_y:nzt_y), &
[164]	633	work(nnxnnynnz)
[1]	634
	635	!
	636	!-- If the PE grid is one-dimensional along y, the array has only to be
	637	!-- reordered locally and therefore no transposition has to be done.
	638	IF ( pdims(1) /= 1 ) THEN
[1106]	639
	640	#if defined( __parallel )
[1]	641	!
	642	!-- Reorder input array for transposition
[683]	643	!$OMP PARALLEL PRIVATE ( i, j, k, l, m, zs )
	644	!$OMP DO
[1]	645	DO l = 0, pdims(1) - 1
[1003]	646	m = l * ( nyn_z - nys_z + 1 ) * ( nzt_y - nzb_y + 1 ) * &
	647	( nxr_z - nxl_z + 1 )
	648	zs = 1 + l * ( nzt_y - nzb_y + 1 )
	649	DO j = nys_z, nyn_z
	650	DO k = zs, zs + nzt_y - nzb_y
	651	DO i = nxl_z, nxr_z
[1]	652	m = m + 1
[164]	653	work(m) = f_in(i,j,k)
[1]	654	ENDDO
	655	ENDDO
	656	ENDDO
	657	ENDDO
[683]	658	!$OMP END PARALLEL
[1]	659
	660	!
	661	!-- Transpose array
	662	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
[622]	663	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
[164]	664	CALL MPI_ALLTOALL( work(1), sendrecvcount_yz, MPI_REAL, &
[1]	665	f_inv(nxl_y,nzb_y,0), sendrecvcount_yz, MPI_REAL, &
	666	comm1dx, ierr )
	667	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
[1106]	668	#endif
[1]	669
	670	ELSE
	671	!
[1106]	672	!-- Reorder the array in the same way like ALLTOALL did it
[683]	673	!$OMP PARALLEL PRIVATE ( i, j, k )
	674	!$OMP DO
[1003]	675	DO k = nzb_y, nzt_y
	676	DO j = 0, ny
	677	DO i = nxl_y, nxr_y
[164]	678	f_inv(i,k,j) = f_in(i,j,k)
	679	ENDDO
	680	ENDDO
	681	ENDDO
[683]	682	!$OMP END PARALLEL
[1106]	683
	684	ENDIF
	685
[164]	686	!
[1106]	687	!-- Reorder transposed array in a way that the y index is in first position
[683]	688	!$OMP PARALLEL PRIVATE ( i, j, k )
	689	!$OMP DO
[1106]	690	DO k = nzb_y, nzt_y
	691	DO i = nxl_y, nxr_y
	692	DO j = 0, ny
	693	f_out(j,i,k) = f_inv(i,k,j)
[1]	694	ENDDO
	695	ENDDO
[1106]	696	ENDDO
[683]	697	!$OMP END PARALLEL
[164]	698
[1]	699	END SUBROUTINE transpose_zy
	700
	701
[164]	702	SUBROUTINE transpose_zyd( f_in, work, f_out )
[1]	703
	704	!------------------------------------------------------------------------------!
	705	! Description:
	706	! ------------
	707	! Transposition of input array (f_in) from z to y. For the input array, all
	708	! elements along z reside on the same PE, while after transposition, all
	709	! elements along y reside on the same PE.
	710	! This is a direct transposition for arrays with indices in regular order
	711	! (k,j,i) (cf. transpose_zy).
	712	!------------------------------------------------------------------------------!
	713
	714	USE cpulog
	715	USE indices
	716	USE interfaces
	717	USE pegrid
	718	USE transpose_indices
	719
	720	IMPLICIT NONE
	721
	722	INTEGER :: i, j, k, l, m, ys
	723
[1003]	724	REAL :: f_in(1:nz,nys:nyn,nxl:nxr), f_inv(nys:nyn,nxl:nxr,1:nz), &
	725	f_out(0:ny,nxl_yd:nxr_yd,nzb_yd:nzt_yd), &
[164]	726	work(nnxnnynnz)
[1]	727
	728	#if defined( __parallel )
	729
	730	!
	731	!-- Rearrange indices of input array in order to make data to be send
	732	!-- by MPI contiguous
[1003]	733	DO i = nxl, nxr
	734	DO j = nys, nyn
	735	DO k = 1, nz
[164]	736	f_inv(j,i,k) = f_in(k,j,i)
[1]	737	ENDDO
	738	ENDDO
	739	ENDDO
	740
	741	!
	742	!-- Move data to different array, because memory location of work1 is
	743	!-- needed further below (work1 = work2).
	744	!-- If the PE grid is one-dimensional along x, only local reordering
	745	!-- of the data is necessary and no transposition has to be done.
	746	IF ( pdims(2) == 1 ) THEN
[1003]	747	DO k = 1, nz
	748	DO i = nxl, nxr
	749	DO j = nys, nyn
[164]	750	f_out(j,i,k) = f_inv(j,i,k)
[1]	751	ENDDO
	752	ENDDO
	753	ENDDO
	754	RETURN
	755	ENDIF
	756
	757	!
	758	!-- Transpose array
	759	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
[622]	760	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
[1]	761	CALL MPI_ALLTOALL( f_inv(nys,nxl,1), sendrecvcount_zyd, MPI_REAL, &
[164]	762	work(1), sendrecvcount_zyd, MPI_REAL, &
[1]	763	comm1dy, ierr )
	764	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
	765
	766	!
	767	!-- Reorder transposed array
	768	m = 0
	769	DO l = 0, pdims(2) - 1
	770	ys = 0 + l * nny
[1003]	771	DO k = nzb_yd, nzt_yd
	772	DO i = nxl_yd, nxr_yd
[1]	773	DO j = ys, ys + nny - 1
	774	m = m + 1
[164]	775	f_out(j,i,k) = work(m)
[1]	776	ENDDO
	777	ENDDO
	778	ENDDO
	779	ENDDO
	780
	781	#endif
	782
	783	END SUBROUTINE transpose_zyd

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: palm/trunk/SOURCE/transpose.f90 @ 1106

Download in other formats: