Home

Context Navigation

source: palm/trunk/SOURCE/transpose.f90 @ 1324

Last change on this file since 1324 was 1324, checked in by suehring, 10 years ago
Bugfixes in ONLY statements
Property svn:keywords set to `Id`
File size: 31.0 KB

Rev	Line
[1216]	1	SUBROUTINE resort_for_xy( f_in, f_inv )
[1]	2
[1036]	3	!--------------------------------------------------------------------------------!
	4	! This file is part of PALM.
	5	!
	6	! PALM is free software: you can redistribute it and/or modify it under the terms
	7	! of the GNU General Public License as published by the Free Software Foundation,
	8	! either version 3 of the License, or (at your option) any later version.
	9	!
	10	! PALM is distributed in the hope that it will be useful, but WITHOUT ANY
	11	! WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
	12	! A PARTICULAR PURPOSE. See the GNU General Public License for more details.
	13	!
	14	! You should have received a copy of the GNU General Public License along with
	15	! PALM. If not, see <http://www.gnu.org/licenses/>.
	16	!
[1310]	17	! Copyright 1997-2014 Leibniz Universitaet Hannover
[1036]	18	!--------------------------------------------------------------------------------!
	19	!
[484]	20	! Current revisions:
[1]	21	! -----------------
[1324]	22	! Bugfix: ONLY statement for module pegrid removed
[1321]	23	!
	24	! Former revisions:
	25	! -----------------
	26	! $Id: transpose.f90 1324 2014-03-21 09:13:16Z suehring $
	27	!
	28	! 1320 2014-03-20 08:40:49Z raasch
[1320]	29	! ONLY-attribute added to USE-statements,
	30	! kind-parameters added to all INTEGER and REAL declaration statements,
	31	! kinds are defined in new module kinds,
	32	! old module precision_kind is removed,
	33	! revision history before 2012 removed,
	34	! comment fields (!:) to be used for variable explanations added to
	35	! all variable declaration statements
[198]	36	!
[1319]	37	! 1318 2014-03-17 13:35:16Z raasch
	38	! cpu_log_nowait parameter added to cpu measurements of the transpositions
	39	! required for solving the Poisson equation (poisfft),
	40	! module interfaces removed
	41	!
[1258]	42	! 1257 2013-11-08 15:18:40Z raasch
	43	! openacc loop and loop vector clauses removed
	44	!
[1217]	45	! 1216 2013-08-26 09:31:42Z raasch
	46	! re-sorting of the transposed / to be transposed arrays moved to separate
	47	! routines resort_for_...
	48	!
[1112]	49	! 1111 2013-03-08 23:54:10Z raasch
	50	! openACC directives added,
	51	! resorting data from/to work changed, work got 4 dimensions instead of 1
	52	!
[1107]	53	! 1106 2013-03-04 05:31:38Z raasch
	54	! preprocessor lines rearranged so that routines can also be used in serial
	55	! (non-parallel) mode
	56	!
[1093]	57	! 1092 2013-02-02 11:24:22Z raasch
	58	! unused variables removed
	59	!
[1037]	60	! 1036 2012-10-22 13:43:42Z raasch
	61	! code put under GPL (PALM 3.9)
	62	!
[1004]	63	! 1003 2012-09-14 14:35:53Z raasch
	64	! indices nxa, nya, etc. replaced by nx, ny, etc.
	65	!
[1]	66	! Revision 1.1 1997/07/24 11:25:18 raasch
	67	! Initial revision
	68	!
[1216]	69	!------------------------------------------------------------------------------!
	70	! Description:
	71	! ------------
	72	! Resorting data for the transposition from x to y. The transposition itself
	73	! is carried out in transpose_xy
	74	!------------------------------------------------------------------------------!
	75
[1320]	76	USE indices, &
	77	ONLY: nx
[1216]	78
[1320]	79	USE kinds
	80
	81	USE transpose_indices, &
	82	ONLY: nxl_z, nxr_z, nyn_x, nyn_z, nys_x, nys_z, nzb_x, nzt_x
	83
[1216]	84	IMPLICIT NONE
	85
[1320]	86	REAL(wp) :: f_in(0:nx,nys_x:nyn_x,nzb_x:nzt_x) !:
	87	REAL(wp) :: f_inv(nys_x:nyn_x,nzb_x:nzt_x,0:nx) !:
[1216]	88
	89
[1320]	90	INTEGER(iwp) :: i !:
	91	INTEGER(iwp) :: j !:
	92	INTEGER(iwp) :: k !:
[1]	93	!
[1216]	94	!-- Rearrange indices of input array in order to make data to be send
	95	!-- by MPI contiguous
	96	!$OMP PARALLEL PRIVATE ( i, j, k )
	97	!$OMP DO
	98	!$acc kernels present( f_in, f_inv )
	99	DO i = 0, nx
	100	DO k = nzb_x, nzt_x
	101	DO j = nys_x, nyn_x
	102	f_inv(j,k,i) = f_in(i,j,k)
	103	ENDDO
	104	ENDDO
	105	ENDDO
	106	!$acc end kernels
	107	!$OMP END PARALLEL
	108
	109	END SUBROUTINE resort_for_xy
	110
	111
	112	SUBROUTINE transpose_xy( f_inv, f_out )
	113
	114	!------------------------------------------------------------------------------!
[1]	115	! Description:
	116	! ------------
	117	! Transposition of input array (f_in) from x to y. For the input array, all
	118	! elements along x reside on the same PE, while after transposition, all
	119	! elements along y reside on the same PE.
	120	!------------------------------------------------------------------------------!
	121
[1320]	122	USE cpulog, &
	123	ONLY: cpu_log, cpu_log_nowait, log_point_s
	124
	125	USE indices, &
	126	ONLY: nx, ny
	127
	128	USE kinds
	129
[1]	130	USE pegrid
	131
[1320]	132	USE transpose_indices, &
	133	ONLY: nxl_y, nxr_y, nyn_x, nys_x, nzb_x, nzb_y, nzt_x, nzt_y
	134
[1]	135	IMPLICIT NONE
	136
[1320]	137	INTEGER(iwp) :: i !:
	138	INTEGER(iwp) :: j !:
	139	INTEGER(iwp) :: k !:
	140	INTEGER(iwp) :: l !:
	141	INTEGER(iwp) :: ys !:
	142
	143	REAL(wp) :: f_inv(nys_x:nyn_x,nzb_x:nzt_x,0:nx) !:
	144	REAL(wp) :: f_out(0:ny,nxl_y:nxr_y,nzb_y:nzt_y) !:
[1]	145
[1320]	146	REAL(wp), DIMENSION(nyn_x-nys_x+1,nzb_y:nzt_y,nxl_y:nxr_y,0:pdims(2)-1) :: work !:
[1111]	147
	148
[1106]	149	IF ( numprocs /= 1 ) THEN
	150
	151	#if defined( __parallel )
[1]	152	!
[1106]	153	!-- Transpose array
[1318]	154	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start', cpu_log_nowait )
[1106]	155	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
[1111]	156	!$acc update host( f_inv )
	157	CALL MPI_ALLTOALL( f_inv(nys_x,nzb_x,0), sendrecvcount_xy, MPI_REAL, &
	158	work(1,nzb_y,nxl_y,0), sendrecvcount_xy, MPI_REAL, &
[1106]	159	comm1dy, ierr )
	160	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
[1]	161
	162	!
[1106]	163	!-- Reorder transposed array
[1111]	164	!$OMP PARALLEL PRIVATE ( i, j, k, l, ys )
[683]	165	!$OMP DO
[1216]	166	!$acc data copyin( work )
[1106]	167	DO l = 0, pdims(2) - 1
	168	ys = 0 + l * ( nyn_x - nys_x + 1 )
[1111]	169	!$acc kernels present( f_out, work )
[1106]	170	DO i = nxl_y, nxr_y
	171	DO k = nzb_y, nzt_y
	172	DO j = ys, ys + nyn_x - nys_x
[1111]	173	f_out(j,i,k) = work(j-ys+1,k,i,l)
[1106]	174	ENDDO
[1]	175	ENDDO
	176	ENDDO
[1111]	177	!$acc end kernels
[1]	178	ENDDO
[1216]	179	!$acc end data
[683]	180	!$OMP END PARALLEL
[1]	181	#endif
	182
[1106]	183	ELSE
	184
	185	!
	186	!-- Reorder transposed array
	187	!$OMP PARALLEL PRIVATE ( i, j, k )
	188	!$OMP DO
[1216]	189	!$acc kernels present( f_inv, f_out )
[1106]	190	DO k = nzb_y, nzt_y
	191	DO i = nxl_y, nxr_y
	192	DO j = 0, ny
	193	f_out(j,i,k) = f_inv(j,k,i)
	194	ENDDO
	195	ENDDO
	196	ENDDO
[1111]	197	!$acc end kernels
[1106]	198	!$OMP END PARALLEL
	199
	200	ENDIF
	201
[1]	202	END SUBROUTINE transpose_xy
	203
	204
[1216]	205	SUBROUTINE resort_for_xz( f_inv, f_out )
[1]	206
	207	!------------------------------------------------------------------------------!
	208	! Description:
	209	! ------------
[1216]	210	! Resorting data after the transposition from x to z. The transposition itself
	211	! is carried out in transpose_xz
	212	!------------------------------------------------------------------------------!
	213
[1320]	214	USE indices, &
	215	ONLY: nxl, nxr, nyn, nys, nz
[1216]	216
[1320]	217	USE kinds
	218
[1216]	219	IMPLICIT NONE
	220
[1320]	221	REAL(wp) :: f_inv(nys:nyn,nxl:nxr,1:nz) !:
	222	REAL(wp) :: f_out(1:nz,nys:nyn,nxl:nxr) !:
[1216]	223
[1320]	224	INTEGER(iwp) :: i !:
	225	INTEGER(iwp) :: j !:
	226	INTEGER(iwp) :: k !:
[1216]	227	!
	228	!-- Rearrange indices of input array in order to make data to be send
	229	!-- by MPI contiguous.
	230	!-- In case of parallel fft/transposition, scattered store is faster in
	231	!-- backward direction!!!
	232	!$OMP PARALLEL PRIVATE ( i, j, k )
	233	!$OMP DO
	234	!$acc kernels present( f_inv, f_out )
	235	DO k = 1, nz
	236	DO i = nxl, nxr
	237	DO j = nys, nyn
	238	f_out(k,j,i) = f_inv(j,i,k)
	239	ENDDO
	240	ENDDO
	241	ENDDO
	242	!$acc end kernels
	243	!$OMP END PARALLEL
	244
	245	END SUBROUTINE resort_for_xz
	246
	247
	248	SUBROUTINE transpose_xz( f_in, f_inv )
	249
	250	!------------------------------------------------------------------------------!
	251	! Description:
	252	! ------------
[1]	253	! Transposition of input array (f_in) from x to z. For the input array, all
	254	! elements along x reside on the same PE, while after transposition, all
	255	! elements along z reside on the same PE.
	256	!------------------------------------------------------------------------------!
	257
[1320]	258	USE cpulog, &
	259	ONLY: cpu_log, cpu_log_nowait, log_point_s
[1]	260
[1320]	261	USE indices, &
	262	ONLY: nnx, nx, nxl, nxr, ny, nyn, nys, nz
	263
	264	USE kinds
	265
[1324]	266	USE pegrid
[1320]	267
	268	USE transpose_indices, &
	269	ONLY: nyn_x, nys_x, nzb_x, nzt_x
	270
[1]	271	IMPLICIT NONE
	272
[1320]	273	INTEGER(iwp) :: i !:
	274	INTEGER(iwp) :: j !:
	275	INTEGER(iwp) :: k !:
	276	INTEGER(iwp) :: l !:
	277	INTEGER(iwp) :: xs !:
[1]	278
[1320]	279	REAL(wp) :: f_in(0:nx,nys_x:nyn_x,nzb_x:nzt_x) !:
	280	REAL(wp) :: f_inv(nys:nyn,nxl:nxr,1:nz) !:
[1]	281
[1320]	282	REAL(wp), DIMENSION(nys_x:nyn_x,nnx,nzb_x:nzt_x,0:pdims(1)-1) :: work !:
[1111]	283
[1320]	284
[1]	285	!
	286	!-- If the PE grid is one-dimensional along y, the array has only to be
	287	!-- reordered locally and therefore no transposition has to be done.
	288	IF ( pdims(1) /= 1 ) THEN
[1106]	289
	290	#if defined( __parallel )
[1]	291	!
	292	!-- Reorder input array for transposition
[1111]	293	!$OMP PARALLEL PRIVATE ( i, j, k, l, xs )
[683]	294	!$OMP DO
[1216]	295	!$acc data copyout( work )
[1]	296	DO l = 0, pdims(1) - 1
	297	xs = 0 + l * nnx
[1111]	298	!$acc kernels present( f_in, work )
[1003]	299	DO k = nzb_x, nzt_x
[164]	300	DO i = xs, xs + nnx - 1
[1003]	301	DO j = nys_x, nyn_x
[1111]	302	work(j,i-xs+1,k,l) = f_in(i,j,k)
[1]	303	ENDDO
	304	ENDDO
	305	ENDDO
[1111]	306	!$acc end kernels
[1]	307	ENDDO
[1216]	308	!$acc end data
[683]	309	!$OMP END PARALLEL
[1]	310
	311	!
	312	!-- Transpose array
[1318]	313	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start', cpu_log_nowait )
[622]	314	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
[1111]	315	CALL MPI_ALLTOALL( work(nys_x,1,nzb_x,0), sendrecvcount_zx, MPI_REAL, &
	316	f_inv(nys,nxl,1), sendrecvcount_zx, MPI_REAL, &
[1]	317	comm1dx, ierr )
[1111]	318	!$acc update device( f_inv )
[1]	319	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
[1106]	320	#endif
	321
[1]	322	ELSE
[1106]	323
[1]	324	!
	325	!-- Reorder the array in a way that the z index is in first position
[683]	326	!$OMP PARALLEL PRIVATE ( i, j, k )
	327	!$OMP DO
[1216]	328	!$acc kernels present( f_in, f_inv )
[1003]	329	DO i = nxl, nxr
	330	DO j = nys, nyn
	331	DO k = 1, nz
[164]	332	f_inv(j,i,k) = f_in(i,j,k)
[1]	333	ENDDO
	334	ENDDO
	335	ENDDO
[1111]	336	!$acc end kernels
[683]	337	!$OMP END PARALLEL
[1]	338
[164]	339	ENDIF
	340
[1]	341	END SUBROUTINE transpose_xz
	342
	343
[1216]	344	SUBROUTINE resort_for_yx( f_inv, f_out )
[1]	345
	346	!------------------------------------------------------------------------------!
	347	! Description:
	348	! ------------
[1216]	349	! Resorting data after the transposition from y to x. The transposition itself
	350	! is carried out in transpose_yx
	351	!------------------------------------------------------------------------------!
	352
[1320]	353	USE indices, &
	354	ONLY: nx
[1216]	355
[1320]	356	USE kinds
	357
	358	USE transpose_indices, &
	359	ONLY: nyn_x, nys_x, nzb_x, nzt_x
	360
[1216]	361	IMPLICIT NONE
	362
[1320]	363	REAL(wp) :: f_inv(nys_x:nyn_x,nzb_x:nzt_x,0:nx) !:
	364	REAL(wp) :: f_out(0:nx,nys_x:nyn_x,nzb_x:nzt_x) !:
[1216]	365
	366
[1320]	367	INTEGER(iwp) :: i !:
	368	INTEGER(iwp) :: j !:
	369	INTEGER(iwp) :: k !:
[1216]	370	!
	371	!-- Rearrange indices of input array in order to make data to be send
	372	!-- by MPI contiguous
	373	!$OMP PARALLEL PRIVATE ( i, j, k )
	374	!$OMP DO
	375	!$acc kernels present( f_inv, f_out )
	376	DO i = 0, nx
	377	DO k = nzb_x, nzt_x
	378	DO j = nys_x, nyn_x
	379	f_out(i,j,k) = f_inv(j,k,i)
	380	ENDDO
	381	ENDDO
	382	ENDDO
	383	!$acc end kernels
	384	!$OMP END PARALLEL
	385
	386	END SUBROUTINE resort_for_yx
	387
	388
	389	SUBROUTINE transpose_yx( f_in, f_inv )
	390
	391	!------------------------------------------------------------------------------!
	392	! Description:
	393	! ------------
[1]	394	! Transposition of input array (f_in) from y to x. For the input array, all
	395	! elements along y reside on the same PE, while after transposition, all
	396	! elements along x reside on the same PE.
	397	!------------------------------------------------------------------------------!
	398
[1320]	399	USE cpulog, &
	400	ONLY: cpu_log, cpu_log_nowait, log_point_s
[1]	401
[1320]	402	USE indices, &
	403	ONLY: nx, ny
	404
	405	USE kinds
	406
[1324]	407	USE pegrid
[1320]	408
	409	USE transpose_indices, &
	410	ONLY: nxl_y, nxr_y, nyn_x, nys_x, nzb_x, nzb_y, nzt_x, nzt_y
	411
[1]	412	IMPLICIT NONE
	413
[1320]	414	INTEGER(iwp) :: i !:
	415	INTEGER(iwp) :: j !:
	416	INTEGER(iwp) :: k !:
	417	INTEGER(iwp) :: l !:
	418	INTEGER(iwp) :: ys !:
[1]	419
[1320]	420	REAL(wp) :: f_in(0:ny,nxl_y:nxr_y,nzb_y:nzt_y) !:
	421	REAL(wp) :: f_inv(nys_x:nyn_x,nzb_x:nzt_x,0:nx) !:
[1111]	422
[1320]	423	REAL(wp), DIMENSION(nyn_x-nys_x+1,nzb_y:nzt_y,nxl_y:nxr_y,0:pdims(2)-1) :: work !:
[1111]	424
[1320]	425
[1106]	426	IF ( numprocs /= 1 ) THEN
	427
[1]	428	#if defined( __parallel )
	429	!
[1106]	430	!-- Reorder input array for transposition
[1111]	431	!$OMP PARALLEL PRIVATE ( i, j, k, l, ys )
[683]	432	!$OMP DO
[1216]	433	!$acc data copyout( work )
[1106]	434	DO l = 0, pdims(2) - 1
	435	ys = 0 + l * ( nyn_x - nys_x + 1 )
[1111]	436	!$acc kernels present( f_in, work )
[1106]	437	DO i = nxl_y, nxr_y
	438	DO k = nzb_y, nzt_y
	439	DO j = ys, ys + nyn_x - nys_x
[1111]	440	work(j-ys+1,k,i,l) = f_in(j,i,k)
[1106]	441	ENDDO
	442	ENDDO
	443	ENDDO
[1111]	444	!$acc end kernels
[1106]	445	ENDDO
[1216]	446	!$acc end data
[1106]	447	!$OMP END PARALLEL
	448
	449	!
	450	!-- Transpose array
[1318]	451	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start', cpu_log_nowait )
[1106]	452	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
[1111]	453	CALL MPI_ALLTOALL( work(1,nzb_y,nxl_y,0), sendrecvcount_xy, MPI_REAL, &
	454	f_inv(nys_x,nzb_x,0), sendrecvcount_xy, MPI_REAL, &
[1106]	455	comm1dy, ierr )
[1111]	456	!$acc update device( f_inv )
[1106]	457	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
	458	#endif
	459
	460	ELSE
	461
	462	!
	463	!-- Reorder array f_in the same way as ALLTOALL did it
	464	!$OMP PARALLEL PRIVATE ( i, j, k )
	465	!$OMP DO
[1216]	466	!$acc kernels present( f_in, f_inv )
[1003]	467	DO i = nxl_y, nxr_y
	468	DO k = nzb_y, nzt_y
[1106]	469	DO j = 0, ny
	470	f_inv(j,k,i) = f_in(j,i,k)
[1]	471	ENDDO
	472	ENDDO
	473	ENDDO
[1111]	474	!$acc end kernels
[683]	475	!$OMP END PARALLEL
[1]	476
[1106]	477	ENDIF
[1]	478
	479	END SUBROUTINE transpose_yx
	480
	481
[1216]	482	SUBROUTINE transpose_yxd( f_in, f_out )
[1]	483
	484	!------------------------------------------------------------------------------!
	485	! Description:
	486	! ------------
	487	! Transposition of input array (f_in) from y to x. For the input array, all
	488	! elements along y reside on the same PE, while after transposition, all
	489	! elements along x reside on the same PE.
	490	! This is a direct transposition for arrays with indices in regular order
	491	! (k,j,i) (cf. transpose_yx).
	492	!------------------------------------------------------------------------------!
	493
[1320]	494	USE cpulog, &
	495	ONLY: cpu_log, cpu_log_nowait, log_point_s
[1]	496
[1320]	497	USE indices, &
	498	ONLY: nnx, nny, nnz, nx, nxl, nxr, nyn, nys, nz
	499
	500	USE kinds
	501
[1324]	502	USE pegrid
[1320]	503
	504	USE transpose_indices, &
	505	ONLY: nyn_x, nys_x, nzb_x, nzt_x
	506
[1]	507	IMPLICIT NONE
	508
[1320]	509	INTEGER(iwp) :: i !:
	510	INTEGER(iwp) :: j !:
	511	INTEGER(iwp) :: k !:
	512	INTEGER(iwp) :: l !:
	513	INTEGER(iwp) :: m !:
	514	INTEGER(iwp) :: xs !:
[1]	515
[1320]	516	REAL(wp) :: f_in(1:nz,nys:nyn,nxl:nxr) !:
	517	REAL(wp) :: f_inv(nxl:nxr,1:nz,nys:nyn) !:
	518	REAL(wp) :: f_out(0:nx,nys_x:nyn_x,nzb_x:nzt_x) !:
	519	REAL(wp) :: work(nnxnnynnz) !:
[1]	520	#if defined( __parallel )
	521
	522	!
	523	!-- Rearrange indices of input array in order to make data to be send
	524	!-- by MPI contiguous
[1003]	525	DO k = 1, nz
	526	DO j = nys, nyn
	527	DO i = nxl, nxr
[164]	528	f_inv(i,k,j) = f_in(k,j,i)
[1]	529	ENDDO
	530	ENDDO
	531	ENDDO
	532
	533	!
	534	!-- Transpose array
	535	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
[622]	536	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
[1]	537	CALL MPI_ALLTOALL( f_inv(nxl,1,nys), sendrecvcount_xy, MPI_REAL, &
[164]	538	work(1), sendrecvcount_xy, MPI_REAL, &
[1]	539	comm1dx, ierr )
	540	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
	541
	542	!
	543	!-- Reorder transposed array
	544	m = 0
	545	DO l = 0, pdims(1) - 1
	546	xs = 0 + l * nnx
[1003]	547	DO j = nys_x, nyn_x
	548	DO k = 1, nz
[1]	549	DO i = xs, xs + nnx - 1
	550	m = m + 1
[164]	551	f_out(i,j,k) = work(m)
[1]	552	ENDDO
	553	ENDDO
	554	ENDDO
	555	ENDDO
	556
	557	#endif
	558
	559	END SUBROUTINE transpose_yxd
	560
	561
[1216]	562	SUBROUTINE resort_for_yz( f_in, f_inv )
[1]	563
	564	!------------------------------------------------------------------------------!
	565	! Description:
	566	! ------------
[1216]	567	! Resorting data for the transposition from y to z. The transposition itself
	568	! is carried out in transpose_yz
	569	!------------------------------------------------------------------------------!
	570
[1320]	571	USE indices, &
	572	ONLY: ny
[1216]	573
[1320]	574	USE kinds
	575
	576	USE transpose_indices, &
	577	ONLY: nxl_y, nxr_y, nzb_y, nzt_y
	578
[1216]	579	IMPLICIT NONE
	580
[1320]	581	REAL(wp) :: f_in(0:ny,nxl_y:nxr_y,nzb_y:nzt_y) !:
	582	REAL(wp) :: f_inv(nxl_y:nxr_y,nzb_y:nzt_y,0:ny) !:
[1216]	583
[1320]	584	INTEGER(iwp) :: i !:
	585	INTEGER(iwp) :: j !:
	586	INTEGER(iwp) :: k !:
[1216]	587
	588	!
	589	!-- Rearrange indices of input array in order to make data to be send
	590	!-- by MPI contiguous
	591	!$OMP PARALLEL PRIVATE ( i, j, k )
	592	!$OMP DO
	593	!$acc kernels present( f_in, f_inv )
	594	DO j = 0, ny
	595	DO k = nzb_y, nzt_y
	596	DO i = nxl_y, nxr_y
	597	f_inv(i,k,j) = f_in(j,i,k)
	598	ENDDO
	599	ENDDO
	600	ENDDO
	601	!$acc end kernels
	602	!$OMP END PARALLEL
	603
	604	END SUBROUTINE resort_for_yz
	605
	606
	607	SUBROUTINE transpose_yz( f_inv, f_out )
	608
	609	!------------------------------------------------------------------------------!
	610	! Description:
	611	! ------------
[1]	612	! Transposition of input array (f_in) from y to z. For the input array, all
	613	! elements along y reside on the same PE, while after transposition, all
	614	! elements along z reside on the same PE.
	615	!------------------------------------------------------------------------------!
	616
[1320]	617	USE cpulog, &
	618	ONLY: cpu_log, cpu_log_nowait, log_point_s
[1]	619
[1320]	620	USE indices, &
	621	ONLY: ny, nz
	622
	623	USE kinds
	624
[1324]	625	USE pegrid
[1320]	626
	627	USE transpose_indices, &
	628	ONLY: nxl_y, nxl_z, nxr_y, nxr_z, nyn_z, nys_z, nzb_y, nzt_y
	629
[1]	630	IMPLICIT NONE
	631
[1320]	632	INTEGER(iwp) :: i !:
	633	INTEGER(iwp) :: j !:
	634	INTEGER(iwp) :: k !:
	635	INTEGER(iwp) :: l !:
	636	INTEGER(iwp) :: zs !:
[1]	637
[1320]	638	REAL(wp) :: f_inv(nxl_y:nxr_y,nzb_y:nzt_y,0:ny) !:
	639	REAL(wp) :: f_out(nxl_z:nxr_z,nys_z:nyn_z,1:nz) !:
[1111]	640
[1320]	641	REAL(wp), DIMENSION(nxl_z:nxr_z,nzt_y-nzb_y+1,nys_z:nyn_z,0:pdims(1)-1) :: work !:
[1111]	642
[1320]	643
[1]	644	!
	645	!-- If the PE grid is one-dimensional along y, only local reordering
	646	!-- of the data is necessary and no transposition has to be done.
	647	IF ( pdims(1) == 1 ) THEN
[1106]	648
[683]	649	!$OMP PARALLEL PRIVATE ( i, j, k )
	650	!$OMP DO
[1216]	651	!$acc kernels present( f_inv, f_out )
[1003]	652	DO j = 0, ny
	653	DO k = nzb_y, nzt_y
	654	DO i = nxl_y, nxr_y
[164]	655	f_out(i,j,k) = f_inv(i,k,j)
[1]	656	ENDDO
	657	ENDDO
	658	ENDDO
[1111]	659	!$acc end kernels
[683]	660	!$OMP END PARALLEL
[1]	661
[1106]	662	ELSE
	663
	664	#if defined( __parallel )
[1]	665	!
[1106]	666	!-- Transpose array
[1318]	667	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start', cpu_log_nowait )
[1106]	668	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
[1111]	669	!$acc update host( f_inv )
	670	CALL MPI_ALLTOALL( f_inv(nxl_y,nzb_y,0), sendrecvcount_yz, MPI_REAL, &
	671	work(nxl_z,1,nys_z,0), sendrecvcount_yz, MPI_REAL, &
[1106]	672	comm1dx, ierr )
	673	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
[1]	674
	675	!
[1106]	676	!-- Reorder transposed array
[1111]	677	!$OMP PARALLEL PRIVATE ( i, j, k, l, zs )
[683]	678	!$OMP DO
[1216]	679	!$acc data copyin( work )
[1106]	680	DO l = 0, pdims(1) - 1
	681	zs = 1 + l * ( nzt_y - nzb_y + 1 )
[1216]	682	!$acc kernels present( f_out )
[1106]	683	DO j = nys_z, nyn_z
	684	DO k = zs, zs + nzt_y - nzb_y
	685	DO i = nxl_z, nxr_z
[1111]	686	f_out(i,j,k) = work(i,k-zs+1,j,l)
[1106]	687	ENDDO
[1]	688	ENDDO
	689	ENDDO
[1111]	690	!$acc end kernels
[1]	691	ENDDO
[1216]	692	!$acc end data
[683]	693	!$OMP END PARALLEL
[1]	694	#endif
	695
[1106]	696	ENDIF
	697
[1]	698	END SUBROUTINE transpose_yz
	699
	700
[1216]	701	SUBROUTINE resort_for_zx( f_in, f_inv )
[1]	702
	703	!------------------------------------------------------------------------------!
	704	! Description:
	705	! ------------
[1216]	706	! Resorting data for the transposition from z to x. The transposition itself
	707	! is carried out in transpose_zx
	708	!------------------------------------------------------------------------------!
	709
[1320]	710	USE indices, &
	711	ONLY: nxl, nxr, nyn, nys, nz
[1216]	712
[1320]	713	USE kinds
	714
[1216]	715	IMPLICIT NONE
	716
[1320]	717	REAL(wp) :: f_in(1:nz,nys:nyn,nxl:nxr) !:
	718	REAL(wp) :: f_inv(nys:nyn,nxl:nxr,1:nz) !:
[1216]	719
[1320]	720	INTEGER(iwp) :: i !:
	721	INTEGER(iwp) :: j !:
	722	INTEGER(iwp) :: k !:
[1216]	723
	724	!
	725	!-- Rearrange indices of input array in order to make data to be send
	726	!-- by MPI contiguous
	727	!$OMP PARALLEL PRIVATE ( i, j, k )
	728	!$OMP DO
	729	!$acc kernels present( f_in, f_inv )
	730	DO k = 1,nz
	731	DO i = nxl, nxr
	732	DO j = nys, nyn
	733	f_inv(j,i,k) = f_in(k,j,i)
	734	ENDDO
	735	ENDDO
	736	ENDDO
	737	!$acc end kernels
	738	!$OMP END PARALLEL
	739
	740	END SUBROUTINE resort_for_zx
	741
	742
	743	SUBROUTINE transpose_zx( f_inv, f_out )
	744
	745	!------------------------------------------------------------------------------!
	746	! Description:
	747	! ------------
[1]	748	! Transposition of input array (f_in) from z to x. For the input array, all
	749	! elements along z reside on the same PE, while after transposition, all
	750	! elements along x reside on the same PE.
	751	!------------------------------------------------------------------------------!
	752
[1320]	753	USE cpulog, &
	754	ONLY: cpu_log, cpu_log_nowait, log_point_s
[1]	755
[1320]	756	USE indices, &
	757	ONLY: nnx, nx, nxl, nxr, nyn, nys, nz
	758
	759	USE kinds
	760
[1324]	761	USE pegrid
[1320]	762
	763	USE transpose_indices, &
	764	ONLY: nyn_x, nys_x, nzb_x, nzt_x
	765
[1]	766	IMPLICIT NONE
	767
[1320]	768	INTEGER(iwp) :: i !:
	769	INTEGER(iwp) :: j !:
	770	INTEGER(iwp) :: k !:
	771	INTEGER(iwp) :: l !:
	772	INTEGER(iwp) :: xs !:
[1]	773
[1320]	774	REAL(wp) :: f_inv(nys:nyn,nxl:nxr,1:nz) !:
	775	REAL(wp) :: f_out(0:nx,nys_x:nyn_x,nzb_x:nzt_x) !:
[1111]	776
[1320]	777	REAL(wp), DIMENSION(nys_x:nyn_x,nnx,nzb_x:nzt_x,0:pdims(1)-1) :: work !:
[1]	778
[1320]	779
[1]	780	!
	781	!-- If the PE grid is one-dimensional along y, only local reordering
	782	!-- of the data is necessary and no transposition has to be done.
	783	IF ( pdims(1) == 1 ) THEN
[1106]	784
[683]	785	!$OMP PARALLEL PRIVATE ( i, j, k )
	786	!$OMP DO
[1216]	787	!$acc kernels present( f_inv, f_out )
[1003]	788	DO k = 1, nz
	789	DO i = nxl, nxr
	790	DO j = nys, nyn
[164]	791	f_out(i,j,k) = f_inv(j,i,k)
[1]	792	ENDDO
	793	ENDDO
	794	ENDDO
[1111]	795	!$acc end kernels
[683]	796	!$OMP END PARALLEL
[1]	797
[1106]	798	ELSE
	799
	800	#if defined( __parallel )
[1]	801	!
[1106]	802	!-- Transpose array
[1318]	803	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start', cpu_log_nowait )
[1106]	804	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
[1111]	805	!$acc update host( f_inv )
	806	CALL MPI_ALLTOALL( f_inv(nys,nxl,1), sendrecvcount_zx, MPI_REAL, &
	807	work(nys_x,1,nzb_x,0), sendrecvcount_zx, MPI_REAL, &
[1106]	808	comm1dx, ierr )
	809	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
[1]	810
	811	!
[1106]	812	!-- Reorder transposed array
[1111]	813	!$OMP PARALLEL PRIVATE ( i, j, k, l, xs )
[683]	814	!$OMP DO
[1216]	815	!$acc data copyin( work )
[1106]	816	DO l = 0, pdims(1) - 1
	817	xs = 0 + l * nnx
[1216]	818	!$acc kernels present( f_out )
[1106]	819	DO k = nzb_x, nzt_x
	820	DO i = xs, xs + nnx - 1
	821	DO j = nys_x, nyn_x
[1111]	822	f_out(i,j,k) = work(j,i-xs+1,k,l)
[1106]	823	ENDDO
[1]	824	ENDDO
	825	ENDDO
[1111]	826	!$acc end kernels
[1]	827	ENDDO
[1216]	828	!$acc end data
[683]	829	!$OMP END PARALLEL
[1]	830	#endif
	831
[1106]	832	ENDIF
	833
[1]	834	END SUBROUTINE transpose_zx
	835
	836
[1216]	837	SUBROUTINE resort_for_zy( f_inv, f_out )
[1]	838
	839	!------------------------------------------------------------------------------!
	840	! Description:
	841	! ------------
[1216]	842	! Resorting data after the transposition from z to y. The transposition itself
	843	! is carried out in transpose_zy
	844	!------------------------------------------------------------------------------!
	845
[1320]	846	USE indices, &
	847	ONLY: ny
[1216]	848
[1320]	849	USE kinds
	850
	851	USE transpose_indices, &
	852	ONLY: nxl_y, nxr_y, nzb_y, nzt_y
	853
[1216]	854	IMPLICIT NONE
	855
[1320]	856	REAL(wp) :: f_inv(nxl_y:nxr_y,nzb_y:nzt_y,0:ny) !:
	857	REAL(wp) :: f_out(0:ny,nxl_y:nxr_y,nzb_y:nzt_y) !:
[1216]	858
	859
[1320]	860	INTEGER(iwp) :: i !:
	861	INTEGER(iwp) :: j !:
	862	INTEGER(iwp) :: k !:
[1216]	863
	864	!
	865	!-- Rearrange indices of input array in order to make data to be send
	866	!-- by MPI contiguous
	867	!$OMP PARALLEL PRIVATE ( i, j, k )
	868	!$OMP DO
	869	!$acc kernels present( f_inv, f_out )
	870	DO k = nzb_y, nzt_y
	871	DO j = 0, ny
	872	DO i = nxl_y, nxr_y
	873	f_out(j,i,k) = f_inv(i,k,j)
	874	ENDDO
	875	ENDDO
	876	ENDDO
	877	!$acc end kernels
	878	!$OMP END PARALLEL
	879
	880	END SUBROUTINE resort_for_zy
	881
	882
	883	SUBROUTINE transpose_zy( f_in, f_inv )
	884
	885	!------------------------------------------------------------------------------!
	886	! Description:
	887	! ------------
[1]	888	! Transposition of input array (f_in) from z to y. For the input array, all
	889	! elements along z reside on the same PE, while after transposition, all
	890	! elements along y reside on the same PE.
	891	!------------------------------------------------------------------------------!
	892
[1320]	893	USE cpulog, &
	894	ONLY: cpu_log, cpu_log_nowait, log_point_s
[1]	895
[1320]	896	USE indices, &
	897	ONLY: ny, nz
	898
	899	USE kinds
	900
[1324]	901	USE pegrid
[1320]	902
	903	USE transpose_indices, &
	904	ONLY: nxl_y, nxl_z, nxr_y, nxr_z, nyn_z, nys_z, nzb_y, nzt_y
	905
[1]	906	IMPLICIT NONE
	907
[1320]	908	INTEGER(iwp) :: i !:
	909	INTEGER(iwp) :: j !:
	910	INTEGER(iwp) :: k !:
	911	INTEGER(iwp) :: l !:
	912	INTEGER(iwp) :: zs !:
[1]	913
[1320]	914	REAL(wp) :: f_in(nxl_z:nxr_z,nys_z:nyn_z,1:nz) !:
	915	REAL(wp) :: f_inv(nxl_y:nxr_y,nzb_y:nzt_y,0:ny) !:
[1111]	916
[1320]	917	REAL(wp), DIMENSION(nxl_z:nxr_z,nzt_y-nzb_y+1,nys_z:nyn_z,0:pdims(1)-1) :: work !:
[1111]	918
[1]	919	!
	920	!-- If the PE grid is one-dimensional along y, the array has only to be
	921	!-- reordered locally and therefore no transposition has to be done.
	922	IF ( pdims(1) /= 1 ) THEN
[1106]	923
	924	#if defined( __parallel )
[1]	925	!
	926	!-- Reorder input array for transposition
[1111]	927	!$OMP PARALLEL PRIVATE ( i, j, k, l, zs )
[683]	928	!$OMP DO
[1216]	929	!$acc data copyout( work )
[1]	930	DO l = 0, pdims(1) - 1
[1003]	931	zs = 1 + l * ( nzt_y - nzb_y + 1 )
[1111]	932	!$acc kernels present( f_in, work )
[1003]	933	DO j = nys_z, nyn_z
	934	DO k = zs, zs + nzt_y - nzb_y
	935	DO i = nxl_z, nxr_z
[1111]	936	work(i,k-zs+1,j,l) = f_in(i,j,k)
[1]	937	ENDDO
	938	ENDDO
	939	ENDDO
[1111]	940	!$acc end kernels
[1]	941	ENDDO
[1216]	942	!$acc end data
[683]	943	!$OMP END PARALLEL
[1]	944
	945	!
	946	!-- Transpose array
[1318]	947	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start', cpu_log_nowait )
[622]	948	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
[1111]	949	CALL MPI_ALLTOALL( work(nxl_z,1,nys_z,0), sendrecvcount_yz, MPI_REAL, &
	950	f_inv(nxl_y,nzb_y,0), sendrecvcount_yz, MPI_REAL, &
[1]	951	comm1dx, ierr )
[1111]	952	!$acc update device( f_inv )
[1]	953	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
[1106]	954	#endif
[1]	955
	956	ELSE
	957	!
[1106]	958	!-- Reorder the array in the same way like ALLTOALL did it
[683]	959	!$OMP PARALLEL PRIVATE ( i, j, k )
	960	!$OMP DO
[1216]	961	!$acc kernels present( f_in, f_inv )
[1003]	962	DO k = nzb_y, nzt_y
	963	DO j = 0, ny
	964	DO i = nxl_y, nxr_y
[164]	965	f_inv(i,k,j) = f_in(i,j,k)
	966	ENDDO
	967	ENDDO
	968	ENDDO
[1111]	969	!$acc end kernels
[683]	970	!$OMP END PARALLEL
[1106]	971
	972	ENDIF
	973
[1]	974	END SUBROUTINE transpose_zy
	975
	976
[1216]	977	SUBROUTINE transpose_zyd( f_in, f_out )
[1]	978
	979	!------------------------------------------------------------------------------!
	980	! Description:
	981	! ------------
	982	! Transposition of input array (f_in) from z to y. For the input array, all
	983	! elements along z reside on the same PE, while after transposition, all
	984	! elements along y reside on the same PE.
	985	! This is a direct transposition for arrays with indices in regular order
	986	! (k,j,i) (cf. transpose_zy).
	987	!------------------------------------------------------------------------------!
	988
[1320]	989	USE cpulog, &
	990	ONLY: cpu_log, cpu_log_nowait, log_point_s
[1]	991
[1320]	992	USE indices, &
	993	ONLY: nnx, nny, nnz, nxl, nxr, nyn, nys, ny, nz
	994
	995	USE kinds
	996
[1324]	997	USE pegrid
[1320]	998
	999	USE transpose_indices, &
	1000	ONLY: nxl_y, nxl_yd, nxr_y, nxr_yd, nzb_y, nzb_yd, nzt_y, nzt_yd
	1001
[1]	1002	IMPLICIT NONE
	1003
[1320]	1004	INTEGER(iwp) :: i !:
	1005	INTEGER(iwp) :: j !:
	1006	INTEGER(iwp) :: k !:
	1007	INTEGER(iwp) :: l !:
	1008	INTEGER(iwp) :: m !:
	1009	INTEGER(iwp) :: ys !:
[1]	1010
[1320]	1011	REAL(wp) :: f_in(1:nz,nys:nyn,nxl:nxr) !:
	1012	REAL(wp) :: f_inv(nys:nyn,nxl:nxr,1:nz) !:
	1013	REAL(wp) :: f_out(0:ny,nxl_yd:nxr_yd,nzb_yd:nzt_yd) !:
	1014	REAL(wp) :: work(nnxnnynnz) !:
	1015
[1]	1016	#if defined( __parallel )
	1017
	1018	!
	1019	!-- Rearrange indices of input array in order to make data to be send
	1020	!-- by MPI contiguous
[1003]	1021	DO i = nxl, nxr
	1022	DO j = nys, nyn
	1023	DO k = 1, nz
[164]	1024	f_inv(j,i,k) = f_in(k,j,i)
[1]	1025	ENDDO
	1026	ENDDO
	1027	ENDDO
	1028
	1029	!
	1030	!-- Move data to different array, because memory location of work1 is
	1031	!-- needed further below (work1 = work2).
	1032	!-- If the PE grid is one-dimensional along x, only local reordering
	1033	!-- of the data is necessary and no transposition has to be done.
	1034	IF ( pdims(2) == 1 ) THEN
[1003]	1035	DO k = 1, nz
	1036	DO i = nxl, nxr
	1037	DO j = nys, nyn
[164]	1038	f_out(j,i,k) = f_inv(j,i,k)
[1]	1039	ENDDO
	1040	ENDDO
	1041	ENDDO
	1042	RETURN
	1043	ENDIF
	1044
	1045	!
	1046	!-- Transpose array
	1047	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
[622]	1048	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
[1]	1049	CALL MPI_ALLTOALL( f_inv(nys,nxl,1), sendrecvcount_zyd, MPI_REAL, &
[164]	1050	work(1), sendrecvcount_zyd, MPI_REAL, &
[1]	1051	comm1dy, ierr )
	1052	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
	1053
	1054	!
	1055	!-- Reorder transposed array
	1056	m = 0
	1057	DO l = 0, pdims(2) - 1
	1058	ys = 0 + l * nny
[1003]	1059	DO k = nzb_yd, nzt_yd
	1060	DO i = nxl_yd, nxr_yd
[1]	1061	DO j = ys, ys + nny - 1
	1062	m = m + 1
[164]	1063	f_out(j,i,k) = work(m)
[1]	1064	ENDDO
	1065	ENDDO
	1066	ENDDO
	1067	ENDDO
	1068
	1069	#endif
	1070
	1071	END SUBROUTINE transpose_zyd

Note: See TracBrowser for help on using the repository browser.

Download in other formats:

| Impressum | ©Leibniz Universität Hannover |