Home

Context Navigation

source: palm/trunk/SOURCE/transpose.f90 @ 1621

Last change on this file since 1621 was 1325, checked in by suehring, 11 years ago
last commit documented
Property svn:keywords set to `Id`
File size: 31.0 KB

Rev	Line
[1216]	1	SUBROUTINE resort_for_xy( f_in, f_inv )
[1]	2
[1036]	3	!--------------------------------------------------------------------------------!
	4	! This file is part of PALM.
	5	!
	6	! PALM is free software: you can redistribute it and/or modify it under the terms
	7	! of the GNU General Public License as published by the Free Software Foundation,
	8	! either version 3 of the License, or (at your option) any later version.
	9	!
	10	! PALM is distributed in the hope that it will be useful, but WITHOUT ANY
	11	! WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
	12	! A PARTICULAR PURPOSE. See the GNU General Public License for more details.
	13	!
	14	! You should have received a copy of the GNU General Public License along with
	15	! PALM. If not, see <http://www.gnu.org/licenses/>.
	16	!
[1310]	17	! Copyright 1997-2014 Leibniz Universitaet Hannover
[1036]	18	!--------------------------------------------------------------------------------!
	19	!
[484]	20	! Current revisions:
[1]	21	! -----------------
[1321]	22	!
	23	! Former revisions:
	24	! -----------------
	25	! $Id: transpose.f90 1325 2014-03-21 09:21:15Z heinze $
	26	!
[1325]	27	! 1324 2014-03-21 09:13:16Z suehring
	28	! Bugfix: ONLY statement for module pegrid removed
	29	!
[1321]	30	! 1320 2014-03-20 08:40:49Z raasch
[1320]	31	! ONLY-attribute added to USE-statements,
	32	! kind-parameters added to all INTEGER and REAL declaration statements,
	33	! kinds are defined in new module kinds,
	34	! old module precision_kind is removed,
	35	! revision history before 2012 removed,
	36	! comment fields (!:) to be used for variable explanations added to
	37	! all variable declaration statements
[198]	38	!
[1319]	39	! 1318 2014-03-17 13:35:16Z raasch
	40	! cpu_log_nowait parameter added to cpu measurements of the transpositions
	41	! required for solving the Poisson equation (poisfft),
	42	! module interfaces removed
	43	!
[1258]	44	! 1257 2013-11-08 15:18:40Z raasch
	45	! openacc loop and loop vector clauses removed
	46	!
[1217]	47	! 1216 2013-08-26 09:31:42Z raasch
	48	! re-sorting of the transposed / to be transposed arrays moved to separate
	49	! routines resort_for_...
	50	!
[1112]	51	! 1111 2013-03-08 23:54:10Z raasch
	52	! openACC directives added,
	53	! resorting data from/to work changed, work got 4 dimensions instead of 1
	54	!
[1107]	55	! 1106 2013-03-04 05:31:38Z raasch
	56	! preprocessor lines rearranged so that routines can also be used in serial
	57	! (non-parallel) mode
	58	!
[1093]	59	! 1092 2013-02-02 11:24:22Z raasch
	60	! unused variables removed
	61	!
[1037]	62	! 1036 2012-10-22 13:43:42Z raasch
	63	! code put under GPL (PALM 3.9)
	64	!
[1004]	65	! 1003 2012-09-14 14:35:53Z raasch
	66	! indices nxa, nya, etc. replaced by nx, ny, etc.
	67	!
[1]	68	! Revision 1.1 1997/07/24 11:25:18 raasch
	69	! Initial revision
	70	!
[1216]	71	!------------------------------------------------------------------------------!
	72	! Description:
	73	! ------------
	74	! Resorting data for the transposition from x to y. The transposition itself
	75	! is carried out in transpose_xy
	76	!------------------------------------------------------------------------------!
	77
[1320]	78	USE indices, &
	79	ONLY: nx
[1216]	80
[1320]	81	USE kinds
	82
	83	USE transpose_indices, &
	84	ONLY: nxl_z, nxr_z, nyn_x, nyn_z, nys_x, nys_z, nzb_x, nzt_x
	85
[1216]	86	IMPLICIT NONE
	87
[1320]	88	REAL(wp) :: f_in(0:nx,nys_x:nyn_x,nzb_x:nzt_x) !:
	89	REAL(wp) :: f_inv(nys_x:nyn_x,nzb_x:nzt_x,0:nx) !:
[1216]	90
	91
[1320]	92	INTEGER(iwp) :: i !:
	93	INTEGER(iwp) :: j !:
	94	INTEGER(iwp) :: k !:
[1]	95	!
[1216]	96	!-- Rearrange indices of input array in order to make data to be send
	97	!-- by MPI contiguous
	98	!$OMP PARALLEL PRIVATE ( i, j, k )
	99	!$OMP DO
	100	!$acc kernels present( f_in, f_inv )
	101	DO i = 0, nx
	102	DO k = nzb_x, nzt_x
	103	DO j = nys_x, nyn_x
	104	f_inv(j,k,i) = f_in(i,j,k)
	105	ENDDO
	106	ENDDO
	107	ENDDO
	108	!$acc end kernels
	109	!$OMP END PARALLEL
	110
	111	END SUBROUTINE resort_for_xy
	112
	113
	114	SUBROUTINE transpose_xy( f_inv, f_out )
	115
	116	!------------------------------------------------------------------------------!
[1]	117	! Description:
	118	! ------------
	119	! Transposition of input array (f_in) from x to y. For the input array, all
	120	! elements along x reside on the same PE, while after transposition, all
	121	! elements along y reside on the same PE.
	122	!------------------------------------------------------------------------------!
	123
[1320]	124	USE cpulog, &
	125	ONLY: cpu_log, cpu_log_nowait, log_point_s
	126
	127	USE indices, &
	128	ONLY: nx, ny
	129
	130	USE kinds
	131
[1]	132	USE pegrid
	133
[1320]	134	USE transpose_indices, &
	135	ONLY: nxl_y, nxr_y, nyn_x, nys_x, nzb_x, nzb_y, nzt_x, nzt_y
	136
[1]	137	IMPLICIT NONE
	138
[1320]	139	INTEGER(iwp) :: i !:
	140	INTEGER(iwp) :: j !:
	141	INTEGER(iwp) :: k !:
	142	INTEGER(iwp) :: l !:
	143	INTEGER(iwp) :: ys !:
	144
	145	REAL(wp) :: f_inv(nys_x:nyn_x,nzb_x:nzt_x,0:nx) !:
	146	REAL(wp) :: f_out(0:ny,nxl_y:nxr_y,nzb_y:nzt_y) !:
[1]	147
[1320]	148	REAL(wp), DIMENSION(nyn_x-nys_x+1,nzb_y:nzt_y,nxl_y:nxr_y,0:pdims(2)-1) :: work !:
[1111]	149
	150
[1106]	151	IF ( numprocs /= 1 ) THEN
	152
	153	#if defined( __parallel )
[1]	154	!
[1106]	155	!-- Transpose array
[1318]	156	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start', cpu_log_nowait )
[1106]	157	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
[1111]	158	!$acc update host( f_inv )
	159	CALL MPI_ALLTOALL( f_inv(nys_x,nzb_x,0), sendrecvcount_xy, MPI_REAL, &
	160	work(1,nzb_y,nxl_y,0), sendrecvcount_xy, MPI_REAL, &
[1106]	161	comm1dy, ierr )
	162	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
[1]	163
	164	!
[1106]	165	!-- Reorder transposed array
[1111]	166	!$OMP PARALLEL PRIVATE ( i, j, k, l, ys )
[683]	167	!$OMP DO
[1216]	168	!$acc data copyin( work )
[1106]	169	DO l = 0, pdims(2) - 1
	170	ys = 0 + l * ( nyn_x - nys_x + 1 )
[1111]	171	!$acc kernels present( f_out, work )
[1106]	172	DO i = nxl_y, nxr_y
	173	DO k = nzb_y, nzt_y
	174	DO j = ys, ys + nyn_x - nys_x
[1111]	175	f_out(j,i,k) = work(j-ys+1,k,i,l)
[1106]	176	ENDDO
[1]	177	ENDDO
	178	ENDDO
[1111]	179	!$acc end kernels
[1]	180	ENDDO
[1216]	181	!$acc end data
[683]	182	!$OMP END PARALLEL
[1]	183	#endif
	184
[1106]	185	ELSE
	186
	187	!
	188	!-- Reorder transposed array
	189	!$OMP PARALLEL PRIVATE ( i, j, k )
	190	!$OMP DO
[1216]	191	!$acc kernels present( f_inv, f_out )
[1106]	192	DO k = nzb_y, nzt_y
	193	DO i = nxl_y, nxr_y
	194	DO j = 0, ny
	195	f_out(j,i,k) = f_inv(j,k,i)
	196	ENDDO
	197	ENDDO
	198	ENDDO
[1111]	199	!$acc end kernels
[1106]	200	!$OMP END PARALLEL
	201
	202	ENDIF
	203
[1]	204	END SUBROUTINE transpose_xy
	205
	206
[1216]	207	SUBROUTINE resort_for_xz( f_inv, f_out )
[1]	208
	209	!------------------------------------------------------------------------------!
	210	! Description:
	211	! ------------
[1216]	212	! Resorting data after the transposition from x to z. The transposition itself
	213	! is carried out in transpose_xz
	214	!------------------------------------------------------------------------------!
	215
[1320]	216	USE indices, &
	217	ONLY: nxl, nxr, nyn, nys, nz
[1216]	218
[1320]	219	USE kinds
	220
[1216]	221	IMPLICIT NONE
	222
[1320]	223	REAL(wp) :: f_inv(nys:nyn,nxl:nxr,1:nz) !:
	224	REAL(wp) :: f_out(1:nz,nys:nyn,nxl:nxr) !:
[1216]	225
[1320]	226	INTEGER(iwp) :: i !:
	227	INTEGER(iwp) :: j !:
	228	INTEGER(iwp) :: k !:
[1216]	229	!
	230	!-- Rearrange indices of input array in order to make data to be send
	231	!-- by MPI contiguous.
	232	!-- In case of parallel fft/transposition, scattered store is faster in
	233	!-- backward direction!!!
	234	!$OMP PARALLEL PRIVATE ( i, j, k )
	235	!$OMP DO
	236	!$acc kernels present( f_inv, f_out )
	237	DO k = 1, nz
	238	DO i = nxl, nxr
	239	DO j = nys, nyn
	240	f_out(k,j,i) = f_inv(j,i,k)
	241	ENDDO
	242	ENDDO
	243	ENDDO
	244	!$acc end kernels
	245	!$OMP END PARALLEL
	246
	247	END SUBROUTINE resort_for_xz
	248
	249
	250	SUBROUTINE transpose_xz( f_in, f_inv )
	251
	252	!------------------------------------------------------------------------------!
	253	! Description:
	254	! ------------
[1]	255	! Transposition of input array (f_in) from x to z. For the input array, all
	256	! elements along x reside on the same PE, while after transposition, all
	257	! elements along z reside on the same PE.
	258	!------------------------------------------------------------------------------!
	259
[1320]	260	USE cpulog, &
	261	ONLY: cpu_log, cpu_log_nowait, log_point_s
[1]	262
[1320]	263	USE indices, &
	264	ONLY: nnx, nx, nxl, nxr, ny, nyn, nys, nz
	265
	266	USE kinds
	267
[1324]	268	USE pegrid
[1320]	269
	270	USE transpose_indices, &
	271	ONLY: nyn_x, nys_x, nzb_x, nzt_x
	272
[1]	273	IMPLICIT NONE
	274
[1320]	275	INTEGER(iwp) :: i !:
	276	INTEGER(iwp) :: j !:
	277	INTEGER(iwp) :: k !:
	278	INTEGER(iwp) :: l !:
	279	INTEGER(iwp) :: xs !:
[1]	280
[1320]	281	REAL(wp) :: f_in(0:nx,nys_x:nyn_x,nzb_x:nzt_x) !:
	282	REAL(wp) :: f_inv(nys:nyn,nxl:nxr,1:nz) !:
[1]	283
[1320]	284	REAL(wp), DIMENSION(nys_x:nyn_x,nnx,nzb_x:nzt_x,0:pdims(1)-1) :: work !:
[1111]	285
[1320]	286
[1]	287	!
	288	!-- If the PE grid is one-dimensional along y, the array has only to be
	289	!-- reordered locally and therefore no transposition has to be done.
	290	IF ( pdims(1) /= 1 ) THEN
[1106]	291
	292	#if defined( __parallel )
[1]	293	!
	294	!-- Reorder input array for transposition
[1111]	295	!$OMP PARALLEL PRIVATE ( i, j, k, l, xs )
[683]	296	!$OMP DO
[1216]	297	!$acc data copyout( work )
[1]	298	DO l = 0, pdims(1) - 1
	299	xs = 0 + l * nnx
[1111]	300	!$acc kernels present( f_in, work )
[1003]	301	DO k = nzb_x, nzt_x
[164]	302	DO i = xs, xs + nnx - 1
[1003]	303	DO j = nys_x, nyn_x
[1111]	304	work(j,i-xs+1,k,l) = f_in(i,j,k)
[1]	305	ENDDO
	306	ENDDO
	307	ENDDO
[1111]	308	!$acc end kernels
[1]	309	ENDDO
[1216]	310	!$acc end data
[683]	311	!$OMP END PARALLEL
[1]	312
	313	!
	314	!-- Transpose array
[1318]	315	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start', cpu_log_nowait )
[622]	316	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
[1111]	317	CALL MPI_ALLTOALL( work(nys_x,1,nzb_x,0), sendrecvcount_zx, MPI_REAL, &
	318	f_inv(nys,nxl,1), sendrecvcount_zx, MPI_REAL, &
[1]	319	comm1dx, ierr )
[1111]	320	!$acc update device( f_inv )
[1]	321	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
[1106]	322	#endif
	323
[1]	324	ELSE
[1106]	325
[1]	326	!
	327	!-- Reorder the array in a way that the z index is in first position
[683]	328	!$OMP PARALLEL PRIVATE ( i, j, k )
	329	!$OMP DO
[1216]	330	!$acc kernels present( f_in, f_inv )
[1003]	331	DO i = nxl, nxr
	332	DO j = nys, nyn
	333	DO k = 1, nz
[164]	334	f_inv(j,i,k) = f_in(i,j,k)
[1]	335	ENDDO
	336	ENDDO
	337	ENDDO
[1111]	338	!$acc end kernels
[683]	339	!$OMP END PARALLEL
[1]	340
[164]	341	ENDIF
	342
[1]	343	END SUBROUTINE transpose_xz
	344
	345
[1216]	346	SUBROUTINE resort_for_yx( f_inv, f_out )
[1]	347
	348	!------------------------------------------------------------------------------!
	349	! Description:
	350	! ------------
[1216]	351	! Resorting data after the transposition from y to x. The transposition itself
	352	! is carried out in transpose_yx
	353	!------------------------------------------------------------------------------!
	354
[1320]	355	USE indices, &
	356	ONLY: nx
[1216]	357
[1320]	358	USE kinds
	359
	360	USE transpose_indices, &
	361	ONLY: nyn_x, nys_x, nzb_x, nzt_x
	362
[1216]	363	IMPLICIT NONE
	364
[1320]	365	REAL(wp) :: f_inv(nys_x:nyn_x,nzb_x:nzt_x,0:nx) !:
	366	REAL(wp) :: f_out(0:nx,nys_x:nyn_x,nzb_x:nzt_x) !:
[1216]	367
	368
[1320]	369	INTEGER(iwp) :: i !:
	370	INTEGER(iwp) :: j !:
	371	INTEGER(iwp) :: k !:
[1216]	372	!
	373	!-- Rearrange indices of input array in order to make data to be send
	374	!-- by MPI contiguous
	375	!$OMP PARALLEL PRIVATE ( i, j, k )
	376	!$OMP DO
	377	!$acc kernels present( f_inv, f_out )
	378	DO i = 0, nx
	379	DO k = nzb_x, nzt_x
	380	DO j = nys_x, nyn_x
	381	f_out(i,j,k) = f_inv(j,k,i)
	382	ENDDO
	383	ENDDO
	384	ENDDO
	385	!$acc end kernels
	386	!$OMP END PARALLEL
	387
	388	END SUBROUTINE resort_for_yx
	389
	390
	391	SUBROUTINE transpose_yx( f_in, f_inv )
	392
	393	!------------------------------------------------------------------------------!
	394	! Description:
	395	! ------------
[1]	396	! Transposition of input array (f_in) from y to x. For the input array, all
	397	! elements along y reside on the same PE, while after transposition, all
	398	! elements along x reside on the same PE.
	399	!------------------------------------------------------------------------------!
	400
[1320]	401	USE cpulog, &
	402	ONLY: cpu_log, cpu_log_nowait, log_point_s
[1]	403
[1320]	404	USE indices, &
	405	ONLY: nx, ny
	406
	407	USE kinds
	408
[1324]	409	USE pegrid
[1320]	410
	411	USE transpose_indices, &
	412	ONLY: nxl_y, nxr_y, nyn_x, nys_x, nzb_x, nzb_y, nzt_x, nzt_y
	413
[1]	414	IMPLICIT NONE
	415
[1320]	416	INTEGER(iwp) :: i !:
	417	INTEGER(iwp) :: j !:
	418	INTEGER(iwp) :: k !:
	419	INTEGER(iwp) :: l !:
	420	INTEGER(iwp) :: ys !:
[1]	421
[1320]	422	REAL(wp) :: f_in(0:ny,nxl_y:nxr_y,nzb_y:nzt_y) !:
	423	REAL(wp) :: f_inv(nys_x:nyn_x,nzb_x:nzt_x,0:nx) !:
[1111]	424
[1320]	425	REAL(wp), DIMENSION(nyn_x-nys_x+1,nzb_y:nzt_y,nxl_y:nxr_y,0:pdims(2)-1) :: work !:
[1111]	426
[1320]	427
[1106]	428	IF ( numprocs /= 1 ) THEN
	429
[1]	430	#if defined( __parallel )
	431	!
[1106]	432	!-- Reorder input array for transposition
[1111]	433	!$OMP PARALLEL PRIVATE ( i, j, k, l, ys )
[683]	434	!$OMP DO
[1216]	435	!$acc data copyout( work )
[1106]	436	DO l = 0, pdims(2) - 1
	437	ys = 0 + l * ( nyn_x - nys_x + 1 )
[1111]	438	!$acc kernels present( f_in, work )
[1106]	439	DO i = nxl_y, nxr_y
	440	DO k = nzb_y, nzt_y
	441	DO j = ys, ys + nyn_x - nys_x
[1111]	442	work(j-ys+1,k,i,l) = f_in(j,i,k)
[1106]	443	ENDDO
	444	ENDDO
	445	ENDDO
[1111]	446	!$acc end kernels
[1106]	447	ENDDO
[1216]	448	!$acc end data
[1106]	449	!$OMP END PARALLEL
	450
	451	!
	452	!-- Transpose array
[1318]	453	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start', cpu_log_nowait )
[1106]	454	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
[1111]	455	CALL MPI_ALLTOALL( work(1,nzb_y,nxl_y,0), sendrecvcount_xy, MPI_REAL, &
	456	f_inv(nys_x,nzb_x,0), sendrecvcount_xy, MPI_REAL, &
[1106]	457	comm1dy, ierr )
[1111]	458	!$acc update device( f_inv )
[1106]	459	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
	460	#endif
	461
	462	ELSE
	463
	464	!
	465	!-- Reorder array f_in the same way as ALLTOALL did it
	466	!$OMP PARALLEL PRIVATE ( i, j, k )
	467	!$OMP DO
[1216]	468	!$acc kernels present( f_in, f_inv )
[1003]	469	DO i = nxl_y, nxr_y
	470	DO k = nzb_y, nzt_y
[1106]	471	DO j = 0, ny
	472	f_inv(j,k,i) = f_in(j,i,k)
[1]	473	ENDDO
	474	ENDDO
	475	ENDDO
[1111]	476	!$acc end kernels
[683]	477	!$OMP END PARALLEL
[1]	478
[1106]	479	ENDIF
[1]	480
	481	END SUBROUTINE transpose_yx
	482
	483
[1216]	484	SUBROUTINE transpose_yxd( f_in, f_out )
[1]	485
	486	!------------------------------------------------------------------------------!
	487	! Description:
	488	! ------------
	489	! Transposition of input array (f_in) from y to x. For the input array, all
	490	! elements along y reside on the same PE, while after transposition, all
	491	! elements along x reside on the same PE.
	492	! This is a direct transposition for arrays with indices in regular order
	493	! (k,j,i) (cf. transpose_yx).
	494	!------------------------------------------------------------------------------!
	495
[1320]	496	USE cpulog, &
	497	ONLY: cpu_log, cpu_log_nowait, log_point_s
[1]	498
[1320]	499	USE indices, &
	500	ONLY: nnx, nny, nnz, nx, nxl, nxr, nyn, nys, nz
	501
	502	USE kinds
	503
[1324]	504	USE pegrid
[1320]	505
	506	USE transpose_indices, &
	507	ONLY: nyn_x, nys_x, nzb_x, nzt_x
	508
[1]	509	IMPLICIT NONE
	510
[1320]	511	INTEGER(iwp) :: i !:
	512	INTEGER(iwp) :: j !:
	513	INTEGER(iwp) :: k !:
	514	INTEGER(iwp) :: l !:
	515	INTEGER(iwp) :: m !:
	516	INTEGER(iwp) :: xs !:
[1]	517
[1320]	518	REAL(wp) :: f_in(1:nz,nys:nyn,nxl:nxr) !:
	519	REAL(wp) :: f_inv(nxl:nxr,1:nz,nys:nyn) !:
	520	REAL(wp) :: f_out(0:nx,nys_x:nyn_x,nzb_x:nzt_x) !:
	521	REAL(wp) :: work(nnxnnynnz) !:
[1]	522	#if defined( __parallel )
	523
	524	!
	525	!-- Rearrange indices of input array in order to make data to be send
	526	!-- by MPI contiguous
[1003]	527	DO k = 1, nz
	528	DO j = nys, nyn
	529	DO i = nxl, nxr
[164]	530	f_inv(i,k,j) = f_in(k,j,i)
[1]	531	ENDDO
	532	ENDDO
	533	ENDDO
	534
	535	!
	536	!-- Transpose array
	537	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
[622]	538	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
[1]	539	CALL MPI_ALLTOALL( f_inv(nxl,1,nys), sendrecvcount_xy, MPI_REAL, &
[164]	540	work(1), sendrecvcount_xy, MPI_REAL, &
[1]	541	comm1dx, ierr )
	542	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
	543
	544	!
	545	!-- Reorder transposed array
	546	m = 0
	547	DO l = 0, pdims(1) - 1
	548	xs = 0 + l * nnx
[1003]	549	DO j = nys_x, nyn_x
	550	DO k = 1, nz
[1]	551	DO i = xs, xs + nnx - 1
	552	m = m + 1
[164]	553	f_out(i,j,k) = work(m)
[1]	554	ENDDO
	555	ENDDO
	556	ENDDO
	557	ENDDO
	558
	559	#endif
	560
	561	END SUBROUTINE transpose_yxd
	562
	563
[1216]	564	SUBROUTINE resort_for_yz( f_in, f_inv )
[1]	565
	566	!------------------------------------------------------------------------------!
	567	! Description:
	568	! ------------
[1216]	569	! Resorting data for the transposition from y to z. The transposition itself
	570	! is carried out in transpose_yz
	571	!------------------------------------------------------------------------------!
	572
[1320]	573	USE indices, &
	574	ONLY: ny
[1216]	575
[1320]	576	USE kinds
	577
	578	USE transpose_indices, &
	579	ONLY: nxl_y, nxr_y, nzb_y, nzt_y
	580
[1216]	581	IMPLICIT NONE
	582
[1320]	583	REAL(wp) :: f_in(0:ny,nxl_y:nxr_y,nzb_y:nzt_y) !:
	584	REAL(wp) :: f_inv(nxl_y:nxr_y,nzb_y:nzt_y,0:ny) !:
[1216]	585
[1320]	586	INTEGER(iwp) :: i !:
	587	INTEGER(iwp) :: j !:
	588	INTEGER(iwp) :: k !:
[1216]	589
	590	!
	591	!-- Rearrange indices of input array in order to make data to be send
	592	!-- by MPI contiguous
	593	!$OMP PARALLEL PRIVATE ( i, j, k )
	594	!$OMP DO
	595	!$acc kernels present( f_in, f_inv )
	596	DO j = 0, ny
	597	DO k = nzb_y, nzt_y
	598	DO i = nxl_y, nxr_y
	599	f_inv(i,k,j) = f_in(j,i,k)
	600	ENDDO
	601	ENDDO
	602	ENDDO
	603	!$acc end kernels
	604	!$OMP END PARALLEL
	605
	606	END SUBROUTINE resort_for_yz
	607
	608
	609	SUBROUTINE transpose_yz( f_inv, f_out )
	610
	611	!------------------------------------------------------------------------------!
	612	! Description:
	613	! ------------
[1]	614	! Transposition of input array (f_in) from y to z. For the input array, all
	615	! elements along y reside on the same PE, while after transposition, all
	616	! elements along z reside on the same PE.
	617	!------------------------------------------------------------------------------!
	618
[1320]	619	USE cpulog, &
	620	ONLY: cpu_log, cpu_log_nowait, log_point_s
[1]	621
[1320]	622	USE indices, &
	623	ONLY: ny, nz
	624
	625	USE kinds
	626
[1324]	627	USE pegrid
[1320]	628
	629	USE transpose_indices, &
	630	ONLY: nxl_y, nxl_z, nxr_y, nxr_z, nyn_z, nys_z, nzb_y, nzt_y
	631
[1]	632	IMPLICIT NONE
	633
[1320]	634	INTEGER(iwp) :: i !:
	635	INTEGER(iwp) :: j !:
	636	INTEGER(iwp) :: k !:
	637	INTEGER(iwp) :: l !:
	638	INTEGER(iwp) :: zs !:
[1]	639
[1320]	640	REAL(wp) :: f_inv(nxl_y:nxr_y,nzb_y:nzt_y,0:ny) !:
	641	REAL(wp) :: f_out(nxl_z:nxr_z,nys_z:nyn_z,1:nz) !:
[1111]	642
[1320]	643	REAL(wp), DIMENSION(nxl_z:nxr_z,nzt_y-nzb_y+1,nys_z:nyn_z,0:pdims(1)-1) :: work !:
[1111]	644
[1320]	645
[1]	646	!
	647	!-- If the PE grid is one-dimensional along y, only local reordering
	648	!-- of the data is necessary and no transposition has to be done.
	649	IF ( pdims(1) == 1 ) THEN
[1106]	650
[683]	651	!$OMP PARALLEL PRIVATE ( i, j, k )
	652	!$OMP DO
[1216]	653	!$acc kernels present( f_inv, f_out )
[1003]	654	DO j = 0, ny
	655	DO k = nzb_y, nzt_y
	656	DO i = nxl_y, nxr_y
[164]	657	f_out(i,j,k) = f_inv(i,k,j)
[1]	658	ENDDO
	659	ENDDO
	660	ENDDO
[1111]	661	!$acc end kernels
[683]	662	!$OMP END PARALLEL
[1]	663
[1106]	664	ELSE
	665
	666	#if defined( __parallel )
[1]	667	!
[1106]	668	!-- Transpose array
[1318]	669	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start', cpu_log_nowait )
[1106]	670	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
[1111]	671	!$acc update host( f_inv )
	672	CALL MPI_ALLTOALL( f_inv(nxl_y,nzb_y,0), sendrecvcount_yz, MPI_REAL, &
	673	work(nxl_z,1,nys_z,0), sendrecvcount_yz, MPI_REAL, &
[1106]	674	comm1dx, ierr )
	675	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
[1]	676
	677	!
[1106]	678	!-- Reorder transposed array
[1111]	679	!$OMP PARALLEL PRIVATE ( i, j, k, l, zs )
[683]	680	!$OMP DO
[1216]	681	!$acc data copyin( work )
[1106]	682	DO l = 0, pdims(1) - 1
	683	zs = 1 + l * ( nzt_y - nzb_y + 1 )
[1216]	684	!$acc kernels present( f_out )
[1106]	685	DO j = nys_z, nyn_z
	686	DO k = zs, zs + nzt_y - nzb_y
	687	DO i = nxl_z, nxr_z
[1111]	688	f_out(i,j,k) = work(i,k-zs+1,j,l)
[1106]	689	ENDDO
[1]	690	ENDDO
	691	ENDDO
[1111]	692	!$acc end kernels
[1]	693	ENDDO
[1216]	694	!$acc end data
[683]	695	!$OMP END PARALLEL
[1]	696	#endif
	697
[1106]	698	ENDIF
	699
[1]	700	END SUBROUTINE transpose_yz
	701
	702
[1216]	703	SUBROUTINE resort_for_zx( f_in, f_inv )
[1]	704
	705	!------------------------------------------------------------------------------!
	706	! Description:
	707	! ------------
[1216]	708	! Resorting data for the transposition from z to x. The transposition itself
	709	! is carried out in transpose_zx
	710	!------------------------------------------------------------------------------!
	711
[1320]	712	USE indices, &
	713	ONLY: nxl, nxr, nyn, nys, nz
[1216]	714
[1320]	715	USE kinds
	716
[1216]	717	IMPLICIT NONE
	718
[1320]	719	REAL(wp) :: f_in(1:nz,nys:nyn,nxl:nxr) !:
	720	REAL(wp) :: f_inv(nys:nyn,nxl:nxr,1:nz) !:
[1216]	721
[1320]	722	INTEGER(iwp) :: i !:
	723	INTEGER(iwp) :: j !:
	724	INTEGER(iwp) :: k !:
[1216]	725
	726	!
	727	!-- Rearrange indices of input array in order to make data to be send
	728	!-- by MPI contiguous
	729	!$OMP PARALLEL PRIVATE ( i, j, k )
	730	!$OMP DO
	731	!$acc kernels present( f_in, f_inv )
	732	DO k = 1,nz
	733	DO i = nxl, nxr
	734	DO j = nys, nyn
	735	f_inv(j,i,k) = f_in(k,j,i)
	736	ENDDO
	737	ENDDO
	738	ENDDO
	739	!$acc end kernels
	740	!$OMP END PARALLEL
	741
	742	END SUBROUTINE resort_for_zx
	743
	744
	745	SUBROUTINE transpose_zx( f_inv, f_out )
	746
	747	!------------------------------------------------------------------------------!
	748	! Description:
	749	! ------------
[1]	750	! Transposition of input array (f_in) from z to x. For the input array, all
	751	! elements along z reside on the same PE, while after transposition, all
	752	! elements along x reside on the same PE.
	753	!------------------------------------------------------------------------------!
	754
[1320]	755	USE cpulog, &
	756	ONLY: cpu_log, cpu_log_nowait, log_point_s
[1]	757
[1320]	758	USE indices, &
	759	ONLY: nnx, nx, nxl, nxr, nyn, nys, nz
	760
	761	USE kinds
	762
[1324]	763	USE pegrid
[1320]	764
	765	USE transpose_indices, &
	766	ONLY: nyn_x, nys_x, nzb_x, nzt_x
	767
[1]	768	IMPLICIT NONE
	769
[1320]	770	INTEGER(iwp) :: i !:
	771	INTEGER(iwp) :: j !:
	772	INTEGER(iwp) :: k !:
	773	INTEGER(iwp) :: l !:
	774	INTEGER(iwp) :: xs !:
[1]	775
[1320]	776	REAL(wp) :: f_inv(nys:nyn,nxl:nxr,1:nz) !:
	777	REAL(wp) :: f_out(0:nx,nys_x:nyn_x,nzb_x:nzt_x) !:
[1111]	778
[1320]	779	REAL(wp), DIMENSION(nys_x:nyn_x,nnx,nzb_x:nzt_x,0:pdims(1)-1) :: work !:
[1]	780
[1320]	781
[1]	782	!
	783	!-- If the PE grid is one-dimensional along y, only local reordering
	784	!-- of the data is necessary and no transposition has to be done.
	785	IF ( pdims(1) == 1 ) THEN
[1106]	786
[683]	787	!$OMP PARALLEL PRIVATE ( i, j, k )
	788	!$OMP DO
[1216]	789	!$acc kernels present( f_inv, f_out )
[1003]	790	DO k = 1, nz
	791	DO i = nxl, nxr
	792	DO j = nys, nyn
[164]	793	f_out(i,j,k) = f_inv(j,i,k)
[1]	794	ENDDO
	795	ENDDO
	796	ENDDO
[1111]	797	!$acc end kernels
[683]	798	!$OMP END PARALLEL
[1]	799
[1106]	800	ELSE
	801
	802	#if defined( __parallel )
[1]	803	!
[1106]	804	!-- Transpose array
[1318]	805	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start', cpu_log_nowait )
[1106]	806	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
[1111]	807	!$acc update host( f_inv )
	808	CALL MPI_ALLTOALL( f_inv(nys,nxl,1), sendrecvcount_zx, MPI_REAL, &
	809	work(nys_x,1,nzb_x,0), sendrecvcount_zx, MPI_REAL, &
[1106]	810	comm1dx, ierr )
	811	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
[1]	812
	813	!
[1106]	814	!-- Reorder transposed array
[1111]	815	!$OMP PARALLEL PRIVATE ( i, j, k, l, xs )
[683]	816	!$OMP DO
[1216]	817	!$acc data copyin( work )
[1106]	818	DO l = 0, pdims(1) - 1
	819	xs = 0 + l * nnx
[1216]	820	!$acc kernels present( f_out )
[1106]	821	DO k = nzb_x, nzt_x
	822	DO i = xs, xs + nnx - 1
	823	DO j = nys_x, nyn_x
[1111]	824	f_out(i,j,k) = work(j,i-xs+1,k,l)
[1106]	825	ENDDO
[1]	826	ENDDO
	827	ENDDO
[1111]	828	!$acc end kernels
[1]	829	ENDDO
[1216]	830	!$acc end data
[683]	831	!$OMP END PARALLEL
[1]	832	#endif
	833
[1106]	834	ENDIF
	835
[1]	836	END SUBROUTINE transpose_zx
	837
	838
[1216]	839	SUBROUTINE resort_for_zy( f_inv, f_out )
[1]	840
	841	!------------------------------------------------------------------------------!
	842	! Description:
	843	! ------------
[1216]	844	! Resorting data after the transposition from z to y. The transposition itself
	845	! is carried out in transpose_zy
	846	!------------------------------------------------------------------------------!
	847
[1320]	848	USE indices, &
	849	ONLY: ny
[1216]	850
[1320]	851	USE kinds
	852
	853	USE transpose_indices, &
	854	ONLY: nxl_y, nxr_y, nzb_y, nzt_y
	855
[1216]	856	IMPLICIT NONE
	857
[1320]	858	REAL(wp) :: f_inv(nxl_y:nxr_y,nzb_y:nzt_y,0:ny) !:
	859	REAL(wp) :: f_out(0:ny,nxl_y:nxr_y,nzb_y:nzt_y) !:
[1216]	860
	861
[1320]	862	INTEGER(iwp) :: i !:
	863	INTEGER(iwp) :: j !:
	864	INTEGER(iwp) :: k !:
[1216]	865
	866	!
	867	!-- Rearrange indices of input array in order to make data to be send
	868	!-- by MPI contiguous
	869	!$OMP PARALLEL PRIVATE ( i, j, k )
	870	!$OMP DO
	871	!$acc kernels present( f_inv, f_out )
	872	DO k = nzb_y, nzt_y
	873	DO j = 0, ny
	874	DO i = nxl_y, nxr_y
	875	f_out(j,i,k) = f_inv(i,k,j)
	876	ENDDO
	877	ENDDO
	878	ENDDO
	879	!$acc end kernels
	880	!$OMP END PARALLEL
	881
	882	END SUBROUTINE resort_for_zy
	883
	884
	885	SUBROUTINE transpose_zy( f_in, f_inv )
	886
	887	!------------------------------------------------------------------------------!
	888	! Description:
	889	! ------------
[1]	890	! Transposition of input array (f_in) from z to y. For the input array, all
	891	! elements along z reside on the same PE, while after transposition, all
	892	! elements along y reside on the same PE.
	893	!------------------------------------------------------------------------------!
	894
[1320]	895	USE cpulog, &
	896	ONLY: cpu_log, cpu_log_nowait, log_point_s
[1]	897
[1320]	898	USE indices, &
	899	ONLY: ny, nz
	900
	901	USE kinds
	902
[1324]	903	USE pegrid
[1320]	904
	905	USE transpose_indices, &
	906	ONLY: nxl_y, nxl_z, nxr_y, nxr_z, nyn_z, nys_z, nzb_y, nzt_y
	907
[1]	908	IMPLICIT NONE
	909
[1320]	910	INTEGER(iwp) :: i !:
	911	INTEGER(iwp) :: j !:
	912	INTEGER(iwp) :: k !:
	913	INTEGER(iwp) :: l !:
	914	INTEGER(iwp) :: zs !:
[1]	915
[1320]	916	REAL(wp) :: f_in(nxl_z:nxr_z,nys_z:nyn_z,1:nz) !:
	917	REAL(wp) :: f_inv(nxl_y:nxr_y,nzb_y:nzt_y,0:ny) !:
[1111]	918
[1320]	919	REAL(wp), DIMENSION(nxl_z:nxr_z,nzt_y-nzb_y+1,nys_z:nyn_z,0:pdims(1)-1) :: work !:
[1111]	920
[1]	921	!
	922	!-- If the PE grid is one-dimensional along y, the array has only to be
	923	!-- reordered locally and therefore no transposition has to be done.
	924	IF ( pdims(1) /= 1 ) THEN
[1106]	925
	926	#if defined( __parallel )
[1]	927	!
	928	!-- Reorder input array for transposition
[1111]	929	!$OMP PARALLEL PRIVATE ( i, j, k, l, zs )
[683]	930	!$OMP DO
[1216]	931	!$acc data copyout( work )
[1]	932	DO l = 0, pdims(1) - 1
[1003]	933	zs = 1 + l * ( nzt_y - nzb_y + 1 )
[1111]	934	!$acc kernels present( f_in, work )
[1003]	935	DO j = nys_z, nyn_z
	936	DO k = zs, zs + nzt_y - nzb_y
	937	DO i = nxl_z, nxr_z
[1111]	938	work(i,k-zs+1,j,l) = f_in(i,j,k)
[1]	939	ENDDO
	940	ENDDO
	941	ENDDO
[1111]	942	!$acc end kernels
[1]	943	ENDDO
[1216]	944	!$acc end data
[683]	945	!$OMP END PARALLEL
[1]	946
	947	!
	948	!-- Transpose array
[1318]	949	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start', cpu_log_nowait )
[622]	950	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
[1111]	951	CALL MPI_ALLTOALL( work(nxl_z,1,nys_z,0), sendrecvcount_yz, MPI_REAL, &
	952	f_inv(nxl_y,nzb_y,0), sendrecvcount_yz, MPI_REAL, &
[1]	953	comm1dx, ierr )
[1111]	954	!$acc update device( f_inv )
[1]	955	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
[1106]	956	#endif
[1]	957
	958	ELSE
	959	!
[1106]	960	!-- Reorder the array in the same way like ALLTOALL did it
[683]	961	!$OMP PARALLEL PRIVATE ( i, j, k )
	962	!$OMP DO
[1216]	963	!$acc kernels present( f_in, f_inv )
[1003]	964	DO k = nzb_y, nzt_y
	965	DO j = 0, ny
	966	DO i = nxl_y, nxr_y
[164]	967	f_inv(i,k,j) = f_in(i,j,k)
	968	ENDDO
	969	ENDDO
	970	ENDDO
[1111]	971	!$acc end kernels
[683]	972	!$OMP END PARALLEL
[1106]	973
	974	ENDIF
	975
[1]	976	END SUBROUTINE transpose_zy
	977
	978
[1216]	979	SUBROUTINE transpose_zyd( f_in, f_out )
[1]	980
	981	!------------------------------------------------------------------------------!
	982	! Description:
	983	! ------------
	984	! Transposition of input array (f_in) from z to y. For the input array, all
	985	! elements along z reside on the same PE, while after transposition, all
	986	! elements along y reside on the same PE.
	987	! This is a direct transposition for arrays with indices in regular order
	988	! (k,j,i) (cf. transpose_zy).
	989	!------------------------------------------------------------------------------!
	990
[1320]	991	USE cpulog, &
	992	ONLY: cpu_log, cpu_log_nowait, log_point_s
[1]	993
[1320]	994	USE indices, &
	995	ONLY: nnx, nny, nnz, nxl, nxr, nyn, nys, ny, nz
	996
	997	USE kinds
	998
[1324]	999	USE pegrid
[1320]	1000
	1001	USE transpose_indices, &
	1002	ONLY: nxl_y, nxl_yd, nxr_y, nxr_yd, nzb_y, nzb_yd, nzt_y, nzt_yd
	1003
[1]	1004	IMPLICIT NONE
	1005
[1320]	1006	INTEGER(iwp) :: i !:
	1007	INTEGER(iwp) :: j !:
	1008	INTEGER(iwp) :: k !:
	1009	INTEGER(iwp) :: l !:
	1010	INTEGER(iwp) :: m !:
	1011	INTEGER(iwp) :: ys !:
[1]	1012
[1320]	1013	REAL(wp) :: f_in(1:nz,nys:nyn,nxl:nxr) !:
	1014	REAL(wp) :: f_inv(nys:nyn,nxl:nxr,1:nz) !:
	1015	REAL(wp) :: f_out(0:ny,nxl_yd:nxr_yd,nzb_yd:nzt_yd) !:
	1016	REAL(wp) :: work(nnxnnynnz) !:
	1017
[1]	1018	#if defined( __parallel )
	1019
	1020	!
	1021	!-- Rearrange indices of input array in order to make data to be send
	1022	!-- by MPI contiguous
[1003]	1023	DO i = nxl, nxr
	1024	DO j = nys, nyn
	1025	DO k = 1, nz
[164]	1026	f_inv(j,i,k) = f_in(k,j,i)
[1]	1027	ENDDO
	1028	ENDDO
	1029	ENDDO
	1030
	1031	!
	1032	!-- Move data to different array, because memory location of work1 is
	1033	!-- needed further below (work1 = work2).
	1034	!-- If the PE grid is one-dimensional along x, only local reordering
	1035	!-- of the data is necessary and no transposition has to be done.
	1036	IF ( pdims(2) == 1 ) THEN
[1003]	1037	DO k = 1, nz
	1038	DO i = nxl, nxr
	1039	DO j = nys, nyn
[164]	1040	f_out(j,i,k) = f_inv(j,i,k)
[1]	1041	ENDDO
	1042	ENDDO
	1043	ENDDO
	1044	RETURN
	1045	ENDIF
	1046
	1047	!
	1048	!-- Transpose array
	1049	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
[622]	1050	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
[1]	1051	CALL MPI_ALLTOALL( f_inv(nys,nxl,1), sendrecvcount_zyd, MPI_REAL, &
[164]	1052	work(1), sendrecvcount_zyd, MPI_REAL, &
[1]	1053	comm1dy, ierr )
	1054	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
	1055
	1056	!
	1057	!-- Reorder transposed array
	1058	m = 0
	1059	DO l = 0, pdims(2) - 1
	1060	ys = 0 + l * nny
[1003]	1061	DO k = nzb_yd, nzt_yd
	1062	DO i = nxl_yd, nxr_yd
[1]	1063	DO j = ys, ys + nny - 1
	1064	m = m + 1
[164]	1065	f_out(j,i,k) = work(m)
[1]	1066	ENDDO
	1067	ENDDO
	1068	ENDDO
	1069	ENDDO
	1070
	1071	#endif
	1072
	1073	END SUBROUTINE transpose_zyd

Note: See TracBrowser for help on using the repository browser.

Download in other formats:

| Impressum | ©Leibniz Universität Hannover |