Home

Context Navigation

source: palm/trunk/SOURCE/transpose.f90 @ 2000

Last change on this file since 2000 was 2000, checked in by knoop, 8 years ago
Forced header and separation lines into 80 columns
Property svn:keywords set to `Id`
File size: 31.2 KB

Line
1	!> @file transpose.f90
2	!------------------------------------------------------------------------------!
3	! This file is part of PALM.
4	!
5	! PALM is free software: you can redistribute it and/or modify it under the
6	! terms of the GNU General Public License as published by the Free Software
7	! Foundation, either version 3 of the License, or (at your option) any later
8	! version.
9	!
10	! PALM is distributed in the hope that it will be useful, but WITHOUT ANY
11	! WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
12	! A PARTICULAR PURPOSE. See the GNU General Public License for more details.
13	!
14	! You should have received a copy of the GNU General Public License along with
15	! PALM. If not, see <http://www.gnu.org/licenses/>.
16	!
17	! Copyright 1997-2016 Leibniz Universitaet Hannover
18	!------------------------------------------------------------------------------!
19	!
20	! Current revisions:
21	! -----------------
22	! Forced header and separation lines into 80 columns
23	!
24	! Former revisions:
25	! -----------------
26	! $Id: transpose.f90 2000 2016-08-20 18:09:15Z knoop $
27	!
28	! 1682 2015-10-07 23:56:08Z knoop
29	! Code annotations made doxygen readable
30	!
31	! 1324 2014-03-21 09:13:16Z suehring
32	! Bugfix: ONLY statement for module pegrid removed
33	!
34	! 1320 2014-03-20 08:40:49Z raasch
35	! ONLY-attribute added to USE-statements,
36	! kind-parameters added to all INTEGER and REAL declaration statements,
37	! kinds are defined in new module kinds,
38	! old module precision_kind is removed,
39	! revision history before 2012 removed,
40	! comment fields (!:) to be used for variable explanations added to
41	! all variable declaration statements
42	!
43	! 1318 2014-03-17 13:35:16Z raasch
44	! cpu_log_nowait parameter added to cpu measurements of the transpositions
45	! required for solving the Poisson equation (poisfft),
46	! module interfaces removed
47	!
48	! 1257 2013-11-08 15:18:40Z raasch
49	! openacc loop and loop vector clauses removed
50	!
51	! 1216 2013-08-26 09:31:42Z raasch
52	! re-sorting of the transposed / to be transposed arrays moved to separate
53	! routines resort_for_...
54	!
55	! 1111 2013-03-08 23:54:10Z raasch
56	! openACC directives added,
57	! resorting data from/to work changed, work got 4 dimensions instead of 1
58	!
59	! 1106 2013-03-04 05:31:38Z raasch
60	! preprocessor lines rearranged so that routines can also be used in serial
61	! (non-parallel) mode
62	!
63	! 1092 2013-02-02 11:24:22Z raasch
64	! unused variables removed
65	!
66	! 1036 2012-10-22 13:43:42Z raasch
67	! code put under GPL (PALM 3.9)
68	!
69	! 1003 2012-09-14 14:35:53Z raasch
70	! indices nxa, nya, etc. replaced by nx, ny, etc.
71	!
72	! Revision 1.1 1997/07/24 11:25:18 raasch
73	! Initial revision
74	!
75	!------------------------------------------------------------------------------!
76	! Description:
77	! ------------
78	!> Resorting data for the transposition from x to y. The transposition itself
79	!> is carried out in transpose_xy
80	!------------------------------------------------------------------------------!
81	SUBROUTINE resort_for_xy( f_in, f_inv )
82
83
84	USE indices, &
85	ONLY: nx
86
87	USE kinds
88
89	USE transpose_indices, &
90	ONLY: nxl_z, nxr_z, nyn_x, nyn_z, nys_x, nys_z, nzb_x, nzt_x
91
92	IMPLICIT NONE
93
94	REAL(wp) :: f_in(0:nx,nys_x:nyn_x,nzb_x:nzt_x) !<
95	REAL(wp) :: f_inv(nys_x:nyn_x,nzb_x:nzt_x,0:nx) !<
96
97
98	INTEGER(iwp) :: i !<
99	INTEGER(iwp) :: j !<
100	INTEGER(iwp) :: k !<
101	!
102	!-- Rearrange indices of input array in order to make data to be send
103	!-- by MPI contiguous
104	!$OMP PARALLEL PRIVATE ( i, j, k )
105	!$OMP DO
106	!$acc kernels present( f_in, f_inv )
107	DO i = 0, nx
108	DO k = nzb_x, nzt_x
109	DO j = nys_x, nyn_x
110	f_inv(j,k,i) = f_in(i,j,k)
111	ENDDO
112	ENDDO
113	ENDDO
114	!$acc end kernels
115	!$OMP END PARALLEL
116
117	END SUBROUTINE resort_for_xy
118
119
120	!------------------------------------------------------------------------------!
121	! Description:
122	! ------------
123	!> Transposition of input array (f_in) from x to y. For the input array, all
124	!> elements along x reside on the same PE, while after transposition, all
125	!> elements along y reside on the same PE.
126	!------------------------------------------------------------------------------!
127	SUBROUTINE transpose_xy( f_inv, f_out )
128
129
130	USE cpulog, &
131	ONLY: cpu_log, cpu_log_nowait, log_point_s
132
133	USE indices, &
134	ONLY: nx, ny
135
136	USE kinds
137
138	USE pegrid
139
140	USE transpose_indices, &
141	ONLY: nxl_y, nxr_y, nyn_x, nys_x, nzb_x, nzb_y, nzt_x, nzt_y
142
143	IMPLICIT NONE
144
145	INTEGER(iwp) :: i !<
146	INTEGER(iwp) :: j !<
147	INTEGER(iwp) :: k !<
148	INTEGER(iwp) :: l !<
149	INTEGER(iwp) :: ys !<
150
151	REAL(wp) :: f_inv(nys_x:nyn_x,nzb_x:nzt_x,0:nx) !<
152	REAL(wp) :: f_out(0:ny,nxl_y:nxr_y,nzb_y:nzt_y) !<
153
154	REAL(wp), DIMENSION(nyn_x-nys_x+1,nzb_y:nzt_y,nxl_y:nxr_y,0:pdims(2)-1) :: work !<
155
156
157	IF ( numprocs /= 1 ) THEN
158
159	#if defined( __parallel )
160	!
161	!-- Transpose array
162	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start', cpu_log_nowait )
163	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
164	!$acc update host( f_inv )
165	CALL MPI_ALLTOALL( f_inv(nys_x,nzb_x,0), sendrecvcount_xy, MPI_REAL, &
166	work(1,nzb_y,nxl_y,0), sendrecvcount_xy, MPI_REAL, &
167	comm1dy, ierr )
168	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
169
170	!
171	!-- Reorder transposed array
172	!$OMP PARALLEL PRIVATE ( i, j, k, l, ys )
173	!$OMP DO
174	!$acc data copyin( work )
175	DO l = 0, pdims(2) - 1
176	ys = 0 + l * ( nyn_x - nys_x + 1 )
177	!$acc kernels present( f_out, work )
178	DO i = nxl_y, nxr_y
179	DO k = nzb_y, nzt_y
180	DO j = ys, ys + nyn_x - nys_x
181	f_out(j,i,k) = work(j-ys+1,k,i,l)
182	ENDDO
183	ENDDO
184	ENDDO
185	!$acc end kernels
186	ENDDO
187	!$acc end data
188	!$OMP END PARALLEL
189	#endif
190
191	ELSE
192
193	!
194	!-- Reorder transposed array
195	!$OMP PARALLEL PRIVATE ( i, j, k )
196	!$OMP DO
197	!$acc kernels present( f_inv, f_out )
198	DO k = nzb_y, nzt_y
199	DO i = nxl_y, nxr_y
200	DO j = 0, ny
201	f_out(j,i,k) = f_inv(j,k,i)
202	ENDDO
203	ENDDO
204	ENDDO
205	!$acc end kernels
206	!$OMP END PARALLEL
207
208	ENDIF
209
210	END SUBROUTINE transpose_xy
211
212
213	!------------------------------------------------------------------------------!
214	! Description:
215	! ------------
216	!> Resorting data after the transposition from x to z. The transposition itself
217	!> is carried out in transpose_xz
218	!------------------------------------------------------------------------------!
219	SUBROUTINE resort_for_xz( f_inv, f_out )
220
221
222	USE indices, &
223	ONLY: nxl, nxr, nyn, nys, nz
224
225	USE kinds
226
227	IMPLICIT NONE
228
229	REAL(wp) :: f_inv(nys:nyn,nxl:nxr,1:nz) !<
230	REAL(wp) :: f_out(1:nz,nys:nyn,nxl:nxr) !<
231
232	INTEGER(iwp) :: i !<
233	INTEGER(iwp) :: j !<
234	INTEGER(iwp) :: k !<
235	!
236	!-- Rearrange indices of input array in order to make data to be send
237	!-- by MPI contiguous.
238	!-- In case of parallel fft/transposition, scattered store is faster in
239	!-- backward direction!!!
240	!$OMP PARALLEL PRIVATE ( i, j, k )
241	!$OMP DO
242	!$acc kernels present( f_inv, f_out )
243	DO k = 1, nz
244	DO i = nxl, nxr
245	DO j = nys, nyn
246	f_out(k,j,i) = f_inv(j,i,k)
247	ENDDO
248	ENDDO
249	ENDDO
250	!$acc end kernels
251	!$OMP END PARALLEL
252
253	END SUBROUTINE resort_for_xz
254
255
256	!------------------------------------------------------------------------------!
257	! Description:
258	! ------------
259	!> Transposition of input array (f_in) from x to z. For the input array, all
260	!> elements along x reside on the same PE, while after transposition, all
261	!> elements along z reside on the same PE.
262	!------------------------------------------------------------------------------!
263	SUBROUTINE transpose_xz( f_in, f_inv )
264
265
266	USE cpulog, &
267	ONLY: cpu_log, cpu_log_nowait, log_point_s
268
269	USE indices, &
270	ONLY: nnx, nx, nxl, nxr, ny, nyn, nys, nz
271
272	USE kinds
273
274	USE pegrid
275
276	USE transpose_indices, &
277	ONLY: nyn_x, nys_x, nzb_x, nzt_x
278
279	IMPLICIT NONE
280
281	INTEGER(iwp) :: i !<
282	INTEGER(iwp) :: j !<
283	INTEGER(iwp) :: k !<
284	INTEGER(iwp) :: l !<
285	INTEGER(iwp) :: xs !<
286
287	REAL(wp) :: f_in(0:nx,nys_x:nyn_x,nzb_x:nzt_x) !<
288	REAL(wp) :: f_inv(nys:nyn,nxl:nxr,1:nz) !<
289
290	REAL(wp), DIMENSION(nys_x:nyn_x,nnx,nzb_x:nzt_x,0:pdims(1)-1) :: work !<
291
292
293	!
294	!-- If the PE grid is one-dimensional along y, the array has only to be
295	!-- reordered locally and therefore no transposition has to be done.
296	IF ( pdims(1) /= 1 ) THEN
297
298	#if defined( __parallel )
299	!
300	!-- Reorder input array for transposition
301	!$OMP PARALLEL PRIVATE ( i, j, k, l, xs )
302	!$OMP DO
303	!$acc data copyout( work )
304	DO l = 0, pdims(1) - 1
305	xs = 0 + l * nnx
306	!$acc kernels present( f_in, work )
307	DO k = nzb_x, nzt_x
308	DO i = xs, xs + nnx - 1
309	DO j = nys_x, nyn_x
310	work(j,i-xs+1,k,l) = f_in(i,j,k)
311	ENDDO
312	ENDDO
313	ENDDO
314	!$acc end kernels
315	ENDDO
316	!$acc end data
317	!$OMP END PARALLEL
318
319	!
320	!-- Transpose array
321	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start', cpu_log_nowait )
322	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
323	CALL MPI_ALLTOALL( work(nys_x,1,nzb_x,0), sendrecvcount_zx, MPI_REAL, &
324	f_inv(nys,nxl,1), sendrecvcount_zx, MPI_REAL, &
325	comm1dx, ierr )
326	!$acc update device( f_inv )
327	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
328	#endif
329
330	ELSE
331
332	!
333	!-- Reorder the array in a way that the z index is in first position
334	!$OMP PARALLEL PRIVATE ( i, j, k )
335	!$OMP DO
336	!$acc kernels present( f_in, f_inv )
337	DO i = nxl, nxr
338	DO j = nys, nyn
339	DO k = 1, nz
340	f_inv(j,i,k) = f_in(i,j,k)
341	ENDDO
342	ENDDO
343	ENDDO
344	!$acc end kernels
345	!$OMP END PARALLEL
346
347	ENDIF
348
349	END SUBROUTINE transpose_xz
350
351
352	!------------------------------------------------------------------------------!
353	! Description:
354	! ------------
355	!> Resorting data after the transposition from y to x. The transposition itself
356	!> is carried out in transpose_yx
357	!------------------------------------------------------------------------------!
358	SUBROUTINE resort_for_yx( f_inv, f_out )
359
360
361	USE indices, &
362	ONLY: nx
363
364	USE kinds
365
366	USE transpose_indices, &
367	ONLY: nyn_x, nys_x, nzb_x, nzt_x
368
369	IMPLICIT NONE
370
371	REAL(wp) :: f_inv(nys_x:nyn_x,nzb_x:nzt_x,0:nx) !<
372	REAL(wp) :: f_out(0:nx,nys_x:nyn_x,nzb_x:nzt_x) !<
373
374
375	INTEGER(iwp) :: i !<
376	INTEGER(iwp) :: j !<
377	INTEGER(iwp) :: k !<
378	!
379	!-- Rearrange indices of input array in order to make data to be send
380	!-- by MPI contiguous
381	!$OMP PARALLEL PRIVATE ( i, j, k )
382	!$OMP DO
383	!$acc kernels present( f_inv, f_out )
384	DO i = 0, nx
385	DO k = nzb_x, nzt_x
386	DO j = nys_x, nyn_x
387	f_out(i,j,k) = f_inv(j,k,i)
388	ENDDO
389	ENDDO
390	ENDDO
391	!$acc end kernels
392	!$OMP END PARALLEL
393
394	END SUBROUTINE resort_for_yx
395
396
397	!------------------------------------------------------------------------------!
398	! Description:
399	! ------------
400	!> Transposition of input array (f_in) from y to x. For the input array, all
401	!> elements along y reside on the same PE, while after transposition, all
402	!> elements along x reside on the same PE.
403	!------------------------------------------------------------------------------!
404	SUBROUTINE transpose_yx( f_in, f_inv )
405
406
407	USE cpulog, &
408	ONLY: cpu_log, cpu_log_nowait, log_point_s
409
410	USE indices, &
411	ONLY: nx, ny
412
413	USE kinds
414
415	USE pegrid
416
417	USE transpose_indices, &
418	ONLY: nxl_y, nxr_y, nyn_x, nys_x, nzb_x, nzb_y, nzt_x, nzt_y
419
420	IMPLICIT NONE
421
422	INTEGER(iwp) :: i !<
423	INTEGER(iwp) :: j !<
424	INTEGER(iwp) :: k !<
425	INTEGER(iwp) :: l !<
426	INTEGER(iwp) :: ys !<
427
428	REAL(wp) :: f_in(0:ny,nxl_y:nxr_y,nzb_y:nzt_y) !<
429	REAL(wp) :: f_inv(nys_x:nyn_x,nzb_x:nzt_x,0:nx) !<
430
431	REAL(wp), DIMENSION(nyn_x-nys_x+1,nzb_y:nzt_y,nxl_y:nxr_y,0:pdims(2)-1) :: work !<
432
433
434	IF ( numprocs /= 1 ) THEN
435
436	#if defined( __parallel )
437	!
438	!-- Reorder input array for transposition
439	!$OMP PARALLEL PRIVATE ( i, j, k, l, ys )
440	!$OMP DO
441	!$acc data copyout( work )
442	DO l = 0, pdims(2) - 1
443	ys = 0 + l * ( nyn_x - nys_x + 1 )
444	!$acc kernels present( f_in, work )
445	DO i = nxl_y, nxr_y
446	DO k = nzb_y, nzt_y
447	DO j = ys, ys + nyn_x - nys_x
448	work(j-ys+1,k,i,l) = f_in(j,i,k)
449	ENDDO
450	ENDDO
451	ENDDO
452	!$acc end kernels
453	ENDDO
454	!$acc end data
455	!$OMP END PARALLEL
456
457	!
458	!-- Transpose array
459	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start', cpu_log_nowait )
460	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
461	CALL MPI_ALLTOALL( work(1,nzb_y,nxl_y,0), sendrecvcount_xy, MPI_REAL, &
462	f_inv(nys_x,nzb_x,0), sendrecvcount_xy, MPI_REAL, &
463	comm1dy, ierr )
464	!$acc update device( f_inv )
465	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
466	#endif
467
468	ELSE
469
470	!
471	!-- Reorder array f_in the same way as ALLTOALL did it
472	!$OMP PARALLEL PRIVATE ( i, j, k )
473	!$OMP DO
474	!$acc kernels present( f_in, f_inv )
475	DO i = nxl_y, nxr_y
476	DO k = nzb_y, nzt_y
477	DO j = 0, ny
478	f_inv(j,k,i) = f_in(j,i,k)
479	ENDDO
480	ENDDO
481	ENDDO
482	!$acc end kernels
483	!$OMP END PARALLEL
484
485	ENDIF
486
487	END SUBROUTINE transpose_yx
488
489
490	!------------------------------------------------------------------------------!
491	! Description:
492	! ------------
493	!> Transposition of input array (f_in) from y to x. For the input array, all
494	!> elements along y reside on the same PE, while after transposition, all
495	!> elements along x reside on the same PE.
496	!> This is a direct transposition for arrays with indices in regular order
497	!> (k,j,i) (cf. transpose_yx).
498	!------------------------------------------------------------------------------!
499	SUBROUTINE transpose_yxd( f_in, f_out )
500
501
502	USE cpulog, &
503	ONLY: cpu_log, cpu_log_nowait, log_point_s
504
505	USE indices, &
506	ONLY: nnx, nny, nnz, nx, nxl, nxr, nyn, nys, nz
507
508	USE kinds
509
510	USE pegrid
511
512	USE transpose_indices, &
513	ONLY: nyn_x, nys_x, nzb_x, nzt_x
514
515	IMPLICIT NONE
516
517	INTEGER(iwp) :: i !<
518	INTEGER(iwp) :: j !<
519	INTEGER(iwp) :: k !<
520	INTEGER(iwp) :: l !<
521	INTEGER(iwp) :: m !<
522	INTEGER(iwp) :: xs !<
523
524	REAL(wp) :: f_in(1:nz,nys:nyn,nxl:nxr) !<
525	REAL(wp) :: f_inv(nxl:nxr,1:nz,nys:nyn) !<
526	REAL(wp) :: f_out(0:nx,nys_x:nyn_x,nzb_x:nzt_x) !<
527	REAL(wp) :: work(nnxnnynnz) !<
528	#if defined( __parallel )
529
530	!
531	!-- Rearrange indices of input array in order to make data to be send
532	!-- by MPI contiguous
533	DO k = 1, nz
534	DO j = nys, nyn
535	DO i = nxl, nxr
536	f_inv(i,k,j) = f_in(k,j,i)
537	ENDDO
538	ENDDO
539	ENDDO
540
541	!
542	!-- Transpose array
543	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
544	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
545	CALL MPI_ALLTOALL( f_inv(nxl,1,nys), sendrecvcount_xy, MPI_REAL, &
546	work(1), sendrecvcount_xy, MPI_REAL, &
547	comm1dx, ierr )
548	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
549
550	!
551	!-- Reorder transposed array
552	m = 0
553	DO l = 0, pdims(1) - 1
554	xs = 0 + l * nnx
555	DO j = nys_x, nyn_x
556	DO k = 1, nz
557	DO i = xs, xs + nnx - 1
558	m = m + 1
559	f_out(i,j,k) = work(m)
560	ENDDO
561	ENDDO
562	ENDDO
563	ENDDO
564
565	#endif
566
567	END SUBROUTINE transpose_yxd
568
569
570	!------------------------------------------------------------------------------!
571	! Description:
572	! ------------
573	!> Resorting data for the transposition from y to z. The transposition itself
574	!> is carried out in transpose_yz
575	!------------------------------------------------------------------------------!
576	SUBROUTINE resort_for_yz( f_in, f_inv )
577
578
579	USE indices, &
580	ONLY: ny
581
582	USE kinds
583
584	USE transpose_indices, &
585	ONLY: nxl_y, nxr_y, nzb_y, nzt_y
586
587	IMPLICIT NONE
588
589	REAL(wp) :: f_in(0:ny,nxl_y:nxr_y,nzb_y:nzt_y) !<
590	REAL(wp) :: f_inv(nxl_y:nxr_y,nzb_y:nzt_y,0:ny) !<
591
592	INTEGER(iwp) :: i !<
593	INTEGER(iwp) :: j !<
594	INTEGER(iwp) :: k !<
595
596	!
597	!-- Rearrange indices of input array in order to make data to be send
598	!-- by MPI contiguous
599	!$OMP PARALLEL PRIVATE ( i, j, k )
600	!$OMP DO
601	!$acc kernels present( f_in, f_inv )
602	DO j = 0, ny
603	DO k = nzb_y, nzt_y
604	DO i = nxl_y, nxr_y
605	f_inv(i,k,j) = f_in(j,i,k)
606	ENDDO
607	ENDDO
608	ENDDO
609	!$acc end kernels
610	!$OMP END PARALLEL
611
612	END SUBROUTINE resort_for_yz
613
614
615	!------------------------------------------------------------------------------!
616	! Description:
617	! ------------
618	!> Transposition of input array (f_in) from y to z. For the input array, all
619	!> elements along y reside on the same PE, while after transposition, all
620	!> elements along z reside on the same PE.
621	!------------------------------------------------------------------------------!
622	SUBROUTINE transpose_yz( f_inv, f_out )
623
624
625	USE cpulog, &
626	ONLY: cpu_log, cpu_log_nowait, log_point_s
627
628	USE indices, &
629	ONLY: ny, nz
630
631	USE kinds
632
633	USE pegrid
634
635	USE transpose_indices, &
636	ONLY: nxl_y, nxl_z, nxr_y, nxr_z, nyn_z, nys_z, nzb_y, nzt_y
637
638	IMPLICIT NONE
639
640	INTEGER(iwp) :: i !<
641	INTEGER(iwp) :: j !<
642	INTEGER(iwp) :: k !<
643	INTEGER(iwp) :: l !<
644	INTEGER(iwp) :: zs !<
645
646	REAL(wp) :: f_inv(nxl_y:nxr_y,nzb_y:nzt_y,0:ny) !<
647	REAL(wp) :: f_out(nxl_z:nxr_z,nys_z:nyn_z,1:nz) !<
648
649	REAL(wp), DIMENSION(nxl_z:nxr_z,nzt_y-nzb_y+1,nys_z:nyn_z,0:pdims(1)-1) :: work !<
650
651
652	!
653	!-- If the PE grid is one-dimensional along y, only local reordering
654	!-- of the data is necessary and no transposition has to be done.
655	IF ( pdims(1) == 1 ) THEN
656
657	!$OMP PARALLEL PRIVATE ( i, j, k )
658	!$OMP DO
659	!$acc kernels present( f_inv, f_out )
660	DO j = 0, ny
661	DO k = nzb_y, nzt_y
662	DO i = nxl_y, nxr_y
663	f_out(i,j,k) = f_inv(i,k,j)
664	ENDDO
665	ENDDO
666	ENDDO
667	!$acc end kernels
668	!$OMP END PARALLEL
669
670	ELSE
671
672	#if defined( __parallel )
673	!
674	!-- Transpose array
675	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start', cpu_log_nowait )
676	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
677	!$acc update host( f_inv )
678	CALL MPI_ALLTOALL( f_inv(nxl_y,nzb_y,0), sendrecvcount_yz, MPI_REAL, &
679	work(nxl_z,1,nys_z,0), sendrecvcount_yz, MPI_REAL, &
680	comm1dx, ierr )
681	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
682
683	!
684	!-- Reorder transposed array
685	!$OMP PARALLEL PRIVATE ( i, j, k, l, zs )
686	!$OMP DO
687	!$acc data copyin( work )
688	DO l = 0, pdims(1) - 1
689	zs = 1 + l * ( nzt_y - nzb_y + 1 )
690	!$acc kernels present( f_out )
691	DO j = nys_z, nyn_z
692	DO k = zs, zs + nzt_y - nzb_y
693	DO i = nxl_z, nxr_z
694	f_out(i,j,k) = work(i,k-zs+1,j,l)
695	ENDDO
696	ENDDO
697	ENDDO
698	!$acc end kernels
699	ENDDO
700	!$acc end data
701	!$OMP END PARALLEL
702	#endif
703
704	ENDIF
705
706	END SUBROUTINE transpose_yz
707
708
709	!------------------------------------------------------------------------------!
710	! Description:
711	! ------------
712	!> Resorting data for the transposition from z to x. The transposition itself
713	!> is carried out in transpose_zx
714	!------------------------------------------------------------------------------!
715	SUBROUTINE resort_for_zx( f_in, f_inv )
716
717
718	USE indices, &
719	ONLY: nxl, nxr, nyn, nys, nz
720
721	USE kinds
722
723	IMPLICIT NONE
724
725	REAL(wp) :: f_in(1:nz,nys:nyn,nxl:nxr) !<
726	REAL(wp) :: f_inv(nys:nyn,nxl:nxr,1:nz) !<
727
728	INTEGER(iwp) :: i !<
729	INTEGER(iwp) :: j !<
730	INTEGER(iwp) :: k !<
731
732	!
733	!-- Rearrange indices of input array in order to make data to be send
734	!-- by MPI contiguous
735	!$OMP PARALLEL PRIVATE ( i, j, k )
736	!$OMP DO
737	!$acc kernels present( f_in, f_inv )
738	DO k = 1,nz
739	DO i = nxl, nxr
740	DO j = nys, nyn
741	f_inv(j,i,k) = f_in(k,j,i)
742	ENDDO
743	ENDDO
744	ENDDO
745	!$acc end kernels
746	!$OMP END PARALLEL
747
748	END SUBROUTINE resort_for_zx
749
750
751	!------------------------------------------------------------------------------!
752	! Description:
753	! ------------
754	!> Transposition of input array (f_in) from z to x. For the input array, all
755	!> elements along z reside on the same PE, while after transposition, all
756	!> elements along x reside on the same PE.
757	!------------------------------------------------------------------------------!
758	SUBROUTINE transpose_zx( f_inv, f_out )
759
760
761	USE cpulog, &
762	ONLY: cpu_log, cpu_log_nowait, log_point_s
763
764	USE indices, &
765	ONLY: nnx, nx, nxl, nxr, nyn, nys, nz
766
767	USE kinds
768
769	USE pegrid
770
771	USE transpose_indices, &
772	ONLY: nyn_x, nys_x, nzb_x, nzt_x
773
774	IMPLICIT NONE
775
776	INTEGER(iwp) :: i !<
777	INTEGER(iwp) :: j !<
778	INTEGER(iwp) :: k !<
779	INTEGER(iwp) :: l !<
780	INTEGER(iwp) :: xs !<
781
782	REAL(wp) :: f_inv(nys:nyn,nxl:nxr,1:nz) !<
783	REAL(wp) :: f_out(0:nx,nys_x:nyn_x,nzb_x:nzt_x) !<
784
785	REAL(wp), DIMENSION(nys_x:nyn_x,nnx,nzb_x:nzt_x,0:pdims(1)-1) :: work !<
786
787
788	!
789	!-- If the PE grid is one-dimensional along y, only local reordering
790	!-- of the data is necessary and no transposition has to be done.
791	IF ( pdims(1) == 1 ) THEN
792
793	!$OMP PARALLEL PRIVATE ( i, j, k )
794	!$OMP DO
795	!$acc kernels present( f_inv, f_out )
796	DO k = 1, nz
797	DO i = nxl, nxr
798	DO j = nys, nyn
799	f_out(i,j,k) = f_inv(j,i,k)
800	ENDDO
801	ENDDO
802	ENDDO
803	!$acc end kernels
804	!$OMP END PARALLEL
805
806	ELSE
807
808	#if defined( __parallel )
809	!
810	!-- Transpose array
811	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start', cpu_log_nowait )
812	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
813	!$acc update host( f_inv )
814	CALL MPI_ALLTOALL( f_inv(nys,nxl,1), sendrecvcount_zx, MPI_REAL, &
815	work(nys_x,1,nzb_x,0), sendrecvcount_zx, MPI_REAL, &
816	comm1dx, ierr )
817	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
818
819	!
820	!-- Reorder transposed array
821	!$OMP PARALLEL PRIVATE ( i, j, k, l, xs )
822	!$OMP DO
823	!$acc data copyin( work )
824	DO l = 0, pdims(1) - 1
825	xs = 0 + l * nnx
826	!$acc kernels present( f_out )
827	DO k = nzb_x, nzt_x
828	DO i = xs, xs + nnx - 1
829	DO j = nys_x, nyn_x
830	f_out(i,j,k) = work(j,i-xs+1,k,l)
831	ENDDO
832	ENDDO
833	ENDDO
834	!$acc end kernels
835	ENDDO
836	!$acc end data
837	!$OMP END PARALLEL
838	#endif
839
840	ENDIF
841
842	END SUBROUTINE transpose_zx
843
844
845	!------------------------------------------------------------------------------!
846	! Description:
847	! ------------
848	!> Resorting data after the transposition from z to y. The transposition itself
849	!> is carried out in transpose_zy
850	!------------------------------------------------------------------------------!
851	SUBROUTINE resort_for_zy( f_inv, f_out )
852
853
854	USE indices, &
855	ONLY: ny
856
857	USE kinds
858
859	USE transpose_indices, &
860	ONLY: nxl_y, nxr_y, nzb_y, nzt_y
861
862	IMPLICIT NONE
863
864	REAL(wp) :: f_inv(nxl_y:nxr_y,nzb_y:nzt_y,0:ny) !<
865	REAL(wp) :: f_out(0:ny,nxl_y:nxr_y,nzb_y:nzt_y) !<
866
867
868	INTEGER(iwp) :: i !<
869	INTEGER(iwp) :: j !<
870	INTEGER(iwp) :: k !<
871
872	!
873	!-- Rearrange indices of input array in order to make data to be send
874	!-- by MPI contiguous
875	!$OMP PARALLEL PRIVATE ( i, j, k )
876	!$OMP DO
877	!$acc kernels present( f_inv, f_out )
878	DO k = nzb_y, nzt_y
879	DO j = 0, ny
880	DO i = nxl_y, nxr_y
881	f_out(j,i,k) = f_inv(i,k,j)
882	ENDDO
883	ENDDO
884	ENDDO
885	!$acc end kernels
886	!$OMP END PARALLEL
887
888	END SUBROUTINE resort_for_zy
889
890
891	!------------------------------------------------------------------------------!
892	! Description:
893	! ------------
894	!> Transposition of input array (f_in) from z to y. For the input array, all
895	!> elements along z reside on the same PE, while after transposition, all
896	!> elements along y reside on the same PE.
897	!------------------------------------------------------------------------------!
898	SUBROUTINE transpose_zy( f_in, f_inv )
899
900
901	USE cpulog, &
902	ONLY: cpu_log, cpu_log_nowait, log_point_s
903
904	USE indices, &
905	ONLY: ny, nz
906
907	USE kinds
908
909	USE pegrid
910
911	USE transpose_indices, &
912	ONLY: nxl_y, nxl_z, nxr_y, nxr_z, nyn_z, nys_z, nzb_y, nzt_y
913
914	IMPLICIT NONE
915
916	INTEGER(iwp) :: i !<
917	INTEGER(iwp) :: j !<
918	INTEGER(iwp) :: k !<
919	INTEGER(iwp) :: l !<
920	INTEGER(iwp) :: zs !<
921
922	REAL(wp) :: f_in(nxl_z:nxr_z,nys_z:nyn_z,1:nz) !<
923	REAL(wp) :: f_inv(nxl_y:nxr_y,nzb_y:nzt_y,0:ny) !<
924
925	REAL(wp), DIMENSION(nxl_z:nxr_z,nzt_y-nzb_y+1,nys_z:nyn_z,0:pdims(1)-1) :: work !<
926
927	!
928	!-- If the PE grid is one-dimensional along y, the array has only to be
929	!-- reordered locally and therefore no transposition has to be done.
930	IF ( pdims(1) /= 1 ) THEN
931
932	#if defined( __parallel )
933	!
934	!-- Reorder input array for transposition
935	!$OMP PARALLEL PRIVATE ( i, j, k, l, zs )
936	!$OMP DO
937	!$acc data copyout( work )
938	DO l = 0, pdims(1) - 1
939	zs = 1 + l * ( nzt_y - nzb_y + 1 )
940	!$acc kernels present( f_in, work )
941	DO j = nys_z, nyn_z
942	DO k = zs, zs + nzt_y - nzb_y
943	DO i = nxl_z, nxr_z
944	work(i,k-zs+1,j,l) = f_in(i,j,k)
945	ENDDO
946	ENDDO
947	ENDDO
948	!$acc end kernels
949	ENDDO
950	!$acc end data
951	!$OMP END PARALLEL
952
953	!
954	!-- Transpose array
955	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start', cpu_log_nowait )
956	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
957	CALL MPI_ALLTOALL( work(nxl_z,1,nys_z,0), sendrecvcount_yz, MPI_REAL, &
958	f_inv(nxl_y,nzb_y,0), sendrecvcount_yz, MPI_REAL, &
959	comm1dx, ierr )
960	!$acc update device( f_inv )
961	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
962	#endif
963
964	ELSE
965	!
966	!-- Reorder the array in the same way like ALLTOALL did it
967	!$OMP PARALLEL PRIVATE ( i, j, k )
968	!$OMP DO
969	!$acc kernels present( f_in, f_inv )
970	DO k = nzb_y, nzt_y
971	DO j = 0, ny
972	DO i = nxl_y, nxr_y
973	f_inv(i,k,j) = f_in(i,j,k)
974	ENDDO
975	ENDDO
976	ENDDO
977	!$acc end kernels
978	!$OMP END PARALLEL
979
980	ENDIF
981
982	END SUBROUTINE transpose_zy
983
984
985	!------------------------------------------------------------------------------!
986	! Description:
987	! ------------
988	!> Transposition of input array (f_in) from z to y. For the input array, all
989	!> elements along z reside on the same PE, while after transposition, all
990	!> elements along y reside on the same PE.
991	!> This is a direct transposition for arrays with indices in regular order
992	!> (k,j,i) (cf. transpose_zy).
993	!------------------------------------------------------------------------------!
994	SUBROUTINE transpose_zyd( f_in, f_out )
995
996
997	USE cpulog, &
998	ONLY: cpu_log, cpu_log_nowait, log_point_s
999
1000	USE indices, &
1001	ONLY: nnx, nny, nnz, nxl, nxr, nyn, nys, ny, nz
1002
1003	USE kinds
1004
1005	USE pegrid
1006
1007	USE transpose_indices, &
1008	ONLY: nxl_y, nxl_yd, nxr_y, nxr_yd, nzb_y, nzb_yd, nzt_y, nzt_yd
1009
1010	IMPLICIT NONE
1011
1012	INTEGER(iwp) :: i !<
1013	INTEGER(iwp) :: j !<
1014	INTEGER(iwp) :: k !<
1015	INTEGER(iwp) :: l !<
1016	INTEGER(iwp) :: m !<
1017	INTEGER(iwp) :: ys !<
1018
1019	REAL(wp) :: f_in(1:nz,nys:nyn,nxl:nxr) !<
1020	REAL(wp) :: f_inv(nys:nyn,nxl:nxr,1:nz) !<
1021	REAL(wp) :: f_out(0:ny,nxl_yd:nxr_yd,nzb_yd:nzt_yd) !<
1022	REAL(wp) :: work(nnxnnynnz) !<
1023
1024	#if defined( __parallel )
1025
1026	!
1027	!-- Rearrange indices of input array in order to make data to be send
1028	!-- by MPI contiguous
1029	DO i = nxl, nxr
1030	DO j = nys, nyn
1031	DO k = 1, nz
1032	f_inv(j,i,k) = f_in(k,j,i)
1033	ENDDO
1034	ENDDO
1035	ENDDO
1036
1037	!
1038	!-- Move data to different array, because memory location of work1 is
1039	!-- needed further below (work1 = work2).
1040	!-- If the PE grid is one-dimensional along x, only local reordering
1041	!-- of the data is necessary and no transposition has to be done.
1042	IF ( pdims(2) == 1 ) THEN
1043	DO k = 1, nz
1044	DO i = nxl, nxr
1045	DO j = nys, nyn
1046	f_out(j,i,k) = f_inv(j,i,k)
1047	ENDDO
1048	ENDDO
1049	ENDDO
1050	RETURN
1051	ENDIF
1052
1053	!
1054	!-- Transpose array
1055	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
1056	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
1057	CALL MPI_ALLTOALL( f_inv(nys,nxl,1), sendrecvcount_zyd, MPI_REAL, &
1058	work(1), sendrecvcount_zyd, MPI_REAL, &
1059	comm1dy, ierr )
1060	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
1061
1062	!
1063	!-- Reorder transposed array
1064	m = 0
1065	DO l = 0, pdims(2) - 1
1066	ys = 0 + l * nny
1067	DO k = nzb_yd, nzt_yd
1068	DO i = nxl_yd, nxr_yd
1069	DO j = ys, ys + nny - 1
1070	m = m + 1
1071	f_out(j,i,k) = work(m)
1072	ENDDO
1073	ENDDO
1074	ENDDO
1075	ENDDO
1076
1077	#endif
1078
1079	END SUBROUTINE transpose_zyd

Note: See TracBrowser for help on using the repository browser.

Download in other formats:

| Impressum | ©Leibniz Universität Hannover |