Home

Context Navigation

source: palm/trunk/SOURCE/transpose.f90 @ 1320

Last change on this file since 1320 was 1320, checked in by raasch, 10 years ago
ONLY-attribute added to USE-statements, kind-parameters added to all INTEGER and REAL declaration statements, kinds are defined in new module kinds, old module precision_kind is removed, revision history before 2012 removed, comment fields (!:) to be used for variable explanations added to all variable declaration statements
Property svn:keywords set to `Id`
File size: 32.2 KB

Line
1	SUBROUTINE resort_for_xy( f_in, f_inv )
2
3	!--------------------------------------------------------------------------------!
4	! This file is part of PALM.
5	!
6	! PALM is free software: you can redistribute it and/or modify it under the terms
7	! of the GNU General Public License as published by the Free Software Foundation,
8	! either version 3 of the License, or (at your option) any later version.
9	!
10	! PALM is distributed in the hope that it will be useful, but WITHOUT ANY
11	! WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
12	! A PARTICULAR PURPOSE. See the GNU General Public License for more details.
13	!
14	! You should have received a copy of the GNU General Public License along with
15	! PALM. If not, see <http://www.gnu.org/licenses/>.
16	!
17	! Copyright 1997-2014 Leibniz Universitaet Hannover
18	!--------------------------------------------------------------------------------!
19	!
20	! Current revisions:
21	! -----------------
22	! ONLY-attribute added to USE-statements,
23	! kind-parameters added to all INTEGER and REAL declaration statements,
24	! kinds are defined in new module kinds,
25	! old module precision_kind is removed,
26	! revision history before 2012 removed,
27	! comment fields (!:) to be used for variable explanations added to
28	! all variable declaration statements
29	!
30	! Former revisions:
31	! -----------------
32	! $Id: transpose.f90 1320 2014-03-20 08:40:49Z raasch $
33	!
34	! 1318 2014-03-17 13:35:16Z raasch
35	! cpu_log_nowait parameter added to cpu measurements of the transpositions
36	! required for solving the Poisson equation (poisfft),
37	! module interfaces removed
38	!
39	! 1257 2013-11-08 15:18:40Z raasch
40	! openacc loop and loop vector clauses removed
41	!
42	! 1216 2013-08-26 09:31:42Z raasch
43	! re-sorting of the transposed / to be transposed arrays moved to separate
44	! routines resort_for_...
45	!
46	! 1111 2013-03-08 23:54:10Z raasch
47	! openACC directives added,
48	! resorting data from/to work changed, work got 4 dimensions instead of 1
49	!
50	! 1106 2013-03-04 05:31:38Z raasch
51	! preprocessor lines rearranged so that routines can also be used in serial
52	! (non-parallel) mode
53	!
54	! 1092 2013-02-02 11:24:22Z raasch
55	! unused variables removed
56	!
57	! 1036 2012-10-22 13:43:42Z raasch
58	! code put under GPL (PALM 3.9)
59	!
60	! 1003 2012-09-14 14:35:53Z raasch
61	! indices nxa, nya, etc. replaced by nx, ny, etc.
62	!
63	! Revision 1.1 1997/07/24 11:25:18 raasch
64	! Initial revision
65	!
66	!------------------------------------------------------------------------------!
67	! Description:
68	! ------------
69	! Resorting data for the transposition from x to y. The transposition itself
70	! is carried out in transpose_xy
71	!------------------------------------------------------------------------------!
72
73	USE indices, &
74	ONLY: nx
75
76	USE kinds
77
78	USE transpose_indices, &
79	ONLY: nxl_z, nxr_z, nyn_x, nyn_z, nys_x, nys_z, nzb_x, nzt_x
80
81	IMPLICIT NONE
82
83	REAL(wp) :: f_in(0:nx,nys_x:nyn_x,nzb_x:nzt_x) !:
84	REAL(wp) :: f_inv(nys_x:nyn_x,nzb_x:nzt_x,0:nx) !:
85
86
87	INTEGER(iwp) :: i !:
88	INTEGER(iwp) :: j !:
89	INTEGER(iwp) :: k !:
90	!
91	!-- Rearrange indices of input array in order to make data to be send
92	!-- by MPI contiguous
93	!$OMP PARALLEL PRIVATE ( i, j, k )
94	!$OMP DO
95	!$acc kernels present( f_in, f_inv )
96	DO i = 0, nx
97	DO k = nzb_x, nzt_x
98	DO j = nys_x, nyn_x
99	f_inv(j,k,i) = f_in(i,j,k)
100	ENDDO
101	ENDDO
102	ENDDO
103	!$acc end kernels
104	!$OMP END PARALLEL
105
106	END SUBROUTINE resort_for_xy
107
108
109	SUBROUTINE transpose_xy( f_inv, f_out )
110
111	!------------------------------------------------------------------------------!
112	! Description:
113	! ------------
114	! Transposition of input array (f_in) from x to y. For the input array, all
115	! elements along x reside on the same PE, while after transposition, all
116	! elements along y reside on the same PE.
117	!------------------------------------------------------------------------------!
118
119	USE cpulog, &
120	ONLY: cpu_log, cpu_log_nowait, log_point_s
121
122	USE indices, &
123	ONLY: nx, ny
124
125	USE kinds
126
127	USE pegrid
128
129	USE transpose_indices, &
130	ONLY: nxl_y, nxr_y, nyn_x, nys_x, nzb_x, nzb_y, nzt_x, nzt_y
131
132	IMPLICIT NONE
133
134	INTEGER(iwp) :: i !:
135	INTEGER(iwp) :: j !:
136	INTEGER(iwp) :: k !:
137	INTEGER(iwp) :: l !:
138	INTEGER(iwp) :: ys !:
139
140	REAL(wp) :: f_inv(nys_x:nyn_x,nzb_x:nzt_x,0:nx) !:
141	REAL(wp) :: f_out(0:ny,nxl_y:nxr_y,nzb_y:nzt_y) !:
142
143	REAL(wp), DIMENSION(nyn_x-nys_x+1,nzb_y:nzt_y,nxl_y:nxr_y,0:pdims(2)-1) :: work !:
144
145
146	IF ( numprocs /= 1 ) THEN
147
148	#if defined( __parallel )
149	!
150	!-- Transpose array
151	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start', cpu_log_nowait )
152	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
153	!$acc update host( f_inv )
154	CALL MPI_ALLTOALL( f_inv(nys_x,nzb_x,0), sendrecvcount_xy, MPI_REAL, &
155	work(1,nzb_y,nxl_y,0), sendrecvcount_xy, MPI_REAL, &
156	comm1dy, ierr )
157	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
158
159	!
160	!-- Reorder transposed array
161	!$OMP PARALLEL PRIVATE ( i, j, k, l, ys )
162	!$OMP DO
163	!$acc data copyin( work )
164	DO l = 0, pdims(2) - 1
165	ys = 0 + l * ( nyn_x - nys_x + 1 )
166	!$acc kernels present( f_out, work )
167	DO i = nxl_y, nxr_y
168	DO k = nzb_y, nzt_y
169	DO j = ys, ys + nyn_x - nys_x
170	f_out(j,i,k) = work(j-ys+1,k,i,l)
171	ENDDO
172	ENDDO
173	ENDDO
174	!$acc end kernels
175	ENDDO
176	!$acc end data
177	!$OMP END PARALLEL
178	#endif
179
180	ELSE
181
182	!
183	!-- Reorder transposed array
184	!$OMP PARALLEL PRIVATE ( i, j, k )
185	!$OMP DO
186	!$acc kernels present( f_inv, f_out )
187	DO k = nzb_y, nzt_y
188	DO i = nxl_y, nxr_y
189	DO j = 0, ny
190	f_out(j,i,k) = f_inv(j,k,i)
191	ENDDO
192	ENDDO
193	ENDDO
194	!$acc end kernels
195	!$OMP END PARALLEL
196
197	ENDIF
198
199	END SUBROUTINE transpose_xy
200
201
202	SUBROUTINE resort_for_xz( f_inv, f_out )
203
204	!------------------------------------------------------------------------------!
205	! Description:
206	! ------------
207	! Resorting data after the transposition from x to z. The transposition itself
208	! is carried out in transpose_xz
209	!------------------------------------------------------------------------------!
210
211	USE indices, &
212	ONLY: nxl, nxr, nyn, nys, nz
213
214	USE kinds
215
216	IMPLICIT NONE
217
218	REAL(wp) :: f_inv(nys:nyn,nxl:nxr,1:nz) !:
219	REAL(wp) :: f_out(1:nz,nys:nyn,nxl:nxr) !:
220
221	INTEGER(iwp) :: i !:
222	INTEGER(iwp) :: j !:
223	INTEGER(iwp) :: k !:
224	!
225	!-- Rearrange indices of input array in order to make data to be send
226	!-- by MPI contiguous.
227	!-- In case of parallel fft/transposition, scattered store is faster in
228	!-- backward direction!!!
229	!$OMP PARALLEL PRIVATE ( i, j, k )
230	!$OMP DO
231	!$acc kernels present( f_inv, f_out )
232	DO k = 1, nz
233	DO i = nxl, nxr
234	DO j = nys, nyn
235	f_out(k,j,i) = f_inv(j,i,k)
236	ENDDO
237	ENDDO
238	ENDDO
239	!$acc end kernels
240	!$OMP END PARALLEL
241
242	END SUBROUTINE resort_for_xz
243
244
245	SUBROUTINE transpose_xz( f_in, f_inv )
246
247	!------------------------------------------------------------------------------!
248	! Description:
249	! ------------
250	! Transposition of input array (f_in) from x to z. For the input array, all
251	! elements along x reside on the same PE, while after transposition, all
252	! elements along z reside on the same PE.
253	!------------------------------------------------------------------------------!
254
255	USE cpulog, &
256	ONLY: cpu_log, cpu_log_nowait, log_point_s
257
258	USE indices, &
259	ONLY: nnx, nx, nxl, nxr, ny, nyn, nys, nz
260
261	USE kinds
262
263	USE pegrid, &
264	ONLY: collective_wait, comm1dx, comm2d, ierr, MPI_DOUBLE_PRECISION, &
265	pdims, sendrecvcount_zx
266
267	USE transpose_indices, &
268	ONLY: nyn_x, nys_x, nzb_x, nzt_x
269
270	IMPLICIT NONE
271
272	INTEGER(iwp) :: i !:
273	INTEGER(iwp) :: j !:
274	INTEGER(iwp) :: k !:
275	INTEGER(iwp) :: l !:
276	INTEGER(iwp) :: xs !:
277
278	REAL(wp) :: f_in(0:nx,nys_x:nyn_x,nzb_x:nzt_x) !:
279	REAL(wp) :: f_inv(nys:nyn,nxl:nxr,1:nz) !:
280
281	REAL(wp), DIMENSION(nys_x:nyn_x,nnx,nzb_x:nzt_x,0:pdims(1)-1) :: work !:
282
283
284	!
285	!-- If the PE grid is one-dimensional along y, the array has only to be
286	!-- reordered locally and therefore no transposition has to be done.
287	IF ( pdims(1) /= 1 ) THEN
288
289	#if defined( __parallel )
290	!
291	!-- Reorder input array for transposition
292	!$OMP PARALLEL PRIVATE ( i, j, k, l, xs )
293	!$OMP DO
294	!$acc data copyout( work )
295	DO l = 0, pdims(1) - 1
296	xs = 0 + l * nnx
297	!$acc kernels present( f_in, work )
298	DO k = nzb_x, nzt_x
299	DO i = xs, xs + nnx - 1
300	DO j = nys_x, nyn_x
301	work(j,i-xs+1,k,l) = f_in(i,j,k)
302	ENDDO
303	ENDDO
304	ENDDO
305	!$acc end kernels
306	ENDDO
307	!$acc end data
308	!$OMP END PARALLEL
309
310	!
311	!-- Transpose array
312	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start', cpu_log_nowait )
313	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
314	CALL MPI_ALLTOALL( work(nys_x,1,nzb_x,0), sendrecvcount_zx, MPI_REAL, &
315	f_inv(nys,nxl,1), sendrecvcount_zx, MPI_REAL, &
316	comm1dx, ierr )
317	!$acc update device( f_inv )
318	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
319	#endif
320
321	ELSE
322
323	!
324	!-- Reorder the array in a way that the z index is in first position
325	!$OMP PARALLEL PRIVATE ( i, j, k )
326	!$OMP DO
327	!$acc kernels present( f_in, f_inv )
328	DO i = nxl, nxr
329	DO j = nys, nyn
330	DO k = 1, nz
331	f_inv(j,i,k) = f_in(i,j,k)
332	ENDDO
333	ENDDO
334	ENDDO
335	!$acc end kernels
336	!$OMP END PARALLEL
337
338	ENDIF
339
340	END SUBROUTINE transpose_xz
341
342
343	SUBROUTINE resort_for_yx( f_inv, f_out )
344
345	!------------------------------------------------------------------------------!
346	! Description:
347	! ------------
348	! Resorting data after the transposition from y to x. The transposition itself
349	! is carried out in transpose_yx
350	!------------------------------------------------------------------------------!
351
352	USE indices, &
353	ONLY: nx
354
355	USE kinds
356
357	USE transpose_indices, &
358	ONLY: nyn_x, nys_x, nzb_x, nzt_x
359
360	IMPLICIT NONE
361
362	REAL(wp) :: f_inv(nys_x:nyn_x,nzb_x:nzt_x,0:nx) !:
363	REAL(wp) :: f_out(0:nx,nys_x:nyn_x,nzb_x:nzt_x) !:
364
365
366	INTEGER(iwp) :: i !:
367	INTEGER(iwp) :: j !:
368	INTEGER(iwp) :: k !:
369	!
370	!-- Rearrange indices of input array in order to make data to be send
371	!-- by MPI contiguous
372	!$OMP PARALLEL PRIVATE ( i, j, k )
373	!$OMP DO
374	!$acc kernels present( f_inv, f_out )
375	DO i = 0, nx
376	DO k = nzb_x, nzt_x
377	DO j = nys_x, nyn_x
378	f_out(i,j,k) = f_inv(j,k,i)
379	ENDDO
380	ENDDO
381	ENDDO
382	!$acc end kernels
383	!$OMP END PARALLEL
384
385	END SUBROUTINE resort_for_yx
386
387
388	SUBROUTINE transpose_yx( f_in, f_inv )
389
390	!------------------------------------------------------------------------------!
391	! Description:
392	! ------------
393	! Transposition of input array (f_in) from y to x. For the input array, all
394	! elements along y reside on the same PE, while after transposition, all
395	! elements along x reside on the same PE.
396	!------------------------------------------------------------------------------!
397
398	USE cpulog, &
399	ONLY: cpu_log, cpu_log_nowait, log_point_s
400
401	USE indices, &
402	ONLY: nx, ny
403
404	USE kinds
405
406	USE pegrid, &
407	ONLY: collective_wait, comm1dy, comm2d, ierr, MPI_DOUBLE_PRECISION, &
408	numprocs, pdims, sendrecvcount_xy
409
410	USE transpose_indices, &
411	ONLY: nxl_y, nxr_y, nyn_x, nys_x, nzb_x, nzb_y, nzt_x, nzt_y
412
413	IMPLICIT NONE
414
415	INTEGER(iwp) :: i !:
416	INTEGER(iwp) :: j !:
417	INTEGER(iwp) :: k !:
418	INTEGER(iwp) :: l !:
419	INTEGER(iwp) :: ys !:
420
421	REAL(wp) :: f_in(0:ny,nxl_y:nxr_y,nzb_y:nzt_y) !:
422	REAL(wp) :: f_inv(nys_x:nyn_x,nzb_x:nzt_x,0:nx) !:
423
424	REAL(wp), DIMENSION(nyn_x-nys_x+1,nzb_y:nzt_y,nxl_y:nxr_y,0:pdims(2)-1) :: work !:
425
426
427	IF ( numprocs /= 1 ) THEN
428
429	#if defined( __parallel )
430	!
431	!-- Reorder input array for transposition
432	!$OMP PARALLEL PRIVATE ( i, j, k, l, ys )
433	!$OMP DO
434	!$acc data copyout( work )
435	DO l = 0, pdims(2) - 1
436	ys = 0 + l * ( nyn_x - nys_x + 1 )
437	!$acc kernels present( f_in, work )
438	DO i = nxl_y, nxr_y
439	DO k = nzb_y, nzt_y
440	DO j = ys, ys + nyn_x - nys_x
441	work(j-ys+1,k,i,l) = f_in(j,i,k)
442	ENDDO
443	ENDDO
444	ENDDO
445	!$acc end kernels
446	ENDDO
447	!$acc end data
448	!$OMP END PARALLEL
449
450	!
451	!-- Transpose array
452	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start', cpu_log_nowait )
453	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
454	CALL MPI_ALLTOALL( work(1,nzb_y,nxl_y,0), sendrecvcount_xy, MPI_REAL, &
455	f_inv(nys_x,nzb_x,0), sendrecvcount_xy, MPI_REAL, &
456	comm1dy, ierr )
457	!$acc update device( f_inv )
458	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
459	#endif
460
461	ELSE
462
463	!
464	!-- Reorder array f_in the same way as ALLTOALL did it
465	!$OMP PARALLEL PRIVATE ( i, j, k )
466	!$OMP DO
467	!$acc kernels present( f_in, f_inv )
468	DO i = nxl_y, nxr_y
469	DO k = nzb_y, nzt_y
470	DO j = 0, ny
471	f_inv(j,k,i) = f_in(j,i,k)
472	ENDDO
473	ENDDO
474	ENDDO
475	!$acc end kernels
476	!$OMP END PARALLEL
477
478	ENDIF
479
480	END SUBROUTINE transpose_yx
481
482
483	SUBROUTINE transpose_yxd( f_in, f_out )
484
485	!------------------------------------------------------------------------------!
486	! Description:
487	! ------------
488	! Transposition of input array (f_in) from y to x. For the input array, all
489	! elements along y reside on the same PE, while after transposition, all
490	! elements along x reside on the same PE.
491	! This is a direct transposition for arrays with indices in regular order
492	! (k,j,i) (cf. transpose_yx).
493	!------------------------------------------------------------------------------!
494
495	USE cpulog, &
496	ONLY: cpu_log, cpu_log_nowait, log_point_s
497
498	USE indices, &
499	ONLY: nnx, nny, nnz, nx, nxl, nxr, nyn, nys, nz
500
501	USE kinds
502
503	USE pegrid, &
504	ONLY: collective_wait, comm1dx, comm2d, ierr, MPI_DOUBLE_PRECISION, &
505	pdims, sendrecvcount_xy
506
507	USE transpose_indices, &
508	ONLY: nyn_x, nys_x, nzb_x, nzt_x
509
510	IMPLICIT NONE
511
512	INTEGER(iwp) :: i !:
513	INTEGER(iwp) :: j !:
514	INTEGER(iwp) :: k !:
515	INTEGER(iwp) :: l !:
516	INTEGER(iwp) :: m !:
517	INTEGER(iwp) :: xs !:
518
519	REAL(wp) :: f_in(1:nz,nys:nyn,nxl:nxr) !:
520	REAL(wp) :: f_inv(nxl:nxr,1:nz,nys:nyn) !:
521	REAL(wp) :: f_out(0:nx,nys_x:nyn_x,nzb_x:nzt_x) !:
522	REAL(wp) :: work(nnxnnynnz) !:
523	#if defined( __parallel )
524
525	!
526	!-- Rearrange indices of input array in order to make data to be send
527	!-- by MPI contiguous
528	DO k = 1, nz
529	DO j = nys, nyn
530	DO i = nxl, nxr
531	f_inv(i,k,j) = f_in(k,j,i)
532	ENDDO
533	ENDDO
534	ENDDO
535
536	!
537	!-- Transpose array
538	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
539	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
540	CALL MPI_ALLTOALL( f_inv(nxl,1,nys), sendrecvcount_xy, MPI_REAL, &
541	work(1), sendrecvcount_xy, MPI_REAL, &
542	comm1dx, ierr )
543	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
544
545	!
546	!-- Reorder transposed array
547	m = 0
548	DO l = 0, pdims(1) - 1
549	xs = 0 + l * nnx
550	DO j = nys_x, nyn_x
551	DO k = 1, nz
552	DO i = xs, xs + nnx - 1
553	m = m + 1
554	f_out(i,j,k) = work(m)
555	ENDDO
556	ENDDO
557	ENDDO
558	ENDDO
559
560	#endif
561
562	END SUBROUTINE transpose_yxd
563
564
565	SUBROUTINE resort_for_yz( f_in, f_inv )
566
567	!------------------------------------------------------------------------------!
568	! Description:
569	! ------------
570	! Resorting data for the transposition from y to z. The transposition itself
571	! is carried out in transpose_yz
572	!------------------------------------------------------------------------------!
573
574	USE indices, &
575	ONLY: ny
576
577	USE kinds
578
579	USE transpose_indices, &
580	ONLY: nxl_y, nxr_y, nzb_y, nzt_y
581
582	IMPLICIT NONE
583
584	REAL(wp) :: f_in(0:ny,nxl_y:nxr_y,nzb_y:nzt_y) !:
585	REAL(wp) :: f_inv(nxl_y:nxr_y,nzb_y:nzt_y,0:ny) !:
586
587	INTEGER(iwp) :: i !:
588	INTEGER(iwp) :: j !:
589	INTEGER(iwp) :: k !:
590
591	!
592	!-- Rearrange indices of input array in order to make data to be send
593	!-- by MPI contiguous
594	!$OMP PARALLEL PRIVATE ( i, j, k )
595	!$OMP DO
596	!$acc kernels present( f_in, f_inv )
597	DO j = 0, ny
598	DO k = nzb_y, nzt_y
599	DO i = nxl_y, nxr_y
600	f_inv(i,k,j) = f_in(j,i,k)
601	ENDDO
602	ENDDO
603	ENDDO
604	!$acc end kernels
605	!$OMP END PARALLEL
606
607	END SUBROUTINE resort_for_yz
608
609
610	SUBROUTINE transpose_yz( f_inv, f_out )
611
612	!------------------------------------------------------------------------------!
613	! Description:
614	! ------------
615	! Transposition of input array (f_in) from y to z. For the input array, all
616	! elements along y reside on the same PE, while after transposition, all
617	! elements along z reside on the same PE.
618	!------------------------------------------------------------------------------!
619
620	USE cpulog, &
621	ONLY: cpu_log, cpu_log_nowait, log_point_s
622
623	USE indices, &
624	ONLY: ny, nz
625
626	USE kinds
627
628	USE pegrid, &
629	ONLY: collective_wait, comm1dx, comm2d, ierr, MPI_DOUBLE_PRECISION, &
630	pdims, sendrecvcount_yz
631
632	USE transpose_indices, &
633	ONLY: nxl_y, nxl_z, nxr_y, nxr_z, nyn_z, nys_z, nzb_y, nzt_y
634
635	IMPLICIT NONE
636
637	INTEGER(iwp) :: i !:
638	INTEGER(iwp) :: j !:
639	INTEGER(iwp) :: k !:
640	INTEGER(iwp) :: l !:
641	INTEGER(iwp) :: zs !:
642
643	REAL(wp) :: f_inv(nxl_y:nxr_y,nzb_y:nzt_y,0:ny) !:
644	REAL(wp) :: f_out(nxl_z:nxr_z,nys_z:nyn_z,1:nz) !:
645
646	REAL(wp), DIMENSION(nxl_z:nxr_z,nzt_y-nzb_y+1,nys_z:nyn_z,0:pdims(1)-1) :: work !:
647
648
649	!
650	!-- If the PE grid is one-dimensional along y, only local reordering
651	!-- of the data is necessary and no transposition has to be done.
652	IF ( pdims(1) == 1 ) THEN
653
654	!$OMP PARALLEL PRIVATE ( i, j, k )
655	!$OMP DO
656	!$acc kernels present( f_inv, f_out )
657	DO j = 0, ny
658	DO k = nzb_y, nzt_y
659	DO i = nxl_y, nxr_y
660	f_out(i,j,k) = f_inv(i,k,j)
661	ENDDO
662	ENDDO
663	ENDDO
664	!$acc end kernels
665	!$OMP END PARALLEL
666
667	ELSE
668
669	#if defined( __parallel )
670	!
671	!-- Transpose array
672	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start', cpu_log_nowait )
673	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
674	!$acc update host( f_inv )
675	CALL MPI_ALLTOALL( f_inv(nxl_y,nzb_y,0), sendrecvcount_yz, MPI_REAL, &
676	work(nxl_z,1,nys_z,0), sendrecvcount_yz, MPI_REAL, &
677	comm1dx, ierr )
678	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
679
680	!
681	!-- Reorder transposed array
682	!$OMP PARALLEL PRIVATE ( i, j, k, l, zs )
683	!$OMP DO
684	!$acc data copyin( work )
685	DO l = 0, pdims(1) - 1
686	zs = 1 + l * ( nzt_y - nzb_y + 1 )
687	!$acc kernels present( f_out )
688	DO j = nys_z, nyn_z
689	DO k = zs, zs + nzt_y - nzb_y
690	DO i = nxl_z, nxr_z
691	f_out(i,j,k) = work(i,k-zs+1,j,l)
692	ENDDO
693	ENDDO
694	ENDDO
695	!$acc end kernels
696	ENDDO
697	!$acc end data
698	!$OMP END PARALLEL
699	#endif
700
701	ENDIF
702
703	END SUBROUTINE transpose_yz
704
705
706	SUBROUTINE resort_for_zx( f_in, f_inv )
707
708	!------------------------------------------------------------------------------!
709	! Description:
710	! ------------
711	! Resorting data for the transposition from z to x. The transposition itself
712	! is carried out in transpose_zx
713	!------------------------------------------------------------------------------!
714
715	USE indices, &
716	ONLY: nxl, nxr, nyn, nys, nz
717
718	USE kinds
719
720	IMPLICIT NONE
721
722	REAL(wp) :: f_in(1:nz,nys:nyn,nxl:nxr) !:
723	REAL(wp) :: f_inv(nys:nyn,nxl:nxr,1:nz) !:
724
725	INTEGER(iwp) :: i !:
726	INTEGER(iwp) :: j !:
727	INTEGER(iwp) :: k !:
728
729	!
730	!-- Rearrange indices of input array in order to make data to be send
731	!-- by MPI contiguous
732	!$OMP PARALLEL PRIVATE ( i, j, k )
733	!$OMP DO
734	!$acc kernels present( f_in, f_inv )
735	DO k = 1,nz
736	DO i = nxl, nxr
737	DO j = nys, nyn
738	f_inv(j,i,k) = f_in(k,j,i)
739	ENDDO
740	ENDDO
741	ENDDO
742	!$acc end kernels
743	!$OMP END PARALLEL
744
745	END SUBROUTINE resort_for_zx
746
747
748	SUBROUTINE transpose_zx( f_inv, f_out )
749
750	!------------------------------------------------------------------------------!
751	! Description:
752	! ------------
753	! Transposition of input array (f_in) from z to x. For the input array, all
754	! elements along z reside on the same PE, while after transposition, all
755	! elements along x reside on the same PE.
756	!------------------------------------------------------------------------------!
757
758	USE cpulog, &
759	ONLY: cpu_log, cpu_log_nowait, log_point_s
760
761	USE indices, &
762	ONLY: nnx, nx, nxl, nxr, nyn, nys, nz
763
764	USE kinds
765
766	USE pegrid, &
767	ONLY: collective_wait, comm1dx, comm2d, ierr, MPI_DOUBLE_PRECISION, &
768	pdims, sendrecvcount_zx
769
770	USE transpose_indices, &
771	ONLY: nyn_x, nys_x, nzb_x, nzt_x
772
773	IMPLICIT NONE
774
775	INTEGER(iwp) :: i !:
776	INTEGER(iwp) :: j !:
777	INTEGER(iwp) :: k !:
778	INTEGER(iwp) :: l !:
779	INTEGER(iwp) :: xs !:
780
781	REAL(wp) :: f_inv(nys:nyn,nxl:nxr,1:nz) !:
782	REAL(wp) :: f_out(0:nx,nys_x:nyn_x,nzb_x:nzt_x) !:
783
784	REAL(wp), DIMENSION(nys_x:nyn_x,nnx,nzb_x:nzt_x,0:pdims(1)-1) :: work !:
785
786
787	!
788	!-- If the PE grid is one-dimensional along y, only local reordering
789	!-- of the data is necessary and no transposition has to be done.
790	IF ( pdims(1) == 1 ) THEN
791
792	!$OMP PARALLEL PRIVATE ( i, j, k )
793	!$OMP DO
794	!$acc kernels present( f_inv, f_out )
795	DO k = 1, nz
796	DO i = nxl, nxr
797	DO j = nys, nyn
798	f_out(i,j,k) = f_inv(j,i,k)
799	ENDDO
800	ENDDO
801	ENDDO
802	!$acc end kernels
803	!$OMP END PARALLEL
804
805	ELSE
806
807	#if defined( __parallel )
808	!
809	!-- Transpose array
810	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start', cpu_log_nowait )
811	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
812	!$acc update host( f_inv )
813	CALL MPI_ALLTOALL( f_inv(nys,nxl,1), sendrecvcount_zx, MPI_REAL, &
814	work(nys_x,1,nzb_x,0), sendrecvcount_zx, MPI_REAL, &
815	comm1dx, ierr )
816	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
817
818	!
819	!-- Reorder transposed array
820	!$OMP PARALLEL PRIVATE ( i, j, k, l, xs )
821	!$OMP DO
822	!$acc data copyin( work )
823	DO l = 0, pdims(1) - 1
824	xs = 0 + l * nnx
825	!$acc kernels present( f_out )
826	DO k = nzb_x, nzt_x
827	DO i = xs, xs + nnx - 1
828	DO j = nys_x, nyn_x
829	f_out(i,j,k) = work(j,i-xs+1,k,l)
830	ENDDO
831	ENDDO
832	ENDDO
833	!$acc end kernels
834	ENDDO
835	!$acc end data
836	!$OMP END PARALLEL
837	#endif
838
839	ENDIF
840
841	END SUBROUTINE transpose_zx
842
843
844	SUBROUTINE resort_for_zy( f_inv, f_out )
845
846	!------------------------------------------------------------------------------!
847	! Description:
848	! ------------
849	! Resorting data after the transposition from z to y. The transposition itself
850	! is carried out in transpose_zy
851	!------------------------------------------------------------------------------!
852
853	USE indices, &
854	ONLY: ny
855
856	USE kinds
857
858	USE transpose_indices, &
859	ONLY: nxl_y, nxr_y, nzb_y, nzt_y
860
861	IMPLICIT NONE
862
863	REAL(wp) :: f_inv(nxl_y:nxr_y,nzb_y:nzt_y,0:ny) !:
864	REAL(wp) :: f_out(0:ny,nxl_y:nxr_y,nzb_y:nzt_y) !:
865
866
867	INTEGER(iwp) :: i !:
868	INTEGER(iwp) :: j !:
869	INTEGER(iwp) :: k !:
870
871	!
872	!-- Rearrange indices of input array in order to make data to be send
873	!-- by MPI contiguous
874	!$OMP PARALLEL PRIVATE ( i, j, k )
875	!$OMP DO
876	!$acc kernels present( f_inv, f_out )
877	DO k = nzb_y, nzt_y
878	DO j = 0, ny
879	DO i = nxl_y, nxr_y
880	f_out(j,i,k) = f_inv(i,k,j)
881	ENDDO
882	ENDDO
883	ENDDO
884	!$acc end kernels
885	!$OMP END PARALLEL
886
887	END SUBROUTINE resort_for_zy
888
889
890	SUBROUTINE transpose_zy( f_in, f_inv )
891
892	!------------------------------------------------------------------------------!
893	! Description:
894	! ------------
895	! Transposition of input array (f_in) from z to y. For the input array, all
896	! elements along z reside on the same PE, while after transposition, all
897	! elements along y reside on the same PE.
898	!------------------------------------------------------------------------------!
899
900	USE cpulog, &
901	ONLY: cpu_log, cpu_log_nowait, log_point_s
902
903	USE indices, &
904	ONLY: ny, nz
905
906	USE kinds
907
908	USE pegrid, &
909	ONLY: collective_wait, comm1dx, comm2d, ierr, MPI_DOUBLE_PRECISION, &
910	pdims, sendrecvcount_yz
911
912	USE transpose_indices, &
913	ONLY: nxl_y, nxl_z, nxr_y, nxr_z, nyn_z, nys_z, nzb_y, nzt_y
914
915	IMPLICIT NONE
916
917	INTEGER(iwp) :: i !:
918	INTEGER(iwp) :: j !:
919	INTEGER(iwp) :: k !:
920	INTEGER(iwp) :: l !:
921	INTEGER(iwp) :: zs !:
922
923	REAL(wp) :: f_in(nxl_z:nxr_z,nys_z:nyn_z,1:nz) !:
924	REAL(wp) :: f_inv(nxl_y:nxr_y,nzb_y:nzt_y,0:ny) !:
925
926	REAL(wp), DIMENSION(nxl_z:nxr_z,nzt_y-nzb_y+1,nys_z:nyn_z,0:pdims(1)-1) :: work !:
927
928	!
929	!-- If the PE grid is one-dimensional along y, the array has only to be
930	!-- reordered locally and therefore no transposition has to be done.
931	IF ( pdims(1) /= 1 ) THEN
932
933	#if defined( __parallel )
934	!
935	!-- Reorder input array for transposition
936	!$OMP PARALLEL PRIVATE ( i, j, k, l, zs )
937	!$OMP DO
938	!$acc data copyout( work )
939	DO l = 0, pdims(1) - 1
940	zs = 1 + l * ( nzt_y - nzb_y + 1 )
941	!$acc kernels present( f_in, work )
942	DO j = nys_z, nyn_z
943	DO k = zs, zs + nzt_y - nzb_y
944	DO i = nxl_z, nxr_z
945	work(i,k-zs+1,j,l) = f_in(i,j,k)
946	ENDDO
947	ENDDO
948	ENDDO
949	!$acc end kernels
950	ENDDO
951	!$acc end data
952	!$OMP END PARALLEL
953
954	!
955	!-- Transpose array
956	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start', cpu_log_nowait )
957	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
958	CALL MPI_ALLTOALL( work(nxl_z,1,nys_z,0), sendrecvcount_yz, MPI_REAL, &
959	f_inv(nxl_y,nzb_y,0), sendrecvcount_yz, MPI_REAL, &
960	comm1dx, ierr )
961	!$acc update device( f_inv )
962	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
963	#endif
964
965	ELSE
966	!
967	!-- Reorder the array in the same way like ALLTOALL did it
968	!$OMP PARALLEL PRIVATE ( i, j, k )
969	!$OMP DO
970	!$acc kernels present( f_in, f_inv )
971	DO k = nzb_y, nzt_y
972	DO j = 0, ny
973	DO i = nxl_y, nxr_y
974	f_inv(i,k,j) = f_in(i,j,k)
975	ENDDO
976	ENDDO
977	ENDDO
978	!$acc end kernels
979	!$OMP END PARALLEL
980
981	ENDIF
982
983	END SUBROUTINE transpose_zy
984
985
986	SUBROUTINE transpose_zyd( f_in, f_out )
987
988	!------------------------------------------------------------------------------!
989	! Description:
990	! ------------
991	! Transposition of input array (f_in) from z to y. For the input array, all
992	! elements along z reside on the same PE, while after transposition, all
993	! elements along y reside on the same PE.
994	! This is a direct transposition for arrays with indices in regular order
995	! (k,j,i) (cf. transpose_zy).
996	!------------------------------------------------------------------------------!
997
998	USE cpulog, &
999	ONLY: cpu_log, cpu_log_nowait, log_point_s
1000
1001	USE indices, &
1002	ONLY: nnx, nny, nnz, nxl, nxr, nyn, nys, ny, nz
1003
1004	USE kinds
1005
1006	USE pegrid, &
1007	ONLY: collective_wait, comm1dy, comm2d, ierr, MPI_DOUBLE_PRECISION, &
1008	pdims, sendrecvcount_zyd
1009
1010	USE transpose_indices, &
1011	ONLY: nxl_y, nxl_yd, nxr_y, nxr_yd, nzb_y, nzb_yd, nzt_y, nzt_yd
1012
1013	IMPLICIT NONE
1014
1015	INTEGER(iwp) :: i !:
1016	INTEGER(iwp) :: j !:
1017	INTEGER(iwp) :: k !:
1018	INTEGER(iwp) :: l !:
1019	INTEGER(iwp) :: m !:
1020	INTEGER(iwp) :: ys !:
1021
1022	REAL(wp) :: f_in(1:nz,nys:nyn,nxl:nxr) !:
1023	REAL(wp) :: f_inv(nys:nyn,nxl:nxr,1:nz) !:
1024	REAL(wp) :: f_out(0:ny,nxl_yd:nxr_yd,nzb_yd:nzt_yd) !:
1025	REAL(wp) :: work(nnxnnynnz) !:
1026
1027	#if defined( __parallel )
1028
1029	!
1030	!-- Rearrange indices of input array in order to make data to be send
1031	!-- by MPI contiguous
1032	DO i = nxl, nxr
1033	DO j = nys, nyn
1034	DO k = 1, nz
1035	f_inv(j,i,k) = f_in(k,j,i)
1036	ENDDO
1037	ENDDO
1038	ENDDO
1039
1040	!
1041	!-- Move data to different array, because memory location of work1 is
1042	!-- needed further below (work1 = work2).
1043	!-- If the PE grid is one-dimensional along x, only local reordering
1044	!-- of the data is necessary and no transposition has to be done.
1045	IF ( pdims(2) == 1 ) THEN
1046	DO k = 1, nz
1047	DO i = nxl, nxr
1048	DO j = nys, nyn
1049	f_out(j,i,k) = f_inv(j,i,k)
1050	ENDDO
1051	ENDDO
1052	ENDDO
1053	RETURN
1054	ENDIF
1055
1056	!
1057	!-- Transpose array
1058	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
1059	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
1060	CALL MPI_ALLTOALL( f_inv(nys,nxl,1), sendrecvcount_zyd, MPI_REAL, &
1061	work(1), sendrecvcount_zyd, MPI_REAL, &
1062	comm1dy, ierr )
1063	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
1064
1065	!
1066	!-- Reorder transposed array
1067	m = 0
1068	DO l = 0, pdims(2) - 1
1069	ys = 0 + l * nny
1070	DO k = nzb_yd, nzt_yd
1071	DO i = nxl_yd, nxr_yd
1072	DO j = ys, ys + nny - 1
1073	m = m + 1
1074	f_out(j,i,k) = work(m)
1075	ENDDO
1076	ENDDO
1077	ENDDO
1078	ENDDO
1079
1080	#endif
1081
1082	END SUBROUTINE transpose_zyd

Note: See TracBrowser for help on using the repository browser.

Download in other formats:

| Impressum | ©Leibniz Universität Hannover |