Home

Context Navigation

source: palm/trunk/SOURCE/transpose.f90 @ 1324

Last change on this file since 1324 was 1324, checked in by suehring, 10 years ago
Bugfixes in ONLY statements
Property svn:keywords set to `Id`
File size: 31.0 KB

Line
1	SUBROUTINE resort_for_xy( f_in, f_inv )
2
3	!--------------------------------------------------------------------------------!
4	! This file is part of PALM.
5	!
6	! PALM is free software: you can redistribute it and/or modify it under the terms
7	! of the GNU General Public License as published by the Free Software Foundation,
8	! either version 3 of the License, or (at your option) any later version.
9	!
10	! PALM is distributed in the hope that it will be useful, but WITHOUT ANY
11	! WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
12	! A PARTICULAR PURPOSE. See the GNU General Public License for more details.
13	!
14	! You should have received a copy of the GNU General Public License along with
15	! PALM. If not, see <http://www.gnu.org/licenses/>.
16	!
17	! Copyright 1997-2014 Leibniz Universitaet Hannover
18	!--------------------------------------------------------------------------------!
19	!
20	! Current revisions:
21	! -----------------
22	! Bugfix: ONLY statement for module pegrid removed
23	!
24	! Former revisions:
25	! -----------------
26	! $Id: transpose.f90 1324 2014-03-21 09:13:16Z suehring $
27	!
28	! 1320 2014-03-20 08:40:49Z raasch
29	! ONLY-attribute added to USE-statements,
30	! kind-parameters added to all INTEGER and REAL declaration statements,
31	! kinds are defined in new module kinds,
32	! old module precision_kind is removed,
33	! revision history before 2012 removed,
34	! comment fields (!:) to be used for variable explanations added to
35	! all variable declaration statements
36	!
37	! 1318 2014-03-17 13:35:16Z raasch
38	! cpu_log_nowait parameter added to cpu measurements of the transpositions
39	! required for solving the Poisson equation (poisfft),
40	! module interfaces removed
41	!
42	! 1257 2013-11-08 15:18:40Z raasch
43	! openacc loop and loop vector clauses removed
44	!
45	! 1216 2013-08-26 09:31:42Z raasch
46	! re-sorting of the transposed / to be transposed arrays moved to separate
47	! routines resort_for_...
48	!
49	! 1111 2013-03-08 23:54:10Z raasch
50	! openACC directives added,
51	! resorting data from/to work changed, work got 4 dimensions instead of 1
52	!
53	! 1106 2013-03-04 05:31:38Z raasch
54	! preprocessor lines rearranged so that routines can also be used in serial
55	! (non-parallel) mode
56	!
57	! 1092 2013-02-02 11:24:22Z raasch
58	! unused variables removed
59	!
60	! 1036 2012-10-22 13:43:42Z raasch
61	! code put under GPL (PALM 3.9)
62	!
63	! 1003 2012-09-14 14:35:53Z raasch
64	! indices nxa, nya, etc. replaced by nx, ny, etc.
65	!
66	! Revision 1.1 1997/07/24 11:25:18 raasch
67	! Initial revision
68	!
69	!------------------------------------------------------------------------------!
70	! Description:
71	! ------------
72	! Resorting data for the transposition from x to y. The transposition itself
73	! is carried out in transpose_xy
74	!------------------------------------------------------------------------------!
75
76	USE indices, &
77	ONLY: nx
78
79	USE kinds
80
81	USE transpose_indices, &
82	ONLY: nxl_z, nxr_z, nyn_x, nyn_z, nys_x, nys_z, nzb_x, nzt_x
83
84	IMPLICIT NONE
85
86	REAL(wp) :: f_in(0:nx,nys_x:nyn_x,nzb_x:nzt_x) !:
87	REAL(wp) :: f_inv(nys_x:nyn_x,nzb_x:nzt_x,0:nx) !:
88
89
90	INTEGER(iwp) :: i !:
91	INTEGER(iwp) :: j !:
92	INTEGER(iwp) :: k !:
93	!
94	!-- Rearrange indices of input array in order to make data to be send
95	!-- by MPI contiguous
96	!$OMP PARALLEL PRIVATE ( i, j, k )
97	!$OMP DO
98	!$acc kernels present( f_in, f_inv )
99	DO i = 0, nx
100	DO k = nzb_x, nzt_x
101	DO j = nys_x, nyn_x
102	f_inv(j,k,i) = f_in(i,j,k)
103	ENDDO
104	ENDDO
105	ENDDO
106	!$acc end kernels
107	!$OMP END PARALLEL
108
109	END SUBROUTINE resort_for_xy
110
111
112	SUBROUTINE transpose_xy( f_inv, f_out )
113
114	!------------------------------------------------------------------------------!
115	! Description:
116	! ------------
117	! Transposition of input array (f_in) from x to y. For the input array, all
118	! elements along x reside on the same PE, while after transposition, all
119	! elements along y reside on the same PE.
120	!------------------------------------------------------------------------------!
121
122	USE cpulog, &
123	ONLY: cpu_log, cpu_log_nowait, log_point_s
124
125	USE indices, &
126	ONLY: nx, ny
127
128	USE kinds
129
130	USE pegrid
131
132	USE transpose_indices, &
133	ONLY: nxl_y, nxr_y, nyn_x, nys_x, nzb_x, nzb_y, nzt_x, nzt_y
134
135	IMPLICIT NONE
136
137	INTEGER(iwp) :: i !:
138	INTEGER(iwp) :: j !:
139	INTEGER(iwp) :: k !:
140	INTEGER(iwp) :: l !:
141	INTEGER(iwp) :: ys !:
142
143	REAL(wp) :: f_inv(nys_x:nyn_x,nzb_x:nzt_x,0:nx) !:
144	REAL(wp) :: f_out(0:ny,nxl_y:nxr_y,nzb_y:nzt_y) !:
145
146	REAL(wp), DIMENSION(nyn_x-nys_x+1,nzb_y:nzt_y,nxl_y:nxr_y,0:pdims(2)-1) :: work !:
147
148
149	IF ( numprocs /= 1 ) THEN
150
151	#if defined( __parallel )
152	!
153	!-- Transpose array
154	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start', cpu_log_nowait )
155	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
156	!$acc update host( f_inv )
157	CALL MPI_ALLTOALL( f_inv(nys_x,nzb_x,0), sendrecvcount_xy, MPI_REAL, &
158	work(1,nzb_y,nxl_y,0), sendrecvcount_xy, MPI_REAL, &
159	comm1dy, ierr )
160	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
161
162	!
163	!-- Reorder transposed array
164	!$OMP PARALLEL PRIVATE ( i, j, k, l, ys )
165	!$OMP DO
166	!$acc data copyin( work )
167	DO l = 0, pdims(2) - 1
168	ys = 0 + l * ( nyn_x - nys_x + 1 )
169	!$acc kernels present( f_out, work )
170	DO i = nxl_y, nxr_y
171	DO k = nzb_y, nzt_y
172	DO j = ys, ys + nyn_x - nys_x
173	f_out(j,i,k) = work(j-ys+1,k,i,l)
174	ENDDO
175	ENDDO
176	ENDDO
177	!$acc end kernels
178	ENDDO
179	!$acc end data
180	!$OMP END PARALLEL
181	#endif
182
183	ELSE
184
185	!
186	!-- Reorder transposed array
187	!$OMP PARALLEL PRIVATE ( i, j, k )
188	!$OMP DO
189	!$acc kernels present( f_inv, f_out )
190	DO k = nzb_y, nzt_y
191	DO i = nxl_y, nxr_y
192	DO j = 0, ny
193	f_out(j,i,k) = f_inv(j,k,i)
194	ENDDO
195	ENDDO
196	ENDDO
197	!$acc end kernels
198	!$OMP END PARALLEL
199
200	ENDIF
201
202	END SUBROUTINE transpose_xy
203
204
205	SUBROUTINE resort_for_xz( f_inv, f_out )
206
207	!------------------------------------------------------------------------------!
208	! Description:
209	! ------------
210	! Resorting data after the transposition from x to z. The transposition itself
211	! is carried out in transpose_xz
212	!------------------------------------------------------------------------------!
213
214	USE indices, &
215	ONLY: nxl, nxr, nyn, nys, nz
216
217	USE kinds
218
219	IMPLICIT NONE
220
221	REAL(wp) :: f_inv(nys:nyn,nxl:nxr,1:nz) !:
222	REAL(wp) :: f_out(1:nz,nys:nyn,nxl:nxr) !:
223
224	INTEGER(iwp) :: i !:
225	INTEGER(iwp) :: j !:
226	INTEGER(iwp) :: k !:
227	!
228	!-- Rearrange indices of input array in order to make data to be send
229	!-- by MPI contiguous.
230	!-- In case of parallel fft/transposition, scattered store is faster in
231	!-- backward direction!!!
232	!$OMP PARALLEL PRIVATE ( i, j, k )
233	!$OMP DO
234	!$acc kernels present( f_inv, f_out )
235	DO k = 1, nz
236	DO i = nxl, nxr
237	DO j = nys, nyn
238	f_out(k,j,i) = f_inv(j,i,k)
239	ENDDO
240	ENDDO
241	ENDDO
242	!$acc end kernels
243	!$OMP END PARALLEL
244
245	END SUBROUTINE resort_for_xz
246
247
248	SUBROUTINE transpose_xz( f_in, f_inv )
249
250	!------------------------------------------------------------------------------!
251	! Description:
252	! ------------
253	! Transposition of input array (f_in) from x to z. For the input array, all
254	! elements along x reside on the same PE, while after transposition, all
255	! elements along z reside on the same PE.
256	!------------------------------------------------------------------------------!
257
258	USE cpulog, &
259	ONLY: cpu_log, cpu_log_nowait, log_point_s
260
261	USE indices, &
262	ONLY: nnx, nx, nxl, nxr, ny, nyn, nys, nz
263
264	USE kinds
265
266	USE pegrid
267
268	USE transpose_indices, &
269	ONLY: nyn_x, nys_x, nzb_x, nzt_x
270
271	IMPLICIT NONE
272
273	INTEGER(iwp) :: i !:
274	INTEGER(iwp) :: j !:
275	INTEGER(iwp) :: k !:
276	INTEGER(iwp) :: l !:
277	INTEGER(iwp) :: xs !:
278
279	REAL(wp) :: f_in(0:nx,nys_x:nyn_x,nzb_x:nzt_x) !:
280	REAL(wp) :: f_inv(nys:nyn,nxl:nxr,1:nz) !:
281
282	REAL(wp), DIMENSION(nys_x:nyn_x,nnx,nzb_x:nzt_x,0:pdims(1)-1) :: work !:
283
284
285	!
286	!-- If the PE grid is one-dimensional along y, the array has only to be
287	!-- reordered locally and therefore no transposition has to be done.
288	IF ( pdims(1) /= 1 ) THEN
289
290	#if defined( __parallel )
291	!
292	!-- Reorder input array for transposition
293	!$OMP PARALLEL PRIVATE ( i, j, k, l, xs )
294	!$OMP DO
295	!$acc data copyout( work )
296	DO l = 0, pdims(1) - 1
297	xs = 0 + l * nnx
298	!$acc kernels present( f_in, work )
299	DO k = nzb_x, nzt_x
300	DO i = xs, xs + nnx - 1
301	DO j = nys_x, nyn_x
302	work(j,i-xs+1,k,l) = f_in(i,j,k)
303	ENDDO
304	ENDDO
305	ENDDO
306	!$acc end kernels
307	ENDDO
308	!$acc end data
309	!$OMP END PARALLEL
310
311	!
312	!-- Transpose array
313	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start', cpu_log_nowait )
314	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
315	CALL MPI_ALLTOALL( work(nys_x,1,nzb_x,0), sendrecvcount_zx, MPI_REAL, &
316	f_inv(nys,nxl,1), sendrecvcount_zx, MPI_REAL, &
317	comm1dx, ierr )
318	!$acc update device( f_inv )
319	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
320	#endif
321
322	ELSE
323
324	!
325	!-- Reorder the array in a way that the z index is in first position
326	!$OMP PARALLEL PRIVATE ( i, j, k )
327	!$OMP DO
328	!$acc kernels present( f_in, f_inv )
329	DO i = nxl, nxr
330	DO j = nys, nyn
331	DO k = 1, nz
332	f_inv(j,i,k) = f_in(i,j,k)
333	ENDDO
334	ENDDO
335	ENDDO
336	!$acc end kernels
337	!$OMP END PARALLEL
338
339	ENDIF
340
341	END SUBROUTINE transpose_xz
342
343
344	SUBROUTINE resort_for_yx( f_inv, f_out )
345
346	!------------------------------------------------------------------------------!
347	! Description:
348	! ------------
349	! Resorting data after the transposition from y to x. The transposition itself
350	! is carried out in transpose_yx
351	!------------------------------------------------------------------------------!
352
353	USE indices, &
354	ONLY: nx
355
356	USE kinds
357
358	USE transpose_indices, &
359	ONLY: nyn_x, nys_x, nzb_x, nzt_x
360
361	IMPLICIT NONE
362
363	REAL(wp) :: f_inv(nys_x:nyn_x,nzb_x:nzt_x,0:nx) !:
364	REAL(wp) :: f_out(0:nx,nys_x:nyn_x,nzb_x:nzt_x) !:
365
366
367	INTEGER(iwp) :: i !:
368	INTEGER(iwp) :: j !:
369	INTEGER(iwp) :: k !:
370	!
371	!-- Rearrange indices of input array in order to make data to be send
372	!-- by MPI contiguous
373	!$OMP PARALLEL PRIVATE ( i, j, k )
374	!$OMP DO
375	!$acc kernels present( f_inv, f_out )
376	DO i = 0, nx
377	DO k = nzb_x, nzt_x
378	DO j = nys_x, nyn_x
379	f_out(i,j,k) = f_inv(j,k,i)
380	ENDDO
381	ENDDO
382	ENDDO
383	!$acc end kernels
384	!$OMP END PARALLEL
385
386	END SUBROUTINE resort_for_yx
387
388
389	SUBROUTINE transpose_yx( f_in, f_inv )
390
391	!------------------------------------------------------------------------------!
392	! Description:
393	! ------------
394	! Transposition of input array (f_in) from y to x. For the input array, all
395	! elements along y reside on the same PE, while after transposition, all
396	! elements along x reside on the same PE.
397	!------------------------------------------------------------------------------!
398
399	USE cpulog, &
400	ONLY: cpu_log, cpu_log_nowait, log_point_s
401
402	USE indices, &
403	ONLY: nx, ny
404
405	USE kinds
406
407	USE pegrid
408
409	USE transpose_indices, &
410	ONLY: nxl_y, nxr_y, nyn_x, nys_x, nzb_x, nzb_y, nzt_x, nzt_y
411
412	IMPLICIT NONE
413
414	INTEGER(iwp) :: i !:
415	INTEGER(iwp) :: j !:
416	INTEGER(iwp) :: k !:
417	INTEGER(iwp) :: l !:
418	INTEGER(iwp) :: ys !:
419
420	REAL(wp) :: f_in(0:ny,nxl_y:nxr_y,nzb_y:nzt_y) !:
421	REAL(wp) :: f_inv(nys_x:nyn_x,nzb_x:nzt_x,0:nx) !:
422
423	REAL(wp), DIMENSION(nyn_x-nys_x+1,nzb_y:nzt_y,nxl_y:nxr_y,0:pdims(2)-1) :: work !:
424
425
426	IF ( numprocs /= 1 ) THEN
427
428	#if defined( __parallel )
429	!
430	!-- Reorder input array for transposition
431	!$OMP PARALLEL PRIVATE ( i, j, k, l, ys )
432	!$OMP DO
433	!$acc data copyout( work )
434	DO l = 0, pdims(2) - 1
435	ys = 0 + l * ( nyn_x - nys_x + 1 )
436	!$acc kernels present( f_in, work )
437	DO i = nxl_y, nxr_y
438	DO k = nzb_y, nzt_y
439	DO j = ys, ys + nyn_x - nys_x
440	work(j-ys+1,k,i,l) = f_in(j,i,k)
441	ENDDO
442	ENDDO
443	ENDDO
444	!$acc end kernels
445	ENDDO
446	!$acc end data
447	!$OMP END PARALLEL
448
449	!
450	!-- Transpose array
451	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start', cpu_log_nowait )
452	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
453	CALL MPI_ALLTOALL( work(1,nzb_y,nxl_y,0), sendrecvcount_xy, MPI_REAL, &
454	f_inv(nys_x,nzb_x,0), sendrecvcount_xy, MPI_REAL, &
455	comm1dy, ierr )
456	!$acc update device( f_inv )
457	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
458	#endif
459
460	ELSE
461
462	!
463	!-- Reorder array f_in the same way as ALLTOALL did it
464	!$OMP PARALLEL PRIVATE ( i, j, k )
465	!$OMP DO
466	!$acc kernels present( f_in, f_inv )
467	DO i = nxl_y, nxr_y
468	DO k = nzb_y, nzt_y
469	DO j = 0, ny
470	f_inv(j,k,i) = f_in(j,i,k)
471	ENDDO
472	ENDDO
473	ENDDO
474	!$acc end kernels
475	!$OMP END PARALLEL
476
477	ENDIF
478
479	END SUBROUTINE transpose_yx
480
481
482	SUBROUTINE transpose_yxd( f_in, f_out )
483
484	!------------------------------------------------------------------------------!
485	! Description:
486	! ------------
487	! Transposition of input array (f_in) from y to x. For the input array, all
488	! elements along y reside on the same PE, while after transposition, all
489	! elements along x reside on the same PE.
490	! This is a direct transposition for arrays with indices in regular order
491	! (k,j,i) (cf. transpose_yx).
492	!------------------------------------------------------------------------------!
493
494	USE cpulog, &
495	ONLY: cpu_log, cpu_log_nowait, log_point_s
496
497	USE indices, &
498	ONLY: nnx, nny, nnz, nx, nxl, nxr, nyn, nys, nz
499
500	USE kinds
501
502	USE pegrid
503
504	USE transpose_indices, &
505	ONLY: nyn_x, nys_x, nzb_x, nzt_x
506
507	IMPLICIT NONE
508
509	INTEGER(iwp) :: i !:
510	INTEGER(iwp) :: j !:
511	INTEGER(iwp) :: k !:
512	INTEGER(iwp) :: l !:
513	INTEGER(iwp) :: m !:
514	INTEGER(iwp) :: xs !:
515
516	REAL(wp) :: f_in(1:nz,nys:nyn,nxl:nxr) !:
517	REAL(wp) :: f_inv(nxl:nxr,1:nz,nys:nyn) !:
518	REAL(wp) :: f_out(0:nx,nys_x:nyn_x,nzb_x:nzt_x) !:
519	REAL(wp) :: work(nnxnnynnz) !:
520	#if defined( __parallel )
521
522	!
523	!-- Rearrange indices of input array in order to make data to be send
524	!-- by MPI contiguous
525	DO k = 1, nz
526	DO j = nys, nyn
527	DO i = nxl, nxr
528	f_inv(i,k,j) = f_in(k,j,i)
529	ENDDO
530	ENDDO
531	ENDDO
532
533	!
534	!-- Transpose array
535	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
536	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
537	CALL MPI_ALLTOALL( f_inv(nxl,1,nys), sendrecvcount_xy, MPI_REAL, &
538	work(1), sendrecvcount_xy, MPI_REAL, &
539	comm1dx, ierr )
540	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
541
542	!
543	!-- Reorder transposed array
544	m = 0
545	DO l = 0, pdims(1) - 1
546	xs = 0 + l * nnx
547	DO j = nys_x, nyn_x
548	DO k = 1, nz
549	DO i = xs, xs + nnx - 1
550	m = m + 1
551	f_out(i,j,k) = work(m)
552	ENDDO
553	ENDDO
554	ENDDO
555	ENDDO
556
557	#endif
558
559	END SUBROUTINE transpose_yxd
560
561
562	SUBROUTINE resort_for_yz( f_in, f_inv )
563
564	!------------------------------------------------------------------------------!
565	! Description:
566	! ------------
567	! Resorting data for the transposition from y to z. The transposition itself
568	! is carried out in transpose_yz
569	!------------------------------------------------------------------------------!
570
571	USE indices, &
572	ONLY: ny
573
574	USE kinds
575
576	USE transpose_indices, &
577	ONLY: nxl_y, nxr_y, nzb_y, nzt_y
578
579	IMPLICIT NONE
580
581	REAL(wp) :: f_in(0:ny,nxl_y:nxr_y,nzb_y:nzt_y) !:
582	REAL(wp) :: f_inv(nxl_y:nxr_y,nzb_y:nzt_y,0:ny) !:
583
584	INTEGER(iwp) :: i !:
585	INTEGER(iwp) :: j !:
586	INTEGER(iwp) :: k !:
587
588	!
589	!-- Rearrange indices of input array in order to make data to be send
590	!-- by MPI contiguous
591	!$OMP PARALLEL PRIVATE ( i, j, k )
592	!$OMP DO
593	!$acc kernels present( f_in, f_inv )
594	DO j = 0, ny
595	DO k = nzb_y, nzt_y
596	DO i = nxl_y, nxr_y
597	f_inv(i,k,j) = f_in(j,i,k)
598	ENDDO
599	ENDDO
600	ENDDO
601	!$acc end kernels
602	!$OMP END PARALLEL
603
604	END SUBROUTINE resort_for_yz
605
606
607	SUBROUTINE transpose_yz( f_inv, f_out )
608
609	!------------------------------------------------------------------------------!
610	! Description:
611	! ------------
612	! Transposition of input array (f_in) from y to z. For the input array, all
613	! elements along y reside on the same PE, while after transposition, all
614	! elements along z reside on the same PE.
615	!------------------------------------------------------------------------------!
616
617	USE cpulog, &
618	ONLY: cpu_log, cpu_log_nowait, log_point_s
619
620	USE indices, &
621	ONLY: ny, nz
622
623	USE kinds
624
625	USE pegrid
626
627	USE transpose_indices, &
628	ONLY: nxl_y, nxl_z, nxr_y, nxr_z, nyn_z, nys_z, nzb_y, nzt_y
629
630	IMPLICIT NONE
631
632	INTEGER(iwp) :: i !:
633	INTEGER(iwp) :: j !:
634	INTEGER(iwp) :: k !:
635	INTEGER(iwp) :: l !:
636	INTEGER(iwp) :: zs !:
637
638	REAL(wp) :: f_inv(nxl_y:nxr_y,nzb_y:nzt_y,0:ny) !:
639	REAL(wp) :: f_out(nxl_z:nxr_z,nys_z:nyn_z,1:nz) !:
640
641	REAL(wp), DIMENSION(nxl_z:nxr_z,nzt_y-nzb_y+1,nys_z:nyn_z,0:pdims(1)-1) :: work !:
642
643
644	!
645	!-- If the PE grid is one-dimensional along y, only local reordering
646	!-- of the data is necessary and no transposition has to be done.
647	IF ( pdims(1) == 1 ) THEN
648
649	!$OMP PARALLEL PRIVATE ( i, j, k )
650	!$OMP DO
651	!$acc kernels present( f_inv, f_out )
652	DO j = 0, ny
653	DO k = nzb_y, nzt_y
654	DO i = nxl_y, nxr_y
655	f_out(i,j,k) = f_inv(i,k,j)
656	ENDDO
657	ENDDO
658	ENDDO
659	!$acc end kernels
660	!$OMP END PARALLEL
661
662	ELSE
663
664	#if defined( __parallel )
665	!
666	!-- Transpose array
667	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start', cpu_log_nowait )
668	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
669	!$acc update host( f_inv )
670	CALL MPI_ALLTOALL( f_inv(nxl_y,nzb_y,0), sendrecvcount_yz, MPI_REAL, &
671	work(nxl_z,1,nys_z,0), sendrecvcount_yz, MPI_REAL, &
672	comm1dx, ierr )
673	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
674
675	!
676	!-- Reorder transposed array
677	!$OMP PARALLEL PRIVATE ( i, j, k, l, zs )
678	!$OMP DO
679	!$acc data copyin( work )
680	DO l = 0, pdims(1) - 1
681	zs = 1 + l * ( nzt_y - nzb_y + 1 )
682	!$acc kernels present( f_out )
683	DO j = nys_z, nyn_z
684	DO k = zs, zs + nzt_y - nzb_y
685	DO i = nxl_z, nxr_z
686	f_out(i,j,k) = work(i,k-zs+1,j,l)
687	ENDDO
688	ENDDO
689	ENDDO
690	!$acc end kernels
691	ENDDO
692	!$acc end data
693	!$OMP END PARALLEL
694	#endif
695
696	ENDIF
697
698	END SUBROUTINE transpose_yz
699
700
701	SUBROUTINE resort_for_zx( f_in, f_inv )
702
703	!------------------------------------------------------------------------------!
704	! Description:
705	! ------------
706	! Resorting data for the transposition from z to x. The transposition itself
707	! is carried out in transpose_zx
708	!------------------------------------------------------------------------------!
709
710	USE indices, &
711	ONLY: nxl, nxr, nyn, nys, nz
712
713	USE kinds
714
715	IMPLICIT NONE
716
717	REAL(wp) :: f_in(1:nz,nys:nyn,nxl:nxr) !:
718	REAL(wp) :: f_inv(nys:nyn,nxl:nxr,1:nz) !:
719
720	INTEGER(iwp) :: i !:
721	INTEGER(iwp) :: j !:
722	INTEGER(iwp) :: k !:
723
724	!
725	!-- Rearrange indices of input array in order to make data to be send
726	!-- by MPI contiguous
727	!$OMP PARALLEL PRIVATE ( i, j, k )
728	!$OMP DO
729	!$acc kernels present( f_in, f_inv )
730	DO k = 1,nz
731	DO i = nxl, nxr
732	DO j = nys, nyn
733	f_inv(j,i,k) = f_in(k,j,i)
734	ENDDO
735	ENDDO
736	ENDDO
737	!$acc end kernels
738	!$OMP END PARALLEL
739
740	END SUBROUTINE resort_for_zx
741
742
743	SUBROUTINE transpose_zx( f_inv, f_out )
744
745	!------------------------------------------------------------------------------!
746	! Description:
747	! ------------
748	! Transposition of input array (f_in) from z to x. For the input array, all
749	! elements along z reside on the same PE, while after transposition, all
750	! elements along x reside on the same PE.
751	!------------------------------------------------------------------------------!
752
753	USE cpulog, &
754	ONLY: cpu_log, cpu_log_nowait, log_point_s
755
756	USE indices, &
757	ONLY: nnx, nx, nxl, nxr, nyn, nys, nz
758
759	USE kinds
760
761	USE pegrid
762
763	USE transpose_indices, &
764	ONLY: nyn_x, nys_x, nzb_x, nzt_x
765
766	IMPLICIT NONE
767
768	INTEGER(iwp) :: i !:
769	INTEGER(iwp) :: j !:
770	INTEGER(iwp) :: k !:
771	INTEGER(iwp) :: l !:
772	INTEGER(iwp) :: xs !:
773
774	REAL(wp) :: f_inv(nys:nyn,nxl:nxr,1:nz) !:
775	REAL(wp) :: f_out(0:nx,nys_x:nyn_x,nzb_x:nzt_x) !:
776
777	REAL(wp), DIMENSION(nys_x:nyn_x,nnx,nzb_x:nzt_x,0:pdims(1)-1) :: work !:
778
779
780	!
781	!-- If the PE grid is one-dimensional along y, only local reordering
782	!-- of the data is necessary and no transposition has to be done.
783	IF ( pdims(1) == 1 ) THEN
784
785	!$OMP PARALLEL PRIVATE ( i, j, k )
786	!$OMP DO
787	!$acc kernels present( f_inv, f_out )
788	DO k = 1, nz
789	DO i = nxl, nxr
790	DO j = nys, nyn
791	f_out(i,j,k) = f_inv(j,i,k)
792	ENDDO
793	ENDDO
794	ENDDO
795	!$acc end kernels
796	!$OMP END PARALLEL
797
798	ELSE
799
800	#if defined( __parallel )
801	!
802	!-- Transpose array
803	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start', cpu_log_nowait )
804	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
805	!$acc update host( f_inv )
806	CALL MPI_ALLTOALL( f_inv(nys,nxl,1), sendrecvcount_zx, MPI_REAL, &
807	work(nys_x,1,nzb_x,0), sendrecvcount_zx, MPI_REAL, &
808	comm1dx, ierr )
809	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
810
811	!
812	!-- Reorder transposed array
813	!$OMP PARALLEL PRIVATE ( i, j, k, l, xs )
814	!$OMP DO
815	!$acc data copyin( work )
816	DO l = 0, pdims(1) - 1
817	xs = 0 + l * nnx
818	!$acc kernels present( f_out )
819	DO k = nzb_x, nzt_x
820	DO i = xs, xs + nnx - 1
821	DO j = nys_x, nyn_x
822	f_out(i,j,k) = work(j,i-xs+1,k,l)
823	ENDDO
824	ENDDO
825	ENDDO
826	!$acc end kernels
827	ENDDO
828	!$acc end data
829	!$OMP END PARALLEL
830	#endif
831
832	ENDIF
833
834	END SUBROUTINE transpose_zx
835
836
837	SUBROUTINE resort_for_zy( f_inv, f_out )
838
839	!------------------------------------------------------------------------------!
840	! Description:
841	! ------------
842	! Resorting data after the transposition from z to y. The transposition itself
843	! is carried out in transpose_zy
844	!------------------------------------------------------------------------------!
845
846	USE indices, &
847	ONLY: ny
848
849	USE kinds
850
851	USE transpose_indices, &
852	ONLY: nxl_y, nxr_y, nzb_y, nzt_y
853
854	IMPLICIT NONE
855
856	REAL(wp) :: f_inv(nxl_y:nxr_y,nzb_y:nzt_y,0:ny) !:
857	REAL(wp) :: f_out(0:ny,nxl_y:nxr_y,nzb_y:nzt_y) !:
858
859
860	INTEGER(iwp) :: i !:
861	INTEGER(iwp) :: j !:
862	INTEGER(iwp) :: k !:
863
864	!
865	!-- Rearrange indices of input array in order to make data to be send
866	!-- by MPI contiguous
867	!$OMP PARALLEL PRIVATE ( i, j, k )
868	!$OMP DO
869	!$acc kernels present( f_inv, f_out )
870	DO k = nzb_y, nzt_y
871	DO j = 0, ny
872	DO i = nxl_y, nxr_y
873	f_out(j,i,k) = f_inv(i,k,j)
874	ENDDO
875	ENDDO
876	ENDDO
877	!$acc end kernels
878	!$OMP END PARALLEL
879
880	END SUBROUTINE resort_for_zy
881
882
883	SUBROUTINE transpose_zy( f_in, f_inv )
884
885	!------------------------------------------------------------------------------!
886	! Description:
887	! ------------
888	! Transposition of input array (f_in) from z to y. For the input array, all
889	! elements along z reside on the same PE, while after transposition, all
890	! elements along y reside on the same PE.
891	!------------------------------------------------------------------------------!
892
893	USE cpulog, &
894	ONLY: cpu_log, cpu_log_nowait, log_point_s
895
896	USE indices, &
897	ONLY: ny, nz
898
899	USE kinds
900
901	USE pegrid
902
903	USE transpose_indices, &
904	ONLY: nxl_y, nxl_z, nxr_y, nxr_z, nyn_z, nys_z, nzb_y, nzt_y
905
906	IMPLICIT NONE
907
908	INTEGER(iwp) :: i !:
909	INTEGER(iwp) :: j !:
910	INTEGER(iwp) :: k !:
911	INTEGER(iwp) :: l !:
912	INTEGER(iwp) :: zs !:
913
914	REAL(wp) :: f_in(nxl_z:nxr_z,nys_z:nyn_z,1:nz) !:
915	REAL(wp) :: f_inv(nxl_y:nxr_y,nzb_y:nzt_y,0:ny) !:
916
917	REAL(wp), DIMENSION(nxl_z:nxr_z,nzt_y-nzb_y+1,nys_z:nyn_z,0:pdims(1)-1) :: work !:
918
919	!
920	!-- If the PE grid is one-dimensional along y, the array has only to be
921	!-- reordered locally and therefore no transposition has to be done.
922	IF ( pdims(1) /= 1 ) THEN
923
924	#if defined( __parallel )
925	!
926	!-- Reorder input array for transposition
927	!$OMP PARALLEL PRIVATE ( i, j, k, l, zs )
928	!$OMP DO
929	!$acc data copyout( work )
930	DO l = 0, pdims(1) - 1
931	zs = 1 + l * ( nzt_y - nzb_y + 1 )
932	!$acc kernels present( f_in, work )
933	DO j = nys_z, nyn_z
934	DO k = zs, zs + nzt_y - nzb_y
935	DO i = nxl_z, nxr_z
936	work(i,k-zs+1,j,l) = f_in(i,j,k)
937	ENDDO
938	ENDDO
939	ENDDO
940	!$acc end kernels
941	ENDDO
942	!$acc end data
943	!$OMP END PARALLEL
944
945	!
946	!-- Transpose array
947	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start', cpu_log_nowait )
948	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
949	CALL MPI_ALLTOALL( work(nxl_z,1,nys_z,0), sendrecvcount_yz, MPI_REAL, &
950	f_inv(nxl_y,nzb_y,0), sendrecvcount_yz, MPI_REAL, &
951	comm1dx, ierr )
952	!$acc update device( f_inv )
953	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
954	#endif
955
956	ELSE
957	!
958	!-- Reorder the array in the same way like ALLTOALL did it
959	!$OMP PARALLEL PRIVATE ( i, j, k )
960	!$OMP DO
961	!$acc kernels present( f_in, f_inv )
962	DO k = nzb_y, nzt_y
963	DO j = 0, ny
964	DO i = nxl_y, nxr_y
965	f_inv(i,k,j) = f_in(i,j,k)
966	ENDDO
967	ENDDO
968	ENDDO
969	!$acc end kernels
970	!$OMP END PARALLEL
971
972	ENDIF
973
974	END SUBROUTINE transpose_zy
975
976
977	SUBROUTINE transpose_zyd( f_in, f_out )
978
979	!------------------------------------------------------------------------------!
980	! Description:
981	! ------------
982	! Transposition of input array (f_in) from z to y. For the input array, all
983	! elements along z reside on the same PE, while after transposition, all
984	! elements along y reside on the same PE.
985	! This is a direct transposition for arrays with indices in regular order
986	! (k,j,i) (cf. transpose_zy).
987	!------------------------------------------------------------------------------!
988
989	USE cpulog, &
990	ONLY: cpu_log, cpu_log_nowait, log_point_s
991
992	USE indices, &
993	ONLY: nnx, nny, nnz, nxl, nxr, nyn, nys, ny, nz
994
995	USE kinds
996
997	USE pegrid
998
999	USE transpose_indices, &
1000	ONLY: nxl_y, nxl_yd, nxr_y, nxr_yd, nzb_y, nzb_yd, nzt_y, nzt_yd
1001
1002	IMPLICIT NONE
1003
1004	INTEGER(iwp) :: i !:
1005	INTEGER(iwp) :: j !:
1006	INTEGER(iwp) :: k !:
1007	INTEGER(iwp) :: l !:
1008	INTEGER(iwp) :: m !:
1009	INTEGER(iwp) :: ys !:
1010
1011	REAL(wp) :: f_in(1:nz,nys:nyn,nxl:nxr) !:
1012	REAL(wp) :: f_inv(nys:nyn,nxl:nxr,1:nz) !:
1013	REAL(wp) :: f_out(0:ny,nxl_yd:nxr_yd,nzb_yd:nzt_yd) !:
1014	REAL(wp) :: work(nnxnnynnz) !:
1015
1016	#if defined( __parallel )
1017
1018	!
1019	!-- Rearrange indices of input array in order to make data to be send
1020	!-- by MPI contiguous
1021	DO i = nxl, nxr
1022	DO j = nys, nyn
1023	DO k = 1, nz
1024	f_inv(j,i,k) = f_in(k,j,i)
1025	ENDDO
1026	ENDDO
1027	ENDDO
1028
1029	!
1030	!-- Move data to different array, because memory location of work1 is
1031	!-- needed further below (work1 = work2).
1032	!-- If the PE grid is one-dimensional along x, only local reordering
1033	!-- of the data is necessary and no transposition has to be done.
1034	IF ( pdims(2) == 1 ) THEN
1035	DO k = 1, nz
1036	DO i = nxl, nxr
1037	DO j = nys, nyn
1038	f_out(j,i,k) = f_inv(j,i,k)
1039	ENDDO
1040	ENDDO
1041	ENDDO
1042	RETURN
1043	ENDIF
1044
1045	!
1046	!-- Transpose array
1047	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
1048	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
1049	CALL MPI_ALLTOALL( f_inv(nys,nxl,1), sendrecvcount_zyd, MPI_REAL, &
1050	work(1), sendrecvcount_zyd, MPI_REAL, &
1051	comm1dy, ierr )
1052	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
1053
1054	!
1055	!-- Reorder transposed array
1056	m = 0
1057	DO l = 0, pdims(2) - 1
1058	ys = 0 + l * nny
1059	DO k = nzb_yd, nzt_yd
1060	DO i = nxl_yd, nxr_yd
1061	DO j = ys, ys + nny - 1
1062	m = m + 1
1063	f_out(j,i,k) = work(m)
1064	ENDDO
1065	ENDDO
1066	ENDDO
1067	ENDDO
1068
1069	#endif
1070
1071	END SUBROUTINE transpose_zyd

Note: See TracBrowser for help on using the repository browser.

Download in other formats:

| Impressum | ©Leibniz Universität Hannover |