Home

Context Navigation

source: palm/trunk/SOURCE/transpose.f90 @ 2699

Last change on this file since 2699 was 2696, checked in by kanani, 7 years ago
Merge of branch palm4u into trunk
Property svn:keywords set to `Id`
File size: 29.6 KB

Line
1	!> @file transpose.f90
2	!------------------------------------------------------------------------------!
3	! This file is part of the PALM model system.
4	!
5	! PALM is free software: you can redistribute it and/or modify it under the
6	! terms of the GNU General Public License as published by the Free Software
7	! Foundation, either version 3 of the License, or (at your option) any later
8	! version.
9	!
10	! PALM is distributed in the hope that it will be useful, but WITHOUT ANY
11	! WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
12	! A PARTICULAR PURPOSE. See the GNU General Public License for more details.
13	!
14	! You should have received a copy of the GNU General Public License along with
15	! PALM. If not, see <http://www.gnu.org/licenses/>.
16	!
17	! Copyright 1997-2017 Leibniz Universitaet Hannover
18	!------------------------------------------------------------------------------!
19	!
20	! Current revisions:
21	! -----------------
22	!
23	!
24	! Former revisions:
25	! -----------------
26	! $Id: transpose.f90 2696 2017-12-14 17:12:51Z kanani $
27	!
28	! 2118 2017-01-17 16:38:49Z raasch
29	! OpenACC directives removed
30	!
31	! 2000 2016-08-20 18:09:15Z knoop
32	! Forced header and separation lines into 80 columns
33	!
34	! 1682 2015-10-07 23:56:08Z knoop
35	! Code annotations made doxygen readable
36	!
37	! 1324 2014-03-21 09:13:16Z suehring
38	! Bugfix: ONLY statement for module pegrid removed
39	!
40	! 1320 2014-03-20 08:40:49Z raasch
41	! ONLY-attribute added to USE-statements,
42	! kind-parameters added to all INTEGER and REAL declaration statements,
43	! kinds are defined in new module kinds,
44	! old module precision_kind is removed,
45	! revision history before 2012 removed,
46	! comment fields (!:) to be used for variable explanations added to
47	! all variable declaration statements
48	!
49	! 1318 2014-03-17 13:35:16Z raasch
50	! cpu_log_nowait parameter added to cpu measurements of the transpositions
51	! required for solving the Poisson equation (poisfft),
52	! module interfaces removed
53	!
54	! 1257 2013-11-08 15:18:40Z raasch
55	! openacc loop and loop vector clauses removed
56	!
57	! 1216 2013-08-26 09:31:42Z raasch
58	! re-sorting of the transposed / to be transposed arrays moved to separate
59	! routines resort_for_...
60	!
61	! 1111 2013-03-08 23:54:10Z raasch
62	! openACC directives added,
63	! resorting data from/to work changed, work got 4 dimensions instead of 1
64	!
65	! 1106 2013-03-04 05:31:38Z raasch
66	! preprocessor lines rearranged so that routines can also be used in serial
67	! (non-parallel) mode
68	!
69	! 1092 2013-02-02 11:24:22Z raasch
70	! unused variables removed
71	!
72	! 1036 2012-10-22 13:43:42Z raasch
73	! code put under GPL (PALM 3.9)
74	!
75	! 1003 2012-09-14 14:35:53Z raasch
76	! indices nxa, nya, etc. replaced by nx, ny, etc.
77	!
78	! Revision 1.1 1997/07/24 11:25:18 raasch
79	! Initial revision
80	!
81	!------------------------------------------------------------------------------!
82	! Description:
83	! ------------
84	!> Resorting data for the transposition from x to y. The transposition itself
85	!> is carried out in transpose_xy
86	!------------------------------------------------------------------------------!
87	SUBROUTINE resort_for_xy( f_in, f_inv )
88
89
90	USE indices, &
91	ONLY: nx
92
93	USE kinds
94
95	USE transpose_indices, &
96	ONLY: nxl_z, nxr_z, nyn_x, nyn_z, nys_x, nys_z, nzb_x, nzt_x
97
98	IMPLICIT NONE
99
100	REAL(wp) :: f_in(0:nx,nys_x:nyn_x,nzb_x:nzt_x) !<
101	REAL(wp) :: f_inv(nys_x:nyn_x,nzb_x:nzt_x,0:nx) !<
102
103
104	INTEGER(iwp) :: i !<
105	INTEGER(iwp) :: j !<
106	INTEGER(iwp) :: k !<
107	!
108	!-- Rearrange indices of input array in order to make data to be send
109	!-- by MPI contiguous
110	!$OMP PARALLEL PRIVATE ( i, j, k )
111	!$OMP DO
112	DO i = 0, nx
113	DO k = nzb_x, nzt_x
114	DO j = nys_x, nyn_x
115	f_inv(j,k,i) = f_in(i,j,k)
116	ENDDO
117	ENDDO
118	ENDDO
119	!$OMP END PARALLEL
120
121	END SUBROUTINE resort_for_xy
122
123
124	!------------------------------------------------------------------------------!
125	! Description:
126	! ------------
127	!> Transposition of input array (f_in) from x to y. For the input array, all
128	!> elements along x reside on the same PE, while after transposition, all
129	!> elements along y reside on the same PE.
130	!------------------------------------------------------------------------------!
131	SUBROUTINE transpose_xy( f_inv, f_out )
132
133
134	USE cpulog, &
135	ONLY: cpu_log, cpu_log_nowait, log_point_s
136
137	USE indices, &
138	ONLY: nx, ny
139
140	USE kinds
141
142	USE pegrid
143
144	USE transpose_indices, &
145	ONLY: nxl_y, nxr_y, nyn_x, nys_x, nzb_x, nzb_y, nzt_x, nzt_y
146
147	IMPLICIT NONE
148
149	INTEGER(iwp) :: i !<
150	INTEGER(iwp) :: j !<
151	INTEGER(iwp) :: k !<
152	INTEGER(iwp) :: l !<
153	INTEGER(iwp) :: ys !<
154
155	REAL(wp) :: f_inv(nys_x:nyn_x,nzb_x:nzt_x,0:nx) !<
156	REAL(wp) :: f_out(0:ny,nxl_y:nxr_y,nzb_y:nzt_y) !<
157
158	REAL(wp), DIMENSION(nyn_x-nys_x+1,nzb_y:nzt_y,nxl_y:nxr_y,0:pdims(2)-1) :: work !<
159
160
161	IF ( numprocs /= 1 ) THEN
162
163	#if defined( __parallel )
164	!
165	!-- Transpose array
166	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start', cpu_log_nowait )
167	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
168	CALL MPI_ALLTOALL( f_inv(nys_x,nzb_x,0), sendrecvcount_xy, MPI_REAL, &
169	work(1,nzb_y,nxl_y,0), sendrecvcount_xy, MPI_REAL, &
170	comm1dy, ierr )
171	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
172
173	!
174	!-- Reorder transposed array
175	!$OMP PARALLEL PRIVATE ( i, j, k, l, ys )
176	!$OMP DO
177	DO l = 0, pdims(2) - 1
178	ys = 0 + l * ( nyn_x - nys_x + 1 )
179	DO i = nxl_y, nxr_y
180	DO k = nzb_y, nzt_y
181	DO j = ys, ys + nyn_x - nys_x
182	f_out(j,i,k) = work(j-ys+1,k,i,l)
183	ENDDO
184	ENDDO
185	ENDDO
186	ENDDO
187	!$OMP END PARALLEL
188	#endif
189
190	ELSE
191
192	!
193	!-- Reorder transposed array
194	!$OMP PARALLEL PRIVATE ( i, j, k )
195	!$OMP DO
196	DO k = nzb_y, nzt_y
197	DO i = nxl_y, nxr_y
198	DO j = 0, ny
199	f_out(j,i,k) = f_inv(j,k,i)
200	ENDDO
201	ENDDO
202	ENDDO
203	!$OMP END PARALLEL
204
205	ENDIF
206
207	END SUBROUTINE transpose_xy
208
209
210	!------------------------------------------------------------------------------!
211	! Description:
212	! ------------
213	!> Resorting data after the transposition from x to z. The transposition itself
214	!> is carried out in transpose_xz
215	!------------------------------------------------------------------------------!
216	SUBROUTINE resort_for_xz( f_inv, f_out )
217
218
219	USE indices, &
220	ONLY: nxl, nxr, nyn, nys, nz
221
222	USE kinds
223
224	IMPLICIT NONE
225
226	REAL(wp) :: f_inv(nys:nyn,nxl:nxr,1:nz) !<
227	REAL(wp) :: f_out(1:nz,nys:nyn,nxl:nxr) !<
228
229	INTEGER(iwp) :: i !<
230	INTEGER(iwp) :: j !<
231	INTEGER(iwp) :: k !<
232	!
233	!-- Rearrange indices of input array in order to make data to be send
234	!-- by MPI contiguous.
235	!-- In case of parallel fft/transposition, scattered store is faster in
236	!-- backward direction!!!
237	!$OMP PARALLEL PRIVATE ( i, j, k )
238	!$OMP DO
239	DO k = 1, nz
240	DO i = nxl, nxr
241	DO j = nys, nyn
242	f_out(k,j,i) = f_inv(j,i,k)
243	ENDDO
244	ENDDO
245	ENDDO
246	!$OMP END PARALLEL
247
248	END SUBROUTINE resort_for_xz
249
250
251	!------------------------------------------------------------------------------!
252	! Description:
253	! ------------
254	!> Transposition of input array (f_in) from x to z. For the input array, all
255	!> elements along x reside on the same PE, while after transposition, all
256	!> elements along z reside on the same PE.
257	!------------------------------------------------------------------------------!
258	SUBROUTINE transpose_xz( f_in, f_inv )
259
260
261	USE cpulog, &
262	ONLY: cpu_log, cpu_log_nowait, log_point_s
263
264	USE indices, &
265	ONLY: nnx, nx, nxl, nxr, ny, nyn, nys, nz
266
267	USE kinds
268
269	USE pegrid
270
271	USE transpose_indices, &
272	ONLY: nyn_x, nys_x, nzb_x, nzt_x
273
274	IMPLICIT NONE
275
276	INTEGER(iwp) :: i !<
277	INTEGER(iwp) :: j !<
278	INTEGER(iwp) :: k !<
279	INTEGER(iwp) :: l !<
280	INTEGER(iwp) :: xs !<
281
282	REAL(wp) :: f_in(0:nx,nys_x:nyn_x,nzb_x:nzt_x) !<
283	REAL(wp) :: f_inv(nys:nyn,nxl:nxr,1:nz) !<
284
285	REAL(wp), DIMENSION(nys_x:nyn_x,nnx,nzb_x:nzt_x,0:pdims(1)-1) :: work !<
286
287
288	!
289	!-- If the PE grid is one-dimensional along y, the array has only to be
290	!-- reordered locally and therefore no transposition has to be done.
291	IF ( pdims(1) /= 1 ) THEN
292
293	#if defined( __parallel )
294	!
295	!-- Reorder input array for transposition
296	!$OMP PARALLEL PRIVATE ( i, j, k, l, xs )
297	!$OMP DO
298	DO l = 0, pdims(1) - 1
299	xs = 0 + l * nnx
300	DO k = nzb_x, nzt_x
301	DO i = xs, xs + nnx - 1
302	DO j = nys_x, nyn_x
303	work(j,i-xs+1,k,l) = f_in(i,j,k)
304	ENDDO
305	ENDDO
306	ENDDO
307	ENDDO
308	!$OMP END PARALLEL
309
310	!
311	!-- Transpose array
312	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start', cpu_log_nowait )
313	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
314	CALL MPI_ALLTOALL( work(nys_x,1,nzb_x,0), sendrecvcount_zx, MPI_REAL, &
315	f_inv(nys,nxl,1), sendrecvcount_zx, MPI_REAL, &
316	comm1dx, ierr )
317	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
318	#endif
319
320	ELSE
321
322	!
323	!-- Reorder the array in a way that the z index is in first position
324	!$OMP PARALLEL PRIVATE ( i, j, k )
325	!$OMP DO
326	DO i = nxl, nxr
327	DO j = nys, nyn
328	DO k = 1, nz
329	f_inv(j,i,k) = f_in(i,j,k)
330	ENDDO
331	ENDDO
332	ENDDO
333	!$OMP END PARALLEL
334
335	ENDIF
336
337	END SUBROUTINE transpose_xz
338
339
340	!------------------------------------------------------------------------------!
341	! Description:
342	! ------------
343	!> Resorting data after the transposition from y to x. The transposition itself
344	!> is carried out in transpose_yx
345	!------------------------------------------------------------------------------!
346	SUBROUTINE resort_for_yx( f_inv, f_out )
347
348
349	USE indices, &
350	ONLY: nx
351
352	USE kinds
353
354	USE transpose_indices, &
355	ONLY: nyn_x, nys_x, nzb_x, nzt_x
356
357	IMPLICIT NONE
358
359	REAL(wp) :: f_inv(nys_x:nyn_x,nzb_x:nzt_x,0:nx) !<
360	REAL(wp) :: f_out(0:nx,nys_x:nyn_x,nzb_x:nzt_x) !<
361
362
363	INTEGER(iwp) :: i !<
364	INTEGER(iwp) :: j !<
365	INTEGER(iwp) :: k !<
366	!
367	!-- Rearrange indices of input array in order to make data to be send
368	!-- by MPI contiguous
369	!$OMP PARALLEL PRIVATE ( i, j, k )
370	!$OMP DO
371	DO i = 0, nx
372	DO k = nzb_x, nzt_x
373	DO j = nys_x, nyn_x
374	f_out(i,j,k) = f_inv(j,k,i)
375	ENDDO
376	ENDDO
377	ENDDO
378	!$OMP END PARALLEL
379
380	END SUBROUTINE resort_for_yx
381
382
383	!------------------------------------------------------------------------------!
384	! Description:
385	! ------------
386	!> Transposition of input array (f_in) from y to x. For the input array, all
387	!> elements along y reside on the same PE, while after transposition, all
388	!> elements along x reside on the same PE.
389	!------------------------------------------------------------------------------!
390	SUBROUTINE transpose_yx( f_in, f_inv )
391
392
393	USE cpulog, &
394	ONLY: cpu_log, cpu_log_nowait, log_point_s
395
396	USE indices, &
397	ONLY: nx, ny
398
399	USE kinds
400
401	USE pegrid
402
403	USE transpose_indices, &
404	ONLY: nxl_y, nxr_y, nyn_x, nys_x, nzb_x, nzb_y, nzt_x, nzt_y
405
406	IMPLICIT NONE
407
408	INTEGER(iwp) :: i !<
409	INTEGER(iwp) :: j !<
410	INTEGER(iwp) :: k !<
411	INTEGER(iwp) :: l !<
412	INTEGER(iwp) :: ys !<
413
414	REAL(wp) :: f_in(0:ny,nxl_y:nxr_y,nzb_y:nzt_y) !<
415	REAL(wp) :: f_inv(nys_x:nyn_x,nzb_x:nzt_x,0:nx) !<
416
417	REAL(wp), DIMENSION(nyn_x-nys_x+1,nzb_y:nzt_y,nxl_y:nxr_y,0:pdims(2)-1) :: work !<
418
419
420	IF ( numprocs /= 1 ) THEN
421
422	#if defined( __parallel )
423	!
424	!-- Reorder input array for transposition
425	!$OMP PARALLEL PRIVATE ( i, j, k, l, ys )
426	!$OMP DO
427	DO l = 0, pdims(2) - 1
428	ys = 0 + l * ( nyn_x - nys_x + 1 )
429	DO i = nxl_y, nxr_y
430	DO k = nzb_y, nzt_y
431	DO j = ys, ys + nyn_x - nys_x
432	work(j-ys+1,k,i,l) = f_in(j,i,k)
433	ENDDO
434	ENDDO
435	ENDDO
436	ENDDO
437	!$OMP END PARALLEL
438
439	!
440	!-- Transpose array
441	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start', cpu_log_nowait )
442	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
443	CALL MPI_ALLTOALL( work(1,nzb_y,nxl_y,0), sendrecvcount_xy, MPI_REAL, &
444	f_inv(nys_x,nzb_x,0), sendrecvcount_xy, MPI_REAL, &
445	comm1dy, ierr )
446	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
447	#endif
448
449	ELSE
450
451	!
452	!-- Reorder array f_in the same way as ALLTOALL did it
453	!$OMP PARALLEL PRIVATE ( i, j, k )
454	!$OMP DO
455	DO i = nxl_y, nxr_y
456	DO k = nzb_y, nzt_y
457	DO j = 0, ny
458	f_inv(j,k,i) = f_in(j,i,k)
459	ENDDO
460	ENDDO
461	ENDDO
462	!$OMP END PARALLEL
463
464	ENDIF
465
466	END SUBROUTINE transpose_yx
467
468
469	!------------------------------------------------------------------------------!
470	! Description:
471	! ------------
472	!> Transposition of input array (f_in) from y to x. For the input array, all
473	!> elements along y reside on the same PE, while after transposition, all
474	!> elements along x reside on the same PE.
475	!> This is a direct transposition for arrays with indices in regular order
476	!> (k,j,i) (cf. transpose_yx).
477	!------------------------------------------------------------------------------!
478	SUBROUTINE transpose_yxd( f_in, f_out )
479
480
481	USE cpulog, &
482	ONLY: cpu_log, cpu_log_nowait, log_point_s
483
484	USE indices, &
485	ONLY: nnx, nny, nnz, nx, nxl, nxr, nyn, nys, nz
486
487	USE kinds
488
489	USE pegrid
490
491	USE transpose_indices, &
492	ONLY: nyn_x, nys_x, nzb_x, nzt_x
493
494	IMPLICIT NONE
495
496	INTEGER(iwp) :: i !<
497	INTEGER(iwp) :: j !<
498	INTEGER(iwp) :: k !<
499	INTEGER(iwp) :: l !<
500	INTEGER(iwp) :: m !<
501	INTEGER(iwp) :: xs !<
502
503	REAL(wp) :: f_in(1:nz,nys:nyn,nxl:nxr) !<
504	REAL(wp) :: f_inv(nxl:nxr,1:nz,nys:nyn) !<
505	REAL(wp) :: f_out(0:nx,nys_x:nyn_x,nzb_x:nzt_x) !<
506	REAL(wp) :: work(nnxnnynnz) !<
507	#if defined( __parallel )
508
509	!
510	!-- Rearrange indices of input array in order to make data to be send
511	!-- by MPI contiguous
512	DO k = 1, nz
513	DO j = nys, nyn
514	DO i = nxl, nxr
515	f_inv(i,k,j) = f_in(k,j,i)
516	ENDDO
517	ENDDO
518	ENDDO
519
520	!
521	!-- Transpose array
522	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
523	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
524	CALL MPI_ALLTOALL( f_inv(nxl,1,nys), sendrecvcount_xy, MPI_REAL, &
525	work(1), sendrecvcount_xy, MPI_REAL, &
526	comm1dx, ierr )
527	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
528
529	!
530	!-- Reorder transposed array
531	m = 0
532	DO l = 0, pdims(1) - 1
533	xs = 0 + l * nnx
534	DO j = nys_x, nyn_x
535	DO k = 1, nz
536	DO i = xs, xs + nnx - 1
537	m = m + 1
538	f_out(i,j,k) = work(m)
539	ENDDO
540	ENDDO
541	ENDDO
542	ENDDO
543
544	#endif
545
546	END SUBROUTINE transpose_yxd
547
548
549	!------------------------------------------------------------------------------!
550	! Description:
551	! ------------
552	!> Resorting data for the transposition from y to z. The transposition itself
553	!> is carried out in transpose_yz
554	!------------------------------------------------------------------------------!
555	SUBROUTINE resort_for_yz( f_in, f_inv )
556
557
558	USE indices, &
559	ONLY: ny
560
561	USE kinds
562
563	USE transpose_indices, &
564	ONLY: nxl_y, nxr_y, nzb_y, nzt_y
565
566	IMPLICIT NONE
567
568	REAL(wp) :: f_in(0:ny,nxl_y:nxr_y,nzb_y:nzt_y) !<
569	REAL(wp) :: f_inv(nxl_y:nxr_y,nzb_y:nzt_y,0:ny) !<
570
571	INTEGER(iwp) :: i !<
572	INTEGER(iwp) :: j !<
573	INTEGER(iwp) :: k !<
574
575	!
576	!-- Rearrange indices of input array in order to make data to be send
577	!-- by MPI contiguous
578	!$OMP PARALLEL PRIVATE ( i, j, k )
579	!$OMP DO
580	DO j = 0, ny
581	DO k = nzb_y, nzt_y
582	DO i = nxl_y, nxr_y
583	f_inv(i,k,j) = f_in(j,i,k)
584	ENDDO
585	ENDDO
586	ENDDO
587	!$OMP END PARALLEL
588
589	END SUBROUTINE resort_for_yz
590
591
592	!------------------------------------------------------------------------------!
593	! Description:
594	! ------------
595	!> Transposition of input array (f_in) from y to z. For the input array, all
596	!> elements along y reside on the same PE, while after transposition, all
597	!> elements along z reside on the same PE.
598	!------------------------------------------------------------------------------!
599	SUBROUTINE transpose_yz( f_inv, f_out )
600
601
602	USE cpulog, &
603	ONLY: cpu_log, cpu_log_nowait, log_point_s
604
605	USE indices, &
606	ONLY: ny, nz
607
608	USE kinds
609
610	USE pegrid
611
612	USE transpose_indices, &
613	ONLY: nxl_y, nxl_z, nxr_y, nxr_z, nyn_z, nys_z, nzb_y, nzt_y
614
615	IMPLICIT NONE
616
617	INTEGER(iwp) :: i !<
618	INTEGER(iwp) :: j !<
619	INTEGER(iwp) :: k !<
620	INTEGER(iwp) :: l !<
621	INTEGER(iwp) :: zs !<
622
623	REAL(wp) :: f_inv(nxl_y:nxr_y,nzb_y:nzt_y,0:ny) !<
624	REAL(wp) :: f_out(nxl_z:nxr_z,nys_z:nyn_z,1:nz) !<
625
626	REAL(wp), DIMENSION(nxl_z:nxr_z,nzt_y-nzb_y+1,nys_z:nyn_z,0:pdims(1)-1) :: work !<
627
628
629	!
630	!-- If the PE grid is one-dimensional along y, only local reordering
631	!-- of the data is necessary and no transposition has to be done.
632	IF ( pdims(1) == 1 ) THEN
633
634	!$OMP PARALLEL PRIVATE ( i, j, k )
635	!$OMP DO
636	DO j = 0, ny
637	DO k = nzb_y, nzt_y
638	DO i = nxl_y, nxr_y
639	f_out(i,j,k) = f_inv(i,k,j)
640	ENDDO
641	ENDDO
642	ENDDO
643	!$OMP END PARALLEL
644
645	ELSE
646
647	#if defined( __parallel )
648	!
649	!-- Transpose array
650	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start', cpu_log_nowait )
651	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
652	CALL MPI_ALLTOALL( f_inv(nxl_y,nzb_y,0), sendrecvcount_yz, MPI_REAL, &
653	work(nxl_z,1,nys_z,0), sendrecvcount_yz, MPI_REAL, &
654	comm1dx, ierr )
655	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
656
657	!
658	!-- Reorder transposed array
659	!$OMP PARALLEL PRIVATE ( i, j, k, l, zs )
660	!$OMP DO
661	DO l = 0, pdims(1) - 1
662	zs = 1 + l * ( nzt_y - nzb_y + 1 )
663	DO j = nys_z, nyn_z
664	DO k = zs, zs + nzt_y - nzb_y
665	DO i = nxl_z, nxr_z
666	f_out(i,j,k) = work(i,k-zs+1,j,l)
667	ENDDO
668	ENDDO
669	ENDDO
670	ENDDO
671	!$OMP END PARALLEL
672	#endif
673
674	ENDIF
675
676	END SUBROUTINE transpose_yz
677
678
679	!------------------------------------------------------------------------------!
680	! Description:
681	! ------------
682	!> Resorting data for the transposition from z to x. The transposition itself
683	!> is carried out in transpose_zx
684	!------------------------------------------------------------------------------!
685	SUBROUTINE resort_for_zx( f_in, f_inv )
686
687
688	USE indices, &
689	ONLY: nxl, nxr, nyn, nys, nz
690
691	USE kinds
692
693	IMPLICIT NONE
694
695	REAL(wp) :: f_in(1:nz,nys:nyn,nxl:nxr) !<
696	REAL(wp) :: f_inv(nys:nyn,nxl:nxr,1:nz) !<
697
698	INTEGER(iwp) :: i !<
699	INTEGER(iwp) :: j !<
700	INTEGER(iwp) :: k !<
701
702	!
703	!-- Rearrange indices of input array in order to make data to be send
704	!-- by MPI contiguous
705	!$OMP PARALLEL PRIVATE ( i, j, k )
706	!$OMP DO
707	DO k = 1,nz
708	DO i = nxl, nxr
709	DO j = nys, nyn
710	f_inv(j,i,k) = f_in(k,j,i)
711	ENDDO
712	ENDDO
713	ENDDO
714	!$OMP END PARALLEL
715
716	END SUBROUTINE resort_for_zx
717
718
719	!------------------------------------------------------------------------------!
720	! Description:
721	! ------------
722	!> Transposition of input array (f_in) from z to x. For the input array, all
723	!> elements along z reside on the same PE, while after transposition, all
724	!> elements along x reside on the same PE.
725	!------------------------------------------------------------------------------!
726	SUBROUTINE transpose_zx( f_inv, f_out )
727
728
729	USE cpulog, &
730	ONLY: cpu_log, cpu_log_nowait, log_point_s
731
732	USE indices, &
733	ONLY: nnx, nx, nxl, nxr, nyn, nys, nz
734
735	USE kinds
736
737	USE pegrid
738
739	USE transpose_indices, &
740	ONLY: nyn_x, nys_x, nzb_x, nzt_x
741
742	IMPLICIT NONE
743
744	INTEGER(iwp) :: i !<
745	INTEGER(iwp) :: j !<
746	INTEGER(iwp) :: k !<
747	INTEGER(iwp) :: l !<
748	INTEGER(iwp) :: xs !<
749
750	REAL(wp) :: f_inv(nys:nyn,nxl:nxr,1:nz) !<
751	REAL(wp) :: f_out(0:nx,nys_x:nyn_x,nzb_x:nzt_x) !<
752
753	REAL(wp), DIMENSION(nys_x:nyn_x,nnx,nzb_x:nzt_x,0:pdims(1)-1) :: work !<
754
755
756	!
757	!-- If the PE grid is one-dimensional along y, only local reordering
758	!-- of the data is necessary and no transposition has to be done.
759	IF ( pdims(1) == 1 ) THEN
760
761	!$OMP PARALLEL PRIVATE ( i, j, k )
762	!$OMP DO
763	DO k = 1, nz
764	DO i = nxl, nxr
765	DO j = nys, nyn
766	f_out(i,j,k) = f_inv(j,i,k)
767	ENDDO
768	ENDDO
769	ENDDO
770	!$OMP END PARALLEL
771
772	ELSE
773
774	#if defined( __parallel )
775	!
776	!-- Transpose array
777	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start', cpu_log_nowait )
778	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
779	CALL MPI_ALLTOALL( f_inv(nys,nxl,1), sendrecvcount_zx, MPI_REAL, &
780	work(nys_x,1,nzb_x,0), sendrecvcount_zx, MPI_REAL, &
781	comm1dx, ierr )
782	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
783
784	!
785	!-- Reorder transposed array
786	!$OMP PARALLEL PRIVATE ( i, j, k, l, xs )
787	!$OMP DO
788	DO l = 0, pdims(1) - 1
789	xs = 0 + l * nnx
790	DO k = nzb_x, nzt_x
791	DO i = xs, xs + nnx - 1
792	DO j = nys_x, nyn_x
793	f_out(i,j,k) = work(j,i-xs+1,k,l)
794	ENDDO
795	ENDDO
796	ENDDO
797	ENDDO
798	!$OMP END PARALLEL
799	#endif
800
801	ENDIF
802
803	END SUBROUTINE transpose_zx
804
805
806	!------------------------------------------------------------------------------!
807	! Description:
808	! ------------
809	!> Resorting data after the transposition from z to y. The transposition itself
810	!> is carried out in transpose_zy
811	!------------------------------------------------------------------------------!
812	SUBROUTINE resort_for_zy( f_inv, f_out )
813
814
815	USE indices, &
816	ONLY: ny
817
818	USE kinds
819
820	USE transpose_indices, &
821	ONLY: nxl_y, nxr_y, nzb_y, nzt_y
822
823	IMPLICIT NONE
824
825	REAL(wp) :: f_inv(nxl_y:nxr_y,nzb_y:nzt_y,0:ny) !<
826	REAL(wp) :: f_out(0:ny,nxl_y:nxr_y,nzb_y:nzt_y) !<
827
828
829	INTEGER(iwp) :: i !<
830	INTEGER(iwp) :: j !<
831	INTEGER(iwp) :: k !<
832
833	!
834	!-- Rearrange indices of input array in order to make data to be send
835	!-- by MPI contiguous
836	!$OMP PARALLEL PRIVATE ( i, j, k )
837	!$OMP DO
838	DO k = nzb_y, nzt_y
839	DO j = 0, ny
840	DO i = nxl_y, nxr_y
841	f_out(j,i,k) = f_inv(i,k,j)
842	ENDDO
843	ENDDO
844	ENDDO
845	!$OMP END PARALLEL
846
847	END SUBROUTINE resort_for_zy
848
849
850	!------------------------------------------------------------------------------!
851	! Description:
852	! ------------
853	!> Transposition of input array (f_in) from z to y. For the input array, all
854	!> elements along z reside on the same PE, while after transposition, all
855	!> elements along y reside on the same PE.
856	!------------------------------------------------------------------------------!
857	SUBROUTINE transpose_zy( f_in, f_inv )
858
859
860	USE cpulog, &
861	ONLY: cpu_log, cpu_log_nowait, log_point_s
862
863	USE indices, &
864	ONLY: ny, nz
865
866	USE kinds
867
868	USE pegrid
869
870	USE transpose_indices, &
871	ONLY: nxl_y, nxl_z, nxr_y, nxr_z, nyn_z, nys_z, nzb_y, nzt_y
872
873	IMPLICIT NONE
874
875	INTEGER(iwp) :: i !<
876	INTEGER(iwp) :: j !<
877	INTEGER(iwp) :: k !<
878	INTEGER(iwp) :: l !<
879	INTEGER(iwp) :: zs !<
880
881	REAL(wp) :: f_in(nxl_z:nxr_z,nys_z:nyn_z,1:nz) !<
882	REAL(wp) :: f_inv(nxl_y:nxr_y,nzb_y:nzt_y,0:ny) !<
883
884	REAL(wp), DIMENSION(nxl_z:nxr_z,nzt_y-nzb_y+1,nys_z:nyn_z,0:pdims(1)-1) :: work !<
885
886	!
887	!-- If the PE grid is one-dimensional along y, the array has only to be
888	!-- reordered locally and therefore no transposition has to be done.
889	IF ( pdims(1) /= 1 ) THEN
890
891	#if defined( __parallel )
892	!
893	!-- Reorder input array for transposition
894	!$OMP PARALLEL PRIVATE ( i, j, k, l, zs )
895	!$OMP DO
896	DO l = 0, pdims(1) - 1
897	zs = 1 + l * ( nzt_y - nzb_y + 1 )
898	DO j = nys_z, nyn_z
899	DO k = zs, zs + nzt_y - nzb_y
900	DO i = nxl_z, nxr_z
901	work(i,k-zs+1,j,l) = f_in(i,j,k)
902	ENDDO
903	ENDDO
904	ENDDO
905	ENDDO
906	!$OMP END PARALLEL
907
908	!
909	!-- Transpose array
910	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start', cpu_log_nowait )
911	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
912	CALL MPI_ALLTOALL( work(nxl_z,1,nys_z,0), sendrecvcount_yz, MPI_REAL, &
913	f_inv(nxl_y,nzb_y,0), sendrecvcount_yz, MPI_REAL, &
914	comm1dx, ierr )
915	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
916	#endif
917
918	ELSE
919	!
920	!-- Reorder the array in the same way like ALLTOALL did it
921	!$OMP PARALLEL PRIVATE ( i, j, k )
922	!$OMP DO
923	DO k = nzb_y, nzt_y
924	DO j = 0, ny
925	DO i = nxl_y, nxr_y
926	f_inv(i,k,j) = f_in(i,j,k)
927	ENDDO
928	ENDDO
929	ENDDO
930	!$OMP END PARALLEL
931
932	ENDIF
933
934	END SUBROUTINE transpose_zy
935
936
937	!------------------------------------------------------------------------------!
938	! Description:
939	! ------------
940	!> Transposition of input array (f_in) from z to y. For the input array, all
941	!> elements along z reside on the same PE, while after transposition, all
942	!> elements along y reside on the same PE.
943	!> This is a direct transposition for arrays with indices in regular order
944	!> (k,j,i) (cf. transpose_zy).
945	!------------------------------------------------------------------------------!
946	SUBROUTINE transpose_zyd( f_in, f_out )
947
948
949	USE cpulog, &
950	ONLY: cpu_log, cpu_log_nowait, log_point_s
951
952	USE indices, &
953	ONLY: nnx, nny, nnz, nxl, nxr, nyn, nys, ny, nz
954
955	USE kinds
956
957	USE pegrid
958
959	USE transpose_indices, &
960	ONLY: nxl_y, nxl_yd, nxr_y, nxr_yd, nzb_y, nzb_yd, nzt_y, nzt_yd
961
962	IMPLICIT NONE
963
964	INTEGER(iwp) :: i !<
965	INTEGER(iwp) :: j !<
966	INTEGER(iwp) :: k !<
967	INTEGER(iwp) :: l !<
968	INTEGER(iwp) :: m !<
969	INTEGER(iwp) :: ys !<
970
971	REAL(wp) :: f_in(1:nz,nys:nyn,nxl:nxr) !<
972	REAL(wp) :: f_inv(nys:nyn,nxl:nxr,1:nz) !<
973	REAL(wp) :: f_out(0:ny,nxl_yd:nxr_yd,nzb_yd:nzt_yd) !<
974	REAL(wp) :: work(nnxnnynnz) !<
975
976	#if defined( __parallel )
977
978	!
979	!-- Rearrange indices of input array in order to make data to be send
980	!-- by MPI contiguous
981	DO i = nxl, nxr
982	DO j = nys, nyn
983	DO k = 1, nz
984	f_inv(j,i,k) = f_in(k,j,i)
985	ENDDO
986	ENDDO
987	ENDDO
988
989	!
990	!-- Move data to different array, because memory location of work1 is
991	!-- needed further below (work1 = work2).
992	!-- If the PE grid is one-dimensional along x, only local reordering
993	!-- of the data is necessary and no transposition has to be done.
994	IF ( pdims(2) == 1 ) THEN
995	DO k = 1, nz
996	DO i = nxl, nxr
997	DO j = nys, nyn
998	f_out(j,i,k) = f_inv(j,i,k)
999	ENDDO
1000	ENDDO
1001	ENDDO
1002	RETURN
1003	ENDIF
1004
1005	!
1006	!-- Transpose array
1007	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
1008	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
1009	CALL MPI_ALLTOALL( f_inv(nys,nxl,1), sendrecvcount_zyd, MPI_REAL, &
1010	work(1), sendrecvcount_zyd, MPI_REAL, &
1011	comm1dy, ierr )
1012	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
1013
1014	!
1015	!-- Reorder transposed array
1016	m = 0
1017	DO l = 0, pdims(2) - 1
1018	ys = 0 + l * nny
1019	DO k = nzb_yd, nzt_yd
1020	DO i = nxl_yd, nxr_yd
1021	DO j = ys, ys + nny - 1
1022	m = m + 1
1023	f_out(j,i,k) = work(m)
1024	ENDDO
1025	ENDDO
1026	ENDDO
1027	ENDDO
1028
1029	#endif
1030
1031	END SUBROUTINE transpose_zyd

Note: See TracBrowser for help on using the repository browser.

Download in other formats:

| Impressum | ©Leibniz Universität Hannover |