Home

Context Navigation

source: palm/trunk/SOURCE/transpose.f90 @ 2997

Last change on this file since 2997 was 2718, checked in by maronga, 7 years ago
deleting of deprecated files; headers updated where needed
Property svn:keywords set to `Id`
File size: 29.7 KB

Line
1	!> @file transpose.f90
2	!------------------------------------------------------------------------------!
3	! This file is part of the PALM model system.
4	!
5	! PALM is free software: you can redistribute it and/or modify it under the
6	! terms of the GNU General Public License as published by the Free Software
7	! Foundation, either version 3 of the License, or (at your option) any later
8	! version.
9	!
10	! PALM is distributed in the hope that it will be useful, but WITHOUT ANY
11	! WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
12	! A PARTICULAR PURPOSE. See the GNU General Public License for more details.
13	!
14	! You should have received a copy of the GNU General Public License along with
15	! PALM. If not, see <http://www.gnu.org/licenses/>.
16	!
17	! Copyright 1997-2018 Leibniz Universitaet Hannover
18	!------------------------------------------------------------------------------!
19	!
20	! Current revisions:
21	! -----------------
22	!
23	!
24	! Former revisions:
25	! -----------------
26	! $Id: transpose.f90 2718 2018-01-02 08:49:38Z suehring $
27	! Corrected "Former revisions" section
28	!
29	! 2696 2017-12-14 17:12:51Z kanani
30	! Change in file header (GPL part)
31	!
32	! 2119 2017-01-17 16:51:50Z raasch
33	!
34	! 2118 2017-01-17 16:38:49Z raasch
35	! OpenACC directives removed
36	!
37	! 2000 2016-08-20 18:09:15Z knoop
38	! Forced header and separation lines into 80 columns
39	!
40	! 1682 2015-10-07 23:56:08Z knoop
41	! Code annotations made doxygen readable
42	!
43	! 1324 2014-03-21 09:13:16Z suehring
44	! Bugfix: ONLY statement for module pegrid removed
45	!
46	! 1320 2014-03-20 08:40:49Z raasch
47	! ONLY-attribute added to USE-statements,
48	! kind-parameters added to all INTEGER and REAL declaration statements,
49	! kinds are defined in new module kinds,
50	! old module precision_kind is removed,
51	! revision history before 2012 removed,
52	! comment fields (!:) to be used for variable explanations added to
53	! all variable declaration statements
54	!
55	! 1318 2014-03-17 13:35:16Z raasch
56	! cpu_log_nowait parameter added to cpu measurements of the transpositions
57	! required for solving the Poisson equation (poisfft),
58	! module interfaces removed
59	!
60	! 1257 2013-11-08 15:18:40Z raasch
61	! openacc loop and loop vector clauses removed
62	!
63	! 1216 2013-08-26 09:31:42Z raasch
64	! re-sorting of the transposed / to be transposed arrays moved to separate
65	! routines resort_for_...
66	!
67	! 1111 2013-03-08 23:54:10Z raasch
68	! openACC directives added,
69	! resorting data from/to work changed, work got 4 dimensions instead of 1
70	!
71	! 1106 2013-03-04 05:31:38Z raasch
72	! preprocessor lines rearranged so that routines can also be used in serial
73	! (non-parallel) mode
74	!
75	! 1092 2013-02-02 11:24:22Z raasch
76	! unused variables removed
77	!
78	! 1036 2012-10-22 13:43:42Z raasch
79	! code put under GPL (PALM 3.9)
80	!
81	! 1003 2012-09-14 14:35:53Z raasch
82	! indices nxa, nya, etc. replaced by nx, ny, etc.
83	!
84	! Revision 1.1 1997/07/24 11:25:18 raasch
85	! Initial revision
86	!
87	!------------------------------------------------------------------------------!
88	! Description:
89	! ------------
90	!> Resorting data for the transposition from x to y. The transposition itself
91	!> is carried out in transpose_xy
92	!------------------------------------------------------------------------------!
93	SUBROUTINE resort_for_xy( f_in, f_inv )
94
95
96	USE indices, &
97	ONLY: nx
98
99	USE kinds
100
101	USE transpose_indices, &
102	ONLY: nxl_z, nxr_z, nyn_x, nyn_z, nys_x, nys_z, nzb_x, nzt_x
103
104	IMPLICIT NONE
105
106	REAL(wp) :: f_in(0:nx,nys_x:nyn_x,nzb_x:nzt_x) !<
107	REAL(wp) :: f_inv(nys_x:nyn_x,nzb_x:nzt_x,0:nx) !<
108
109
110	INTEGER(iwp) :: i !<
111	INTEGER(iwp) :: j !<
112	INTEGER(iwp) :: k !<
113	!
114	!-- Rearrange indices of input array in order to make data to be send
115	!-- by MPI contiguous
116	!$OMP PARALLEL PRIVATE ( i, j, k )
117	!$OMP DO
118	DO i = 0, nx
119	DO k = nzb_x, nzt_x
120	DO j = nys_x, nyn_x
121	f_inv(j,k,i) = f_in(i,j,k)
122	ENDDO
123	ENDDO
124	ENDDO
125	!$OMP END PARALLEL
126
127	END SUBROUTINE resort_for_xy
128
129
130	!------------------------------------------------------------------------------!
131	! Description:
132	! ------------
133	!> Transposition of input array (f_in) from x to y. For the input array, all
134	!> elements along x reside on the same PE, while after transposition, all
135	!> elements along y reside on the same PE.
136	!------------------------------------------------------------------------------!
137	SUBROUTINE transpose_xy( f_inv, f_out )
138
139
140	USE cpulog, &
141	ONLY: cpu_log, cpu_log_nowait, log_point_s
142
143	USE indices, &
144	ONLY: nx, ny
145
146	USE kinds
147
148	USE pegrid
149
150	USE transpose_indices, &
151	ONLY: nxl_y, nxr_y, nyn_x, nys_x, nzb_x, nzb_y, nzt_x, nzt_y
152
153	IMPLICIT NONE
154
155	INTEGER(iwp) :: i !<
156	INTEGER(iwp) :: j !<
157	INTEGER(iwp) :: k !<
158	INTEGER(iwp) :: l !<
159	INTEGER(iwp) :: ys !<
160
161	REAL(wp) :: f_inv(nys_x:nyn_x,nzb_x:nzt_x,0:nx) !<
162	REAL(wp) :: f_out(0:ny,nxl_y:nxr_y,nzb_y:nzt_y) !<
163
164	REAL(wp), DIMENSION(nyn_x-nys_x+1,nzb_y:nzt_y,nxl_y:nxr_y,0:pdims(2)-1) :: work !<
165
166
167	IF ( numprocs /= 1 ) THEN
168
169	#if defined( __parallel )
170	!
171	!-- Transpose array
172	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start', cpu_log_nowait )
173	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
174	CALL MPI_ALLTOALL( f_inv(nys_x,nzb_x,0), sendrecvcount_xy, MPI_REAL, &
175	work(1,nzb_y,nxl_y,0), sendrecvcount_xy, MPI_REAL, &
176	comm1dy, ierr )
177	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
178
179	!
180	!-- Reorder transposed array
181	!$OMP PARALLEL PRIVATE ( i, j, k, l, ys )
182	!$OMP DO
183	DO l = 0, pdims(2) - 1
184	ys = 0 + l * ( nyn_x - nys_x + 1 )
185	DO i = nxl_y, nxr_y
186	DO k = nzb_y, nzt_y
187	DO j = ys, ys + nyn_x - nys_x
188	f_out(j,i,k) = work(j-ys+1,k,i,l)
189	ENDDO
190	ENDDO
191	ENDDO
192	ENDDO
193	!$OMP END PARALLEL
194	#endif
195
196	ELSE
197
198	!
199	!-- Reorder transposed array
200	!$OMP PARALLEL PRIVATE ( i, j, k )
201	!$OMP DO
202	DO k = nzb_y, nzt_y
203	DO i = nxl_y, nxr_y
204	DO j = 0, ny
205	f_out(j,i,k) = f_inv(j,k,i)
206	ENDDO
207	ENDDO
208	ENDDO
209	!$OMP END PARALLEL
210
211	ENDIF
212
213	END SUBROUTINE transpose_xy
214
215
216	!------------------------------------------------------------------------------!
217	! Description:
218	! ------------
219	!> Resorting data after the transposition from x to z. The transposition itself
220	!> is carried out in transpose_xz
221	!------------------------------------------------------------------------------!
222	SUBROUTINE resort_for_xz( f_inv, f_out )
223
224
225	USE indices, &
226	ONLY: nxl, nxr, nyn, nys, nz
227
228	USE kinds
229
230	IMPLICIT NONE
231
232	REAL(wp) :: f_inv(nys:nyn,nxl:nxr,1:nz) !<
233	REAL(wp) :: f_out(1:nz,nys:nyn,nxl:nxr) !<
234
235	INTEGER(iwp) :: i !<
236	INTEGER(iwp) :: j !<
237	INTEGER(iwp) :: k !<
238	!
239	!-- Rearrange indices of input array in order to make data to be send
240	!-- by MPI contiguous.
241	!-- In case of parallel fft/transposition, scattered store is faster in
242	!-- backward direction!!!
243	!$OMP PARALLEL PRIVATE ( i, j, k )
244	!$OMP DO
245	DO k = 1, nz
246	DO i = nxl, nxr
247	DO j = nys, nyn
248	f_out(k,j,i) = f_inv(j,i,k)
249	ENDDO
250	ENDDO
251	ENDDO
252	!$OMP END PARALLEL
253
254	END SUBROUTINE resort_for_xz
255
256
257	!------------------------------------------------------------------------------!
258	! Description:
259	! ------------
260	!> Transposition of input array (f_in) from x to z. For the input array, all
261	!> elements along x reside on the same PE, while after transposition, all
262	!> elements along z reside on the same PE.
263	!------------------------------------------------------------------------------!
264	SUBROUTINE transpose_xz( f_in, f_inv )
265
266
267	USE cpulog, &
268	ONLY: cpu_log, cpu_log_nowait, log_point_s
269
270	USE indices, &
271	ONLY: nnx, nx, nxl, nxr, ny, nyn, nys, nz
272
273	USE kinds
274
275	USE pegrid
276
277	USE transpose_indices, &
278	ONLY: nyn_x, nys_x, nzb_x, nzt_x
279
280	IMPLICIT NONE
281
282	INTEGER(iwp) :: i !<
283	INTEGER(iwp) :: j !<
284	INTEGER(iwp) :: k !<
285	INTEGER(iwp) :: l !<
286	INTEGER(iwp) :: xs !<
287
288	REAL(wp) :: f_in(0:nx,nys_x:nyn_x,nzb_x:nzt_x) !<
289	REAL(wp) :: f_inv(nys:nyn,nxl:nxr,1:nz) !<
290
291	REAL(wp), DIMENSION(nys_x:nyn_x,nnx,nzb_x:nzt_x,0:pdims(1)-1) :: work !<
292
293
294	!
295	!-- If the PE grid is one-dimensional along y, the array has only to be
296	!-- reordered locally and therefore no transposition has to be done.
297	IF ( pdims(1) /= 1 ) THEN
298
299	#if defined( __parallel )
300	!
301	!-- Reorder input array for transposition
302	!$OMP PARALLEL PRIVATE ( i, j, k, l, xs )
303	!$OMP DO
304	DO l = 0, pdims(1) - 1
305	xs = 0 + l * nnx
306	DO k = nzb_x, nzt_x
307	DO i = xs, xs + nnx - 1
308	DO j = nys_x, nyn_x
309	work(j,i-xs+1,k,l) = f_in(i,j,k)
310	ENDDO
311	ENDDO
312	ENDDO
313	ENDDO
314	!$OMP END PARALLEL
315
316	!
317	!-- Transpose array
318	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start', cpu_log_nowait )
319	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
320	CALL MPI_ALLTOALL( work(nys_x,1,nzb_x,0), sendrecvcount_zx, MPI_REAL, &
321	f_inv(nys,nxl,1), sendrecvcount_zx, MPI_REAL, &
322	comm1dx, ierr )
323	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
324	#endif
325
326	ELSE
327
328	!
329	!-- Reorder the array in a way that the z index is in first position
330	!$OMP PARALLEL PRIVATE ( i, j, k )
331	!$OMP DO
332	DO i = nxl, nxr
333	DO j = nys, nyn
334	DO k = 1, nz
335	f_inv(j,i,k) = f_in(i,j,k)
336	ENDDO
337	ENDDO
338	ENDDO
339	!$OMP END PARALLEL
340
341	ENDIF
342
343	END SUBROUTINE transpose_xz
344
345
346	!------------------------------------------------------------------------------!
347	! Description:
348	! ------------
349	!> Resorting data after the transposition from y to x. The transposition itself
350	!> is carried out in transpose_yx
351	!------------------------------------------------------------------------------!
352	SUBROUTINE resort_for_yx( f_inv, f_out )
353
354
355	USE indices, &
356	ONLY: nx
357
358	USE kinds
359
360	USE transpose_indices, &
361	ONLY: nyn_x, nys_x, nzb_x, nzt_x
362
363	IMPLICIT NONE
364
365	REAL(wp) :: f_inv(nys_x:nyn_x,nzb_x:nzt_x,0:nx) !<
366	REAL(wp) :: f_out(0:nx,nys_x:nyn_x,nzb_x:nzt_x) !<
367
368
369	INTEGER(iwp) :: i !<
370	INTEGER(iwp) :: j !<
371	INTEGER(iwp) :: k !<
372	!
373	!-- Rearrange indices of input array in order to make data to be send
374	!-- by MPI contiguous
375	!$OMP PARALLEL PRIVATE ( i, j, k )
376	!$OMP DO
377	DO i = 0, nx
378	DO k = nzb_x, nzt_x
379	DO j = nys_x, nyn_x
380	f_out(i,j,k) = f_inv(j,k,i)
381	ENDDO
382	ENDDO
383	ENDDO
384	!$OMP END PARALLEL
385
386	END SUBROUTINE resort_for_yx
387
388
389	!------------------------------------------------------------------------------!
390	! Description:
391	! ------------
392	!> Transposition of input array (f_in) from y to x. For the input array, all
393	!> elements along y reside on the same PE, while after transposition, all
394	!> elements along x reside on the same PE.
395	!------------------------------------------------------------------------------!
396	SUBROUTINE transpose_yx( f_in, f_inv )
397
398
399	USE cpulog, &
400	ONLY: cpu_log, cpu_log_nowait, log_point_s
401
402	USE indices, &
403	ONLY: nx, ny
404
405	USE kinds
406
407	USE pegrid
408
409	USE transpose_indices, &
410	ONLY: nxl_y, nxr_y, nyn_x, nys_x, nzb_x, nzb_y, nzt_x, nzt_y
411
412	IMPLICIT NONE
413
414	INTEGER(iwp) :: i !<
415	INTEGER(iwp) :: j !<
416	INTEGER(iwp) :: k !<
417	INTEGER(iwp) :: l !<
418	INTEGER(iwp) :: ys !<
419
420	REAL(wp) :: f_in(0:ny,nxl_y:nxr_y,nzb_y:nzt_y) !<
421	REAL(wp) :: f_inv(nys_x:nyn_x,nzb_x:nzt_x,0:nx) !<
422
423	REAL(wp), DIMENSION(nyn_x-nys_x+1,nzb_y:nzt_y,nxl_y:nxr_y,0:pdims(2)-1) :: work !<
424
425
426	IF ( numprocs /= 1 ) THEN
427
428	#if defined( __parallel )
429	!
430	!-- Reorder input array for transposition
431	!$OMP PARALLEL PRIVATE ( i, j, k, l, ys )
432	!$OMP DO
433	DO l = 0, pdims(2) - 1
434	ys = 0 + l * ( nyn_x - nys_x + 1 )
435	DO i = nxl_y, nxr_y
436	DO k = nzb_y, nzt_y
437	DO j = ys, ys + nyn_x - nys_x
438	work(j-ys+1,k,i,l) = f_in(j,i,k)
439	ENDDO
440	ENDDO
441	ENDDO
442	ENDDO
443	!$OMP END PARALLEL
444
445	!
446	!-- Transpose array
447	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start', cpu_log_nowait )
448	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
449	CALL MPI_ALLTOALL( work(1,nzb_y,nxl_y,0), sendrecvcount_xy, MPI_REAL, &
450	f_inv(nys_x,nzb_x,0), sendrecvcount_xy, MPI_REAL, &
451	comm1dy, ierr )
452	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
453	#endif
454
455	ELSE
456
457	!
458	!-- Reorder array f_in the same way as ALLTOALL did it
459	!$OMP PARALLEL PRIVATE ( i, j, k )
460	!$OMP DO
461	DO i = nxl_y, nxr_y
462	DO k = nzb_y, nzt_y
463	DO j = 0, ny
464	f_inv(j,k,i) = f_in(j,i,k)
465	ENDDO
466	ENDDO
467	ENDDO
468	!$OMP END PARALLEL
469
470	ENDIF
471
472	END SUBROUTINE transpose_yx
473
474
475	!------------------------------------------------------------------------------!
476	! Description:
477	! ------------
478	!> Transposition of input array (f_in) from y to x. For the input array, all
479	!> elements along y reside on the same PE, while after transposition, all
480	!> elements along x reside on the same PE.
481	!> This is a direct transposition for arrays with indices in regular order
482	!> (k,j,i) (cf. transpose_yx).
483	!------------------------------------------------------------------------------!
484	SUBROUTINE transpose_yxd( f_in, f_out )
485
486
487	USE cpulog, &
488	ONLY: cpu_log, cpu_log_nowait, log_point_s
489
490	USE indices, &
491	ONLY: nnx, nny, nnz, nx, nxl, nxr, nyn, nys, nz
492
493	USE kinds
494
495	USE pegrid
496
497	USE transpose_indices, &
498	ONLY: nyn_x, nys_x, nzb_x, nzt_x
499
500	IMPLICIT NONE
501
502	INTEGER(iwp) :: i !<
503	INTEGER(iwp) :: j !<
504	INTEGER(iwp) :: k !<
505	INTEGER(iwp) :: l !<
506	INTEGER(iwp) :: m !<
507	INTEGER(iwp) :: xs !<
508
509	REAL(wp) :: f_in(1:nz,nys:nyn,nxl:nxr) !<
510	REAL(wp) :: f_inv(nxl:nxr,1:nz,nys:nyn) !<
511	REAL(wp) :: f_out(0:nx,nys_x:nyn_x,nzb_x:nzt_x) !<
512	REAL(wp) :: work(nnxnnynnz) !<
513	#if defined( __parallel )
514
515	!
516	!-- Rearrange indices of input array in order to make data to be send
517	!-- by MPI contiguous
518	DO k = 1, nz
519	DO j = nys, nyn
520	DO i = nxl, nxr
521	f_inv(i,k,j) = f_in(k,j,i)
522	ENDDO
523	ENDDO
524	ENDDO
525
526	!
527	!-- Transpose array
528	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
529	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
530	CALL MPI_ALLTOALL( f_inv(nxl,1,nys), sendrecvcount_xy, MPI_REAL, &
531	work(1), sendrecvcount_xy, MPI_REAL, &
532	comm1dx, ierr )
533	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
534
535	!
536	!-- Reorder transposed array
537	m = 0
538	DO l = 0, pdims(1) - 1
539	xs = 0 + l * nnx
540	DO j = nys_x, nyn_x
541	DO k = 1, nz
542	DO i = xs, xs + nnx - 1
543	m = m + 1
544	f_out(i,j,k) = work(m)
545	ENDDO
546	ENDDO
547	ENDDO
548	ENDDO
549
550	#endif
551
552	END SUBROUTINE transpose_yxd
553
554
555	!------------------------------------------------------------------------------!
556	! Description:
557	! ------------
558	!> Resorting data for the transposition from y to z. The transposition itself
559	!> is carried out in transpose_yz
560	!------------------------------------------------------------------------------!
561	SUBROUTINE resort_for_yz( f_in, f_inv )
562
563
564	USE indices, &
565	ONLY: ny
566
567	USE kinds
568
569	USE transpose_indices, &
570	ONLY: nxl_y, nxr_y, nzb_y, nzt_y
571
572	IMPLICIT NONE
573
574	REAL(wp) :: f_in(0:ny,nxl_y:nxr_y,nzb_y:nzt_y) !<
575	REAL(wp) :: f_inv(nxl_y:nxr_y,nzb_y:nzt_y,0:ny) !<
576
577	INTEGER(iwp) :: i !<
578	INTEGER(iwp) :: j !<
579	INTEGER(iwp) :: k !<
580
581	!
582	!-- Rearrange indices of input array in order to make data to be send
583	!-- by MPI contiguous
584	!$OMP PARALLEL PRIVATE ( i, j, k )
585	!$OMP DO
586	DO j = 0, ny
587	DO k = nzb_y, nzt_y
588	DO i = nxl_y, nxr_y
589	f_inv(i,k,j) = f_in(j,i,k)
590	ENDDO
591	ENDDO
592	ENDDO
593	!$OMP END PARALLEL
594
595	END SUBROUTINE resort_for_yz
596
597
598	!------------------------------------------------------------------------------!
599	! Description:
600	! ------------
601	!> Transposition of input array (f_in) from y to z. For the input array, all
602	!> elements along y reside on the same PE, while after transposition, all
603	!> elements along z reside on the same PE.
604	!------------------------------------------------------------------------------!
605	SUBROUTINE transpose_yz( f_inv, f_out )
606
607
608	USE cpulog, &
609	ONLY: cpu_log, cpu_log_nowait, log_point_s
610
611	USE indices, &
612	ONLY: ny, nz
613
614	USE kinds
615
616	USE pegrid
617
618	USE transpose_indices, &
619	ONLY: nxl_y, nxl_z, nxr_y, nxr_z, nyn_z, nys_z, nzb_y, nzt_y
620
621	IMPLICIT NONE
622
623	INTEGER(iwp) :: i !<
624	INTEGER(iwp) :: j !<
625	INTEGER(iwp) :: k !<
626	INTEGER(iwp) :: l !<
627	INTEGER(iwp) :: zs !<
628
629	REAL(wp) :: f_inv(nxl_y:nxr_y,nzb_y:nzt_y,0:ny) !<
630	REAL(wp) :: f_out(nxl_z:nxr_z,nys_z:nyn_z,1:nz) !<
631
632	REAL(wp), DIMENSION(nxl_z:nxr_z,nzt_y-nzb_y+1,nys_z:nyn_z,0:pdims(1)-1) :: work !<
633
634
635	!
636	!-- If the PE grid is one-dimensional along y, only local reordering
637	!-- of the data is necessary and no transposition has to be done.
638	IF ( pdims(1) == 1 ) THEN
639
640	!$OMP PARALLEL PRIVATE ( i, j, k )
641	!$OMP DO
642	DO j = 0, ny
643	DO k = nzb_y, nzt_y
644	DO i = nxl_y, nxr_y
645	f_out(i,j,k) = f_inv(i,k,j)
646	ENDDO
647	ENDDO
648	ENDDO
649	!$OMP END PARALLEL
650
651	ELSE
652
653	#if defined( __parallel )
654	!
655	!-- Transpose array
656	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start', cpu_log_nowait )
657	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
658	CALL MPI_ALLTOALL( f_inv(nxl_y,nzb_y,0), sendrecvcount_yz, MPI_REAL, &
659	work(nxl_z,1,nys_z,0), sendrecvcount_yz, MPI_REAL, &
660	comm1dx, ierr )
661	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
662
663	!
664	!-- Reorder transposed array
665	!$OMP PARALLEL PRIVATE ( i, j, k, l, zs )
666	!$OMP DO
667	DO l = 0, pdims(1) - 1
668	zs = 1 + l * ( nzt_y - nzb_y + 1 )
669	DO j = nys_z, nyn_z
670	DO k = zs, zs + nzt_y - nzb_y
671	DO i = nxl_z, nxr_z
672	f_out(i,j,k) = work(i,k-zs+1,j,l)
673	ENDDO
674	ENDDO
675	ENDDO
676	ENDDO
677	!$OMP END PARALLEL
678	#endif
679
680	ENDIF
681
682	END SUBROUTINE transpose_yz
683
684
685	!------------------------------------------------------------------------------!
686	! Description:
687	! ------------
688	!> Resorting data for the transposition from z to x. The transposition itself
689	!> is carried out in transpose_zx
690	!------------------------------------------------------------------------------!
691	SUBROUTINE resort_for_zx( f_in, f_inv )
692
693
694	USE indices, &
695	ONLY: nxl, nxr, nyn, nys, nz
696
697	USE kinds
698
699	IMPLICIT NONE
700
701	REAL(wp) :: f_in(1:nz,nys:nyn,nxl:nxr) !<
702	REAL(wp) :: f_inv(nys:nyn,nxl:nxr,1:nz) !<
703
704	INTEGER(iwp) :: i !<
705	INTEGER(iwp) :: j !<
706	INTEGER(iwp) :: k !<
707
708	!
709	!-- Rearrange indices of input array in order to make data to be send
710	!-- by MPI contiguous
711	!$OMP PARALLEL PRIVATE ( i, j, k )
712	!$OMP DO
713	DO k = 1,nz
714	DO i = nxl, nxr
715	DO j = nys, nyn
716	f_inv(j,i,k) = f_in(k,j,i)
717	ENDDO
718	ENDDO
719	ENDDO
720	!$OMP END PARALLEL
721
722	END SUBROUTINE resort_for_zx
723
724
725	!------------------------------------------------------------------------------!
726	! Description:
727	! ------------
728	!> Transposition of input array (f_in) from z to x. For the input array, all
729	!> elements along z reside on the same PE, while after transposition, all
730	!> elements along x reside on the same PE.
731	!------------------------------------------------------------------------------!
732	SUBROUTINE transpose_zx( f_inv, f_out )
733
734
735	USE cpulog, &
736	ONLY: cpu_log, cpu_log_nowait, log_point_s
737
738	USE indices, &
739	ONLY: nnx, nx, nxl, nxr, nyn, nys, nz
740
741	USE kinds
742
743	USE pegrid
744
745	USE transpose_indices, &
746	ONLY: nyn_x, nys_x, nzb_x, nzt_x
747
748	IMPLICIT NONE
749
750	INTEGER(iwp) :: i !<
751	INTEGER(iwp) :: j !<
752	INTEGER(iwp) :: k !<
753	INTEGER(iwp) :: l !<
754	INTEGER(iwp) :: xs !<
755
756	REAL(wp) :: f_inv(nys:nyn,nxl:nxr,1:nz) !<
757	REAL(wp) :: f_out(0:nx,nys_x:nyn_x,nzb_x:nzt_x) !<
758
759	REAL(wp), DIMENSION(nys_x:nyn_x,nnx,nzb_x:nzt_x,0:pdims(1)-1) :: work !<
760
761
762	!
763	!-- If the PE grid is one-dimensional along y, only local reordering
764	!-- of the data is necessary and no transposition has to be done.
765	IF ( pdims(1) == 1 ) THEN
766
767	!$OMP PARALLEL PRIVATE ( i, j, k )
768	!$OMP DO
769	DO k = 1, nz
770	DO i = nxl, nxr
771	DO j = nys, nyn
772	f_out(i,j,k) = f_inv(j,i,k)
773	ENDDO
774	ENDDO
775	ENDDO
776	!$OMP END PARALLEL
777
778	ELSE
779
780	#if defined( __parallel )
781	!
782	!-- Transpose array
783	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start', cpu_log_nowait )
784	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
785	CALL MPI_ALLTOALL( f_inv(nys,nxl,1), sendrecvcount_zx, MPI_REAL, &
786	work(nys_x,1,nzb_x,0), sendrecvcount_zx, MPI_REAL, &
787	comm1dx, ierr )
788	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
789
790	!
791	!-- Reorder transposed array
792	!$OMP PARALLEL PRIVATE ( i, j, k, l, xs )
793	!$OMP DO
794	DO l = 0, pdims(1) - 1
795	xs = 0 + l * nnx
796	DO k = nzb_x, nzt_x
797	DO i = xs, xs + nnx - 1
798	DO j = nys_x, nyn_x
799	f_out(i,j,k) = work(j,i-xs+1,k,l)
800	ENDDO
801	ENDDO
802	ENDDO
803	ENDDO
804	!$OMP END PARALLEL
805	#endif
806
807	ENDIF
808
809	END SUBROUTINE transpose_zx
810
811
812	!------------------------------------------------------------------------------!
813	! Description:
814	! ------------
815	!> Resorting data after the transposition from z to y. The transposition itself
816	!> is carried out in transpose_zy
817	!------------------------------------------------------------------------------!
818	SUBROUTINE resort_for_zy( f_inv, f_out )
819
820
821	USE indices, &
822	ONLY: ny
823
824	USE kinds
825
826	USE transpose_indices, &
827	ONLY: nxl_y, nxr_y, nzb_y, nzt_y
828
829	IMPLICIT NONE
830
831	REAL(wp) :: f_inv(nxl_y:nxr_y,nzb_y:nzt_y,0:ny) !<
832	REAL(wp) :: f_out(0:ny,nxl_y:nxr_y,nzb_y:nzt_y) !<
833
834
835	INTEGER(iwp) :: i !<
836	INTEGER(iwp) :: j !<
837	INTEGER(iwp) :: k !<
838
839	!
840	!-- Rearrange indices of input array in order to make data to be send
841	!-- by MPI contiguous
842	!$OMP PARALLEL PRIVATE ( i, j, k )
843	!$OMP DO
844	DO k = nzb_y, nzt_y
845	DO j = 0, ny
846	DO i = nxl_y, nxr_y
847	f_out(j,i,k) = f_inv(i,k,j)
848	ENDDO
849	ENDDO
850	ENDDO
851	!$OMP END PARALLEL
852
853	END SUBROUTINE resort_for_zy
854
855
856	!------------------------------------------------------------------------------!
857	! Description:
858	! ------------
859	!> Transposition of input array (f_in) from z to y. For the input array, all
860	!> elements along z reside on the same PE, while after transposition, all
861	!> elements along y reside on the same PE.
862	!------------------------------------------------------------------------------!
863	SUBROUTINE transpose_zy( f_in, f_inv )
864
865
866	USE cpulog, &
867	ONLY: cpu_log, cpu_log_nowait, log_point_s
868
869	USE indices, &
870	ONLY: ny, nz
871
872	USE kinds
873
874	USE pegrid
875
876	USE transpose_indices, &
877	ONLY: nxl_y, nxl_z, nxr_y, nxr_z, nyn_z, nys_z, nzb_y, nzt_y
878
879	IMPLICIT NONE
880
881	INTEGER(iwp) :: i !<
882	INTEGER(iwp) :: j !<
883	INTEGER(iwp) :: k !<
884	INTEGER(iwp) :: l !<
885	INTEGER(iwp) :: zs !<
886
887	REAL(wp) :: f_in(nxl_z:nxr_z,nys_z:nyn_z,1:nz) !<
888	REAL(wp) :: f_inv(nxl_y:nxr_y,nzb_y:nzt_y,0:ny) !<
889
890	REAL(wp), DIMENSION(nxl_z:nxr_z,nzt_y-nzb_y+1,nys_z:nyn_z,0:pdims(1)-1) :: work !<
891
892	!
893	!-- If the PE grid is one-dimensional along y, the array has only to be
894	!-- reordered locally and therefore no transposition has to be done.
895	IF ( pdims(1) /= 1 ) THEN
896
897	#if defined( __parallel )
898	!
899	!-- Reorder input array for transposition
900	!$OMP PARALLEL PRIVATE ( i, j, k, l, zs )
901	!$OMP DO
902	DO l = 0, pdims(1) - 1
903	zs = 1 + l * ( nzt_y - nzb_y + 1 )
904	DO j = nys_z, nyn_z
905	DO k = zs, zs + nzt_y - nzb_y
906	DO i = nxl_z, nxr_z
907	work(i,k-zs+1,j,l) = f_in(i,j,k)
908	ENDDO
909	ENDDO
910	ENDDO
911	ENDDO
912	!$OMP END PARALLEL
913
914	!
915	!-- Transpose array
916	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start', cpu_log_nowait )
917	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
918	CALL MPI_ALLTOALL( work(nxl_z,1,nys_z,0), sendrecvcount_yz, MPI_REAL, &
919	f_inv(nxl_y,nzb_y,0), sendrecvcount_yz, MPI_REAL, &
920	comm1dx, ierr )
921	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
922	#endif
923
924	ELSE
925	!
926	!-- Reorder the array in the same way like ALLTOALL did it
927	!$OMP PARALLEL PRIVATE ( i, j, k )
928	!$OMP DO
929	DO k = nzb_y, nzt_y
930	DO j = 0, ny
931	DO i = nxl_y, nxr_y
932	f_inv(i,k,j) = f_in(i,j,k)
933	ENDDO
934	ENDDO
935	ENDDO
936	!$OMP END PARALLEL
937
938	ENDIF
939
940	END SUBROUTINE transpose_zy
941
942
943	!------------------------------------------------------------------------------!
944	! Description:
945	! ------------
946	!> Transposition of input array (f_in) from z to y. For the input array, all
947	!> elements along z reside on the same PE, while after transposition, all
948	!> elements along y reside on the same PE.
949	!> This is a direct transposition for arrays with indices in regular order
950	!> (k,j,i) (cf. transpose_zy).
951	!------------------------------------------------------------------------------!
952	SUBROUTINE transpose_zyd( f_in, f_out )
953
954
955	USE cpulog, &
956	ONLY: cpu_log, cpu_log_nowait, log_point_s
957
958	USE indices, &
959	ONLY: nnx, nny, nnz, nxl, nxr, nyn, nys, ny, nz
960
961	USE kinds
962
963	USE pegrid
964
965	USE transpose_indices, &
966	ONLY: nxl_y, nxl_yd, nxr_y, nxr_yd, nzb_y, nzb_yd, nzt_y, nzt_yd
967
968	IMPLICIT NONE
969
970	INTEGER(iwp) :: i !<
971	INTEGER(iwp) :: j !<
972	INTEGER(iwp) :: k !<
973	INTEGER(iwp) :: l !<
974	INTEGER(iwp) :: m !<
975	INTEGER(iwp) :: ys !<
976
977	REAL(wp) :: f_in(1:nz,nys:nyn,nxl:nxr) !<
978	REAL(wp) :: f_inv(nys:nyn,nxl:nxr,1:nz) !<
979	REAL(wp) :: f_out(0:ny,nxl_yd:nxr_yd,nzb_yd:nzt_yd) !<
980	REAL(wp) :: work(nnxnnynnz) !<
981
982	#if defined( __parallel )
983
984	!
985	!-- Rearrange indices of input array in order to make data to be send
986	!-- by MPI contiguous
987	DO i = nxl, nxr
988	DO j = nys, nyn
989	DO k = 1, nz
990	f_inv(j,i,k) = f_in(k,j,i)
991	ENDDO
992	ENDDO
993	ENDDO
994
995	!
996	!-- Move data to different array, because memory location of work1 is
997	!-- needed further below (work1 = work2).
998	!-- If the PE grid is one-dimensional along x, only local reordering
999	!-- of the data is necessary and no transposition has to be done.
1000	IF ( pdims(2) == 1 ) THEN
1001	DO k = 1, nz
1002	DO i = nxl, nxr
1003	DO j = nys, nyn
1004	f_out(j,i,k) = f_inv(j,i,k)
1005	ENDDO
1006	ENDDO
1007	ENDDO
1008	RETURN
1009	ENDIF
1010
1011	!
1012	!-- Transpose array
1013	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
1014	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
1015	CALL MPI_ALLTOALL( f_inv(nys,nxl,1), sendrecvcount_zyd, MPI_REAL, &
1016	work(1), sendrecvcount_zyd, MPI_REAL, &
1017	comm1dy, ierr )
1018	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
1019
1020	!
1021	!-- Reorder transposed array
1022	m = 0
1023	DO l = 0, pdims(2) - 1
1024	ys = 0 + l * nny
1025	DO k = nzb_yd, nzt_yd
1026	DO i = nxl_yd, nxr_yd
1027	DO j = ys, ys + nny - 1
1028	m = m + 1
1029	f_out(j,i,k) = work(m)
1030	ENDDO
1031	ENDDO
1032	ENDDO
1033	ENDDO
1034
1035	#endif
1036
1037	END SUBROUTINE transpose_zyd

Note: See TracBrowser for help on using the repository browser.

Download in other formats:

| Impressum | ©Leibniz Universität Hannover |