Home

Context Navigation

source: palm/trunk/SOURCE/transpose.f90 @ 3499

Last change on this file since 3499 was 3241, checked in by raasch, 6 years ago
various changes to avoid compiler warnings (mainly removal of unused variables)
Property svn:keywords set to `Id`
File size: 29.7 KB

Line
1	!> @file transpose.f90
2	!------------------------------------------------------------------------------!
3	! This file is part of the PALM model system.
4	!
5	! PALM is free software: you can redistribute it and/or modify it under the
6	! terms of the GNU General Public License as published by the Free Software
7	! Foundation, either version 3 of the License, or (at your option) any later
8	! version.
9	!
10	! PALM is distributed in the hope that it will be useful, but WITHOUT ANY
11	! WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
12	! A PARTICULAR PURPOSE. See the GNU General Public License for more details.
13	!
14	! You should have received a copy of the GNU General Public License along with
15	! PALM. If not, see <http://www.gnu.org/licenses/>.
16	!
17	! Copyright 1997-2018 Leibniz Universitaet Hannover
18	!------------------------------------------------------------------------------!
19	!
20	! Current revisions:
21	! -----------------
22	!
23	!
24	! Former revisions:
25	! -----------------
26	! $Id: transpose.f90 3241 2018-09-12 15:02:00Z suehring $
27	! unused variables removed
28	!
29	! 2718 2018-01-02 08:49:38Z maronga
30	! Corrected "Former revisions" section
31	!
32	! 2696 2017-12-14 17:12:51Z kanani
33	! Change in file header (GPL part)
34	!
35	! 2119 2017-01-17 16:51:50Z raasch
36	!
37	! 2118 2017-01-17 16:38:49Z raasch
38	! OpenACC directives removed
39	!
40	! 2000 2016-08-20 18:09:15Z knoop
41	! Forced header and separation lines into 80 columns
42	!
43	! 1682 2015-10-07 23:56:08Z knoop
44	! Code annotations made doxygen readable
45	!
46	! 1324 2014-03-21 09:13:16Z suehring
47	! Bugfix: ONLY statement for module pegrid removed
48	!
49	! 1320 2014-03-20 08:40:49Z raasch
50	! ONLY-attribute added to USE-statements,
51	! kind-parameters added to all INTEGER and REAL declaration statements,
52	! kinds are defined in new module kinds,
53	! old module precision_kind is removed,
54	! revision history before 2012 removed,
55	! comment fields (!:) to be used for variable explanations added to
56	! all variable declaration statements
57	!
58	! 1318 2014-03-17 13:35:16Z raasch
59	! cpu_log_nowait parameter added to cpu measurements of the transpositions
60	! required for solving the Poisson equation (poisfft),
61	! module interfaces removed
62	!
63	! 1257 2013-11-08 15:18:40Z raasch
64	! openacc loop and loop vector clauses removed
65	!
66	! 1216 2013-08-26 09:31:42Z raasch
67	! re-sorting of the transposed / to be transposed arrays moved to separate
68	! routines resort_for_...
69	!
70	! 1111 2013-03-08 23:54:10Z raasch
71	! openACC directives added,
72	! resorting data from/to work changed, work got 4 dimensions instead of 1
73	!
74	! 1106 2013-03-04 05:31:38Z raasch
75	! preprocessor lines rearranged so that routines can also be used in serial
76	! (non-parallel) mode
77	!
78	! 1092 2013-02-02 11:24:22Z raasch
79	! unused variables removed
80	!
81	! 1036 2012-10-22 13:43:42Z raasch
82	! code put under GPL (PALM 3.9)
83	!
84	! 1003 2012-09-14 14:35:53Z raasch
85	! indices nxa, nya, etc. replaced by nx, ny, etc.
86	!
87	! Revision 1.1 1997/07/24 11:25:18 raasch
88	! Initial revision
89	!
90	!------------------------------------------------------------------------------!
91	! Description:
92	! ------------
93	!> Resorting data for the transposition from x to y. The transposition itself
94	!> is carried out in transpose_xy
95	!------------------------------------------------------------------------------!
96	SUBROUTINE resort_for_xy( f_in, f_inv )
97
98
99	USE indices, &
100	ONLY: nx
101
102	USE kinds
103
104	USE transpose_indices, &
105	ONLY: nyn_x, nys_x, nzb_x, nzt_x
106
107	IMPLICIT NONE
108
109	REAL(wp) :: f_in(0:nx,nys_x:nyn_x,nzb_x:nzt_x) !<
110	REAL(wp) :: f_inv(nys_x:nyn_x,nzb_x:nzt_x,0:nx) !<
111
112
113	INTEGER(iwp) :: i !<
114	INTEGER(iwp) :: j !<
115	INTEGER(iwp) :: k !<
116	!
117	!-- Rearrange indices of input array in order to make data to be send
118	!-- by MPI contiguous
119	!$OMP PARALLEL PRIVATE ( i, j, k )
120	!$OMP DO
121	DO i = 0, nx
122	DO k = nzb_x, nzt_x
123	DO j = nys_x, nyn_x
124	f_inv(j,k,i) = f_in(i,j,k)
125	ENDDO
126	ENDDO
127	ENDDO
128	!$OMP END PARALLEL
129
130	END SUBROUTINE resort_for_xy
131
132
133	!------------------------------------------------------------------------------!
134	! Description:
135	! ------------
136	!> Transposition of input array (f_in) from x to y. For the input array, all
137	!> elements along x reside on the same PE, while after transposition, all
138	!> elements along y reside on the same PE.
139	!------------------------------------------------------------------------------!
140	SUBROUTINE transpose_xy( f_inv, f_out )
141
142
143	USE cpulog, &
144	ONLY: cpu_log, cpu_log_nowait, log_point_s
145
146	USE indices, &
147	ONLY: nx, ny
148
149	USE kinds
150
151	USE pegrid
152
153	USE transpose_indices, &
154	ONLY: nxl_y, nxr_y, nyn_x, nys_x, nzb_x, nzb_y, nzt_x, nzt_y
155
156	IMPLICIT NONE
157
158	INTEGER(iwp) :: i !<
159	INTEGER(iwp) :: j !<
160	INTEGER(iwp) :: k !<
161	INTEGER(iwp) :: l !<
162	INTEGER(iwp) :: ys !<
163
164	REAL(wp) :: f_inv(nys_x:nyn_x,nzb_x:nzt_x,0:nx) !<
165	REAL(wp) :: f_out(0:ny,nxl_y:nxr_y,nzb_y:nzt_y) !<
166
167	REAL(wp), DIMENSION(nyn_x-nys_x+1,nzb_y:nzt_y,nxl_y:nxr_y,0:pdims(2)-1) :: work !<
168
169
170	IF ( numprocs /= 1 ) THEN
171
172	#if defined( __parallel )
173	!
174	!-- Transpose array
175	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start', cpu_log_nowait )
176	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
177	CALL MPI_ALLTOALL( f_inv(nys_x,nzb_x,0), sendrecvcount_xy, MPI_REAL, &
178	work(1,nzb_y,nxl_y,0), sendrecvcount_xy, MPI_REAL, &
179	comm1dy, ierr )
180	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
181
182	!
183	!-- Reorder transposed array
184	!$OMP PARALLEL PRIVATE ( i, j, k, l, ys )
185	!$OMP DO
186	DO l = 0, pdims(2) - 1
187	ys = 0 + l * ( nyn_x - nys_x + 1 )
188	DO i = nxl_y, nxr_y
189	DO k = nzb_y, nzt_y
190	DO j = ys, ys + nyn_x - nys_x
191	f_out(j,i,k) = work(j-ys+1,k,i,l)
192	ENDDO
193	ENDDO
194	ENDDO
195	ENDDO
196	!$OMP END PARALLEL
197	#endif
198
199	ELSE
200
201	!
202	!-- Reorder transposed array
203	!$OMP PARALLEL PRIVATE ( i, j, k )
204	!$OMP DO
205	DO k = nzb_y, nzt_y
206	DO i = nxl_y, nxr_y
207	DO j = 0, ny
208	f_out(j,i,k) = f_inv(j,k,i)
209	ENDDO
210	ENDDO
211	ENDDO
212	!$OMP END PARALLEL
213
214	ENDIF
215
216	END SUBROUTINE transpose_xy
217
218
219	!------------------------------------------------------------------------------!
220	! Description:
221	! ------------
222	!> Resorting data after the transposition from x to z. The transposition itself
223	!> is carried out in transpose_xz
224	!------------------------------------------------------------------------------!
225	SUBROUTINE resort_for_xz( f_inv, f_out )
226
227
228	USE indices, &
229	ONLY: nxl, nxr, nyn, nys, nz
230
231	USE kinds
232
233	IMPLICIT NONE
234
235	REAL(wp) :: f_inv(nys:nyn,nxl:nxr,1:nz) !<
236	REAL(wp) :: f_out(1:nz,nys:nyn,nxl:nxr) !<
237
238	INTEGER(iwp) :: i !<
239	INTEGER(iwp) :: j !<
240	INTEGER(iwp) :: k !<
241	!
242	!-- Rearrange indices of input array in order to make data to be send
243	!-- by MPI contiguous.
244	!-- In case of parallel fft/transposition, scattered store is faster in
245	!-- backward direction!!!
246	!$OMP PARALLEL PRIVATE ( i, j, k )
247	!$OMP DO
248	DO k = 1, nz
249	DO i = nxl, nxr
250	DO j = nys, nyn
251	f_out(k,j,i) = f_inv(j,i,k)
252	ENDDO
253	ENDDO
254	ENDDO
255	!$OMP END PARALLEL
256
257	END SUBROUTINE resort_for_xz
258
259
260	!------------------------------------------------------------------------------!
261	! Description:
262	! ------------
263	!> Transposition of input array (f_in) from x to z. For the input array, all
264	!> elements along x reside on the same PE, while after transposition, all
265	!> elements along z reside on the same PE.
266	!------------------------------------------------------------------------------!
267	SUBROUTINE transpose_xz( f_in, f_inv )
268
269
270	USE cpulog, &
271	ONLY: cpu_log, cpu_log_nowait, log_point_s
272
273	USE indices, &
274	ONLY: nnx, nx, nxl, nxr, nyn, nys, nz
275
276	USE kinds
277
278	USE pegrid
279
280	USE transpose_indices, &
281	ONLY: nyn_x, nys_x, nzb_x, nzt_x
282
283	IMPLICIT NONE
284
285	INTEGER(iwp) :: i !<
286	INTEGER(iwp) :: j !<
287	INTEGER(iwp) :: k !<
288	INTEGER(iwp) :: l !<
289	INTEGER(iwp) :: xs !<
290
291	REAL(wp) :: f_in(0:nx,nys_x:nyn_x,nzb_x:nzt_x) !<
292	REAL(wp) :: f_inv(nys:nyn,nxl:nxr,1:nz) !<
293
294	REAL(wp), DIMENSION(nys_x:nyn_x,nnx,nzb_x:nzt_x,0:pdims(1)-1) :: work !<
295
296
297	!
298	!-- If the PE grid is one-dimensional along y, the array has only to be
299	!-- reordered locally and therefore no transposition has to be done.
300	IF ( pdims(1) /= 1 ) THEN
301
302	#if defined( __parallel )
303	!
304	!-- Reorder input array for transposition
305	!$OMP PARALLEL PRIVATE ( i, j, k, l, xs )
306	!$OMP DO
307	DO l = 0, pdims(1) - 1
308	xs = 0 + l * nnx
309	DO k = nzb_x, nzt_x
310	DO i = xs, xs + nnx - 1
311	DO j = nys_x, nyn_x
312	work(j,i-xs+1,k,l) = f_in(i,j,k)
313	ENDDO
314	ENDDO
315	ENDDO
316	ENDDO
317	!$OMP END PARALLEL
318
319	!
320	!-- Transpose array
321	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start', cpu_log_nowait )
322	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
323	CALL MPI_ALLTOALL( work(nys_x,1,nzb_x,0), sendrecvcount_zx, MPI_REAL, &
324	f_inv(nys,nxl,1), sendrecvcount_zx, MPI_REAL, &
325	comm1dx, ierr )
326	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
327	#endif
328
329	ELSE
330
331	!
332	!-- Reorder the array in a way that the z index is in first position
333	!$OMP PARALLEL PRIVATE ( i, j, k )
334	!$OMP DO
335	DO i = nxl, nxr
336	DO j = nys, nyn
337	DO k = 1, nz
338	f_inv(j,i,k) = f_in(i,j,k)
339	ENDDO
340	ENDDO
341	ENDDO
342	!$OMP END PARALLEL
343
344	ENDIF
345
346	END SUBROUTINE transpose_xz
347
348
349	!------------------------------------------------------------------------------!
350	! Description:
351	! ------------
352	!> Resorting data after the transposition from y to x. The transposition itself
353	!> is carried out in transpose_yx
354	!------------------------------------------------------------------------------!
355	SUBROUTINE resort_for_yx( f_inv, f_out )
356
357
358	USE indices, &
359	ONLY: nx
360
361	USE kinds
362
363	USE transpose_indices, &
364	ONLY: nyn_x, nys_x, nzb_x, nzt_x
365
366	IMPLICIT NONE
367
368	REAL(wp) :: f_inv(nys_x:nyn_x,nzb_x:nzt_x,0:nx) !<
369	REAL(wp) :: f_out(0:nx,nys_x:nyn_x,nzb_x:nzt_x) !<
370
371
372	INTEGER(iwp) :: i !<
373	INTEGER(iwp) :: j !<
374	INTEGER(iwp) :: k !<
375	!
376	!-- Rearrange indices of input array in order to make data to be send
377	!-- by MPI contiguous
378	!$OMP PARALLEL PRIVATE ( i, j, k )
379	!$OMP DO
380	DO i = 0, nx
381	DO k = nzb_x, nzt_x
382	DO j = nys_x, nyn_x
383	f_out(i,j,k) = f_inv(j,k,i)
384	ENDDO
385	ENDDO
386	ENDDO
387	!$OMP END PARALLEL
388
389	END SUBROUTINE resort_for_yx
390
391
392	!------------------------------------------------------------------------------!
393	! Description:
394	! ------------
395	!> Transposition of input array (f_in) from y to x. For the input array, all
396	!> elements along y reside on the same PE, while after transposition, all
397	!> elements along x reside on the same PE.
398	!------------------------------------------------------------------------------!
399	SUBROUTINE transpose_yx( f_in, f_inv )
400
401
402	USE cpulog, &
403	ONLY: cpu_log, cpu_log_nowait, log_point_s
404
405	USE indices, &
406	ONLY: nx, ny
407
408	USE kinds
409
410	USE pegrid
411
412	USE transpose_indices, &
413	ONLY: nxl_y, nxr_y, nyn_x, nys_x, nzb_x, nzb_y, nzt_x, nzt_y
414
415	IMPLICIT NONE
416
417	INTEGER(iwp) :: i !<
418	INTEGER(iwp) :: j !<
419	INTEGER(iwp) :: k !<
420	INTEGER(iwp) :: l !<
421	INTEGER(iwp) :: ys !<
422
423	REAL(wp) :: f_in(0:ny,nxl_y:nxr_y,nzb_y:nzt_y) !<
424	REAL(wp) :: f_inv(nys_x:nyn_x,nzb_x:nzt_x,0:nx) !<
425
426	REAL(wp), DIMENSION(nyn_x-nys_x+1,nzb_y:nzt_y,nxl_y:nxr_y,0:pdims(2)-1) :: work !<
427
428
429	IF ( numprocs /= 1 ) THEN
430
431	#if defined( __parallel )
432	!
433	!-- Reorder input array for transposition
434	!$OMP PARALLEL PRIVATE ( i, j, k, l, ys )
435	!$OMP DO
436	DO l = 0, pdims(2) - 1
437	ys = 0 + l * ( nyn_x - nys_x + 1 )
438	DO i = nxl_y, nxr_y
439	DO k = nzb_y, nzt_y
440	DO j = ys, ys + nyn_x - nys_x
441	work(j-ys+1,k,i,l) = f_in(j,i,k)
442	ENDDO
443	ENDDO
444	ENDDO
445	ENDDO
446	!$OMP END PARALLEL
447
448	!
449	!-- Transpose array
450	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start', cpu_log_nowait )
451	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
452	CALL MPI_ALLTOALL( work(1,nzb_y,nxl_y,0), sendrecvcount_xy, MPI_REAL, &
453	f_inv(nys_x,nzb_x,0), sendrecvcount_xy, MPI_REAL, &
454	comm1dy, ierr )
455	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
456	#endif
457
458	ELSE
459
460	!
461	!-- Reorder array f_in the same way as ALLTOALL did it
462	!$OMP PARALLEL PRIVATE ( i, j, k )
463	!$OMP DO
464	DO i = nxl_y, nxr_y
465	DO k = nzb_y, nzt_y
466	DO j = 0, ny
467	f_inv(j,k,i) = f_in(j,i,k)
468	ENDDO
469	ENDDO
470	ENDDO
471	!$OMP END PARALLEL
472
473	ENDIF
474
475	END SUBROUTINE transpose_yx
476
477
478	!------------------------------------------------------------------------------!
479	! Description:
480	! ------------
481	!> Transposition of input array (f_in) from y to x. For the input array, all
482	!> elements along y reside on the same PE, while after transposition, all
483	!> elements along x reside on the same PE.
484	!> This is a direct transposition for arrays with indices in regular order
485	!> (k,j,i) (cf. transpose_yx).
486	!------------------------------------------------------------------------------!
487	SUBROUTINE transpose_yxd( f_in, f_out )
488
489
490	USE cpulog, &
491	ONLY: cpu_log, log_point_s
492
493	USE indices, &
494	ONLY: nnx, nny, nnz, nx, nxl, nxr, nyn, nys, nz
495
496	USE kinds
497
498	USE pegrid
499
500	USE transpose_indices, &
501	ONLY: nyn_x, nys_x, nzb_x, nzt_x
502
503	IMPLICIT NONE
504
505	INTEGER(iwp) :: i !<
506	INTEGER(iwp) :: j !<
507	INTEGER(iwp) :: k !<
508	INTEGER(iwp) :: l !<
509	INTEGER(iwp) :: m !<
510	INTEGER(iwp) :: xs !<
511
512	REAL(wp) :: f_in(1:nz,nys:nyn,nxl:nxr) !<
513	REAL(wp) :: f_inv(nxl:nxr,1:nz,nys:nyn) !<
514	REAL(wp) :: f_out(0:nx,nys_x:nyn_x,nzb_x:nzt_x) !<
515	REAL(wp) :: work(nnxnnynnz) !<
516	#if defined( __parallel )
517
518	!
519	!-- Rearrange indices of input array in order to make data to be send
520	!-- by MPI contiguous
521	DO k = 1, nz
522	DO j = nys, nyn
523	DO i = nxl, nxr
524	f_inv(i,k,j) = f_in(k,j,i)
525	ENDDO
526	ENDDO
527	ENDDO
528
529	!
530	!-- Transpose array
531	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
532	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
533	CALL MPI_ALLTOALL( f_inv(nxl,1,nys), sendrecvcount_xy, MPI_REAL, &
534	work(1), sendrecvcount_xy, MPI_REAL, &
535	comm1dx, ierr )
536	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
537
538	!
539	!-- Reorder transposed array
540	m = 0
541	DO l = 0, pdims(1) - 1
542	xs = 0 + l * nnx
543	DO j = nys_x, nyn_x
544	DO k = 1, nz
545	DO i = xs, xs + nnx - 1
546	m = m + 1
547	f_out(i,j,k) = work(m)
548	ENDDO
549	ENDDO
550	ENDDO
551	ENDDO
552
553	#endif
554
555	END SUBROUTINE transpose_yxd
556
557
558	!------------------------------------------------------------------------------!
559	! Description:
560	! ------------
561	!> Resorting data for the transposition from y to z. The transposition itself
562	!> is carried out in transpose_yz
563	!------------------------------------------------------------------------------!
564	SUBROUTINE resort_for_yz( f_in, f_inv )
565
566
567	USE indices, &
568	ONLY: ny
569
570	USE kinds
571
572	USE transpose_indices, &
573	ONLY: nxl_y, nxr_y, nzb_y, nzt_y
574
575	IMPLICIT NONE
576
577	REAL(wp) :: f_in(0:ny,nxl_y:nxr_y,nzb_y:nzt_y) !<
578	REAL(wp) :: f_inv(nxl_y:nxr_y,nzb_y:nzt_y,0:ny) !<
579
580	INTEGER(iwp) :: i !<
581	INTEGER(iwp) :: j !<
582	INTEGER(iwp) :: k !<
583
584	!
585	!-- Rearrange indices of input array in order to make data to be send
586	!-- by MPI contiguous
587	!$OMP PARALLEL PRIVATE ( i, j, k )
588	!$OMP DO
589	DO j = 0, ny
590	DO k = nzb_y, nzt_y
591	DO i = nxl_y, nxr_y
592	f_inv(i,k,j) = f_in(j,i,k)
593	ENDDO
594	ENDDO
595	ENDDO
596	!$OMP END PARALLEL
597
598	END SUBROUTINE resort_for_yz
599
600
601	!------------------------------------------------------------------------------!
602	! Description:
603	! ------------
604	!> Transposition of input array (f_in) from y to z. For the input array, all
605	!> elements along y reside on the same PE, while after transposition, all
606	!> elements along z reside on the same PE.
607	!------------------------------------------------------------------------------!
608	SUBROUTINE transpose_yz( f_inv, f_out )
609
610
611	USE cpulog, &
612	ONLY: cpu_log, cpu_log_nowait, log_point_s
613
614	USE indices, &
615	ONLY: ny, nz
616
617	USE kinds
618
619	USE pegrid
620
621	USE transpose_indices, &
622	ONLY: nxl_y, nxl_z, nxr_y, nxr_z, nyn_z, nys_z, nzb_y, nzt_y
623
624	IMPLICIT NONE
625
626	INTEGER(iwp) :: i !<
627	INTEGER(iwp) :: j !<
628	INTEGER(iwp) :: k !<
629	INTEGER(iwp) :: l !<
630	INTEGER(iwp) :: zs !<
631
632	REAL(wp) :: f_inv(nxl_y:nxr_y,nzb_y:nzt_y,0:ny) !<
633	REAL(wp) :: f_out(nxl_z:nxr_z,nys_z:nyn_z,1:nz) !<
634
635	REAL(wp), DIMENSION(nxl_z:nxr_z,nzt_y-nzb_y+1,nys_z:nyn_z,0:pdims(1)-1) :: work !<
636
637
638	!
639	!-- If the PE grid is one-dimensional along y, only local reordering
640	!-- of the data is necessary and no transposition has to be done.
641	IF ( pdims(1) == 1 ) THEN
642
643	!$OMP PARALLEL PRIVATE ( i, j, k )
644	!$OMP DO
645	DO j = 0, ny
646	DO k = nzb_y, nzt_y
647	DO i = nxl_y, nxr_y
648	f_out(i,j,k) = f_inv(i,k,j)
649	ENDDO
650	ENDDO
651	ENDDO
652	!$OMP END PARALLEL
653
654	ELSE
655
656	#if defined( __parallel )
657	!
658	!-- Transpose array
659	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start', cpu_log_nowait )
660	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
661	CALL MPI_ALLTOALL( f_inv(nxl_y,nzb_y,0), sendrecvcount_yz, MPI_REAL, &
662	work(nxl_z,1,nys_z,0), sendrecvcount_yz, MPI_REAL, &
663	comm1dx, ierr )
664	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
665
666	!
667	!-- Reorder transposed array
668	!$OMP PARALLEL PRIVATE ( i, j, k, l, zs )
669	!$OMP DO
670	DO l = 0, pdims(1) - 1
671	zs = 1 + l * ( nzt_y - nzb_y + 1 )
672	DO j = nys_z, nyn_z
673	DO k = zs, zs + nzt_y - nzb_y
674	DO i = nxl_z, nxr_z
675	f_out(i,j,k) = work(i,k-zs+1,j,l)
676	ENDDO
677	ENDDO
678	ENDDO
679	ENDDO
680	!$OMP END PARALLEL
681	#endif
682
683	ENDIF
684
685	END SUBROUTINE transpose_yz
686
687
688	!------------------------------------------------------------------------------!
689	! Description:
690	! ------------
691	!> Resorting data for the transposition from z to x. The transposition itself
692	!> is carried out in transpose_zx
693	!------------------------------------------------------------------------------!
694	SUBROUTINE resort_for_zx( f_in, f_inv )
695
696
697	USE indices, &
698	ONLY: nxl, nxr, nyn, nys, nz
699
700	USE kinds
701
702	IMPLICIT NONE
703
704	REAL(wp) :: f_in(1:nz,nys:nyn,nxl:nxr) !<
705	REAL(wp) :: f_inv(nys:nyn,nxl:nxr,1:nz) !<
706
707	INTEGER(iwp) :: i !<
708	INTEGER(iwp) :: j !<
709	INTEGER(iwp) :: k !<
710
711	!
712	!-- Rearrange indices of input array in order to make data to be send
713	!-- by MPI contiguous
714	!$OMP PARALLEL PRIVATE ( i, j, k )
715	!$OMP DO
716	DO k = 1,nz
717	DO i = nxl, nxr
718	DO j = nys, nyn
719	f_inv(j,i,k) = f_in(k,j,i)
720	ENDDO
721	ENDDO
722	ENDDO
723	!$OMP END PARALLEL
724
725	END SUBROUTINE resort_for_zx
726
727
728	!------------------------------------------------------------------------------!
729	! Description:
730	! ------------
731	!> Transposition of input array (f_in) from z to x. For the input array, all
732	!> elements along z reside on the same PE, while after transposition, all
733	!> elements along x reside on the same PE.
734	!------------------------------------------------------------------------------!
735	SUBROUTINE transpose_zx( f_inv, f_out )
736
737
738	USE cpulog, &
739	ONLY: cpu_log, cpu_log_nowait, log_point_s
740
741	USE indices, &
742	ONLY: nnx, nx, nxl, nxr, nyn, nys, nz
743
744	USE kinds
745
746	USE pegrid
747
748	USE transpose_indices, &
749	ONLY: nyn_x, nys_x, nzb_x, nzt_x
750
751	IMPLICIT NONE
752
753	INTEGER(iwp) :: i !<
754	INTEGER(iwp) :: j !<
755	INTEGER(iwp) :: k !<
756	INTEGER(iwp) :: l !<
757	INTEGER(iwp) :: xs !<
758
759	REAL(wp) :: f_inv(nys:nyn,nxl:nxr,1:nz) !<
760	REAL(wp) :: f_out(0:nx,nys_x:nyn_x,nzb_x:nzt_x) !<
761
762	REAL(wp), DIMENSION(nys_x:nyn_x,nnx,nzb_x:nzt_x,0:pdims(1)-1) :: work !<
763
764
765	!
766	!-- If the PE grid is one-dimensional along y, only local reordering
767	!-- of the data is necessary and no transposition has to be done.
768	IF ( pdims(1) == 1 ) THEN
769
770	!$OMP PARALLEL PRIVATE ( i, j, k )
771	!$OMP DO
772	DO k = 1, nz
773	DO i = nxl, nxr
774	DO j = nys, nyn
775	f_out(i,j,k) = f_inv(j,i,k)
776	ENDDO
777	ENDDO
778	ENDDO
779	!$OMP END PARALLEL
780
781	ELSE
782
783	#if defined( __parallel )
784	!
785	!-- Transpose array
786	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start', cpu_log_nowait )
787	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
788	CALL MPI_ALLTOALL( f_inv(nys,nxl,1), sendrecvcount_zx, MPI_REAL, &
789	work(nys_x,1,nzb_x,0), sendrecvcount_zx, MPI_REAL, &
790	comm1dx, ierr )
791	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
792
793	!
794	!-- Reorder transposed array
795	!$OMP PARALLEL PRIVATE ( i, j, k, l, xs )
796	!$OMP DO
797	DO l = 0, pdims(1) - 1
798	xs = 0 + l * nnx
799	DO k = nzb_x, nzt_x
800	DO i = xs, xs + nnx - 1
801	DO j = nys_x, nyn_x
802	f_out(i,j,k) = work(j,i-xs+1,k,l)
803	ENDDO
804	ENDDO
805	ENDDO
806	ENDDO
807	!$OMP END PARALLEL
808	#endif
809
810	ENDIF
811
812	END SUBROUTINE transpose_zx
813
814
815	!------------------------------------------------------------------------------!
816	! Description:
817	! ------------
818	!> Resorting data after the transposition from z to y. The transposition itself
819	!> is carried out in transpose_zy
820	!------------------------------------------------------------------------------!
821	SUBROUTINE resort_for_zy( f_inv, f_out )
822
823
824	USE indices, &
825	ONLY: ny
826
827	USE kinds
828
829	USE transpose_indices, &
830	ONLY: nxl_y, nxr_y, nzb_y, nzt_y
831
832	IMPLICIT NONE
833
834	REAL(wp) :: f_inv(nxl_y:nxr_y,nzb_y:nzt_y,0:ny) !<
835	REAL(wp) :: f_out(0:ny,nxl_y:nxr_y,nzb_y:nzt_y) !<
836
837
838	INTEGER(iwp) :: i !<
839	INTEGER(iwp) :: j !<
840	INTEGER(iwp) :: k !<
841
842	!
843	!-- Rearrange indices of input array in order to make data to be send
844	!-- by MPI contiguous
845	!$OMP PARALLEL PRIVATE ( i, j, k )
846	!$OMP DO
847	DO k = nzb_y, nzt_y
848	DO j = 0, ny
849	DO i = nxl_y, nxr_y
850	f_out(j,i,k) = f_inv(i,k,j)
851	ENDDO
852	ENDDO
853	ENDDO
854	!$OMP END PARALLEL
855
856	END SUBROUTINE resort_for_zy
857
858
859	!------------------------------------------------------------------------------!
860	! Description:cpu_log_nowait
861	! ------------
862	!> Transposition of input array (f_in) from z to y. For the input array, all
863	!> elements along z reside on the same PE, while after transposition, all
864	!> elements along y reside on the same PE.
865	!------------------------------------------------------------------------------!
866	SUBROUTINE transpose_zy( f_in, f_inv )
867
868
869	USE cpulog, &
870	ONLY: cpu_log, cpu_log_nowait, log_point_s
871
872	USE indices, &
873	ONLY: ny, nz
874
875	USE kinds
876
877	USE pegrid
878
879	USE transpose_indices, &
880	ONLY: nxl_y, nxl_z, nxr_y, nxr_z, nyn_z, nys_z, nzb_y, nzt_y
881
882	IMPLICIT NONE
883
884	INTEGER(iwp) :: i !<
885	INTEGER(iwp) :: j !<
886	INTEGER(iwp) :: k !<
887	INTEGER(iwp) :: l !<
888	INTEGER(iwp) :: zs !<
889
890	REAL(wp) :: f_in(nxl_z:nxr_z,nys_z:nyn_z,1:nz) !<
891	REAL(wp) :: f_inv(nxl_y:nxr_y,nzb_y:nzt_y,0:ny) !<
892
893	REAL(wp), DIMENSION(nxl_z:nxr_z,nzt_y-nzb_y+1,nys_z:nyn_z,0:pdims(1)-1) :: work !<
894
895	!
896	!-- If the PE grid is one-dimensional along y, the array has only to be
897	!-- reordered locally and therefore no transposition has to be done.
898	IF ( pdims(1) /= 1 ) THEN
899
900	#if defined( __parallel )
901	!
902	!-- Reorder input array for transposition
903	!$OMP PARALLEL PRIVATE ( i, j, k, l, zs )
904	!$OMP DO
905	DO l = 0, pdims(1) - 1
906	zs = 1 + l * ( nzt_y - nzb_y + 1 )
907	DO j = nys_z, nyn_z
908	DO k = zs, zs + nzt_y - nzb_y
909	DO i = nxl_z, nxr_z
910	work(i,k-zs+1,j,l) = f_in(i,j,k)
911	ENDDO
912	ENDDO
913	ENDDO
914	ENDDO
915	!$OMP END PARALLEL
916
917	!
918	!-- Transpose array
919	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start', cpu_log_nowait )
920	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
921	CALL MPI_ALLTOALL( work(nxl_z,1,nys_z,0), sendrecvcount_yz, MPI_REAL, &
922	f_inv(nxl_y,nzb_y,0), sendrecvcount_yz, MPI_REAL, &
923	comm1dx, ierr )
924	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
925	#endif
926
927	ELSE
928	!
929	!-- Reorder the array in the same way like ALLTOALL did it
930	!$OMP PARALLEL PRIVATE ( i, j, k )
931	!$OMP DO
932	DO k = nzb_y, nzt_y
933	DO j = 0, ny
934	DO i = nxl_y, nxr_y
935	f_inv(i,k,j) = f_in(i,j,k)
936	ENDDO
937	ENDDO
938	ENDDO
939	!$OMP END PARALLEL
940
941	ENDIF
942
943	END SUBROUTINE transpose_zy
944
945
946	!------------------------------------------------------------------------------!
947	! Description:
948	! ------------
949	!> Transposition of input array (f_in) from z to y. For the input array, all
950	!> elements along z reside on the same PE, while after transposition, all
951	!> elements along y reside on the same PE.
952	!> This is a direct transposition for arrays with indices in regular order
953	!> (k,j,i) (cf. transpose_zy).
954	!------------------------------------------------------------------------------!
955	SUBROUTINE transpose_zyd( f_in, f_out )
956
957
958	USE cpulog, &
959	ONLY: cpu_log, log_point_s
960
961	USE indices, &
962	ONLY: nnx, nny, nnz, nxl, nxr, nyn, nys, ny, nz
963
964	USE kinds
965
966	USE pegrid
967
968	USE transpose_indices, &
969	ONLY: nxl_yd, nxr_yd, nzb_yd, nzt_yd
970
971	IMPLICIT NONE
972
973	INTEGER(iwp) :: i !<
974	INTEGER(iwp) :: j !<
975	INTEGER(iwp) :: k !<
976	INTEGER(iwp) :: l !<
977	INTEGER(iwp) :: m !<
978	INTEGER(iwp) :: ys !<
979
980	REAL(wp) :: f_in(1:nz,nys:nyn,nxl:nxr) !<
981	REAL(wp) :: f_inv(nys:nyn,nxl:nxr,1:nz) !<
982	REAL(wp) :: f_out(0:ny,nxl_yd:nxr_yd,nzb_yd:nzt_yd) !<
983	REAL(wp) :: work(nnxnnynnz) !<
984
985	#if defined( __parallel )
986
987	!
988	!-- Rearrange indices of input array in order to make data to be send
989	!-- by MPI contiguous
990	DO i = nxl, nxr
991	DO j = nys, nyn
992	DO k = 1, nz
993	f_inv(j,i,k) = f_in(k,j,i)
994	ENDDO
995	ENDDO
996	ENDDO
997
998	!
999	!-- Move data to different array, because memory location of work1 is
1000	!-- needed further below (work1 = work2).
1001	!-- If the PE grid is one-dimensional along x, only local reordering
1002	!-- of the data is necessary and no transposition has to be done.
1003	IF ( pdims(2) == 1 ) THEN
1004	DO k = 1, nz
1005	DO i = nxl, nxr
1006	DO j = nys, nyn
1007	f_out(j,i,k) = f_inv(j,i,k)
1008	ENDDO
1009	ENDDO
1010	ENDDO
1011	RETURN
1012	ENDIF
1013
1014	!
1015	!-- Transpose array
1016	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
1017	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
1018	CALL MPI_ALLTOALL( f_inv(nys,nxl,1), sendrecvcount_zyd, MPI_REAL, &
1019	work(1), sendrecvcount_zyd, MPI_REAL, &
1020	comm1dy, ierr )
1021	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
1022
1023	!
1024	!-- Reorder transposed array
1025	m = 0
1026	DO l = 0, pdims(2) - 1
1027	ys = 0 + l * nny
1028	DO k = nzb_yd, nzt_yd
1029	DO i = nxl_yd, nxr_yd
1030	DO j = ys, ys + nny - 1
1031	m = m + 1
1032	f_out(j,i,k) = work(m)
1033	ENDDO
1034	ENDDO
1035	ENDDO
1036	ENDDO
1037
1038	#endif
1039
1040	END SUBROUTINE transpose_zyd

Note: See TracBrowser for help on using the repository browser.

Download in other formats:

| Impressum | ©Leibniz Universität Hannover |