Home

Context Navigation

source: palm/trunk/SOURCE/transpose.f90 @ 2118

Last change on this file since 2118 was 2118, checked in by raasch, 7 years ago
all OpenACC directives and related parts removed from the code
Property svn:keywords set to `Id`
File size: 29.5 KB

Line
1	!> @file transpose.f90
2	!------------------------------------------------------------------------------!
3	! This file is part of PALM.
4	!
5	! PALM is free software: you can redistribute it and/or modify it under the
6	! terms of the GNU General Public License as published by the Free Software
7	! Foundation, either version 3 of the License, or (at your option) any later
8	! version.
9	!
10	! PALM is distributed in the hope that it will be useful, but WITHOUT ANY
11	! WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
12	! A PARTICULAR PURPOSE. See the GNU General Public License for more details.
13	!
14	! You should have received a copy of the GNU General Public License along with
15	! PALM. If not, see <http://www.gnu.org/licenses/>.
16	!
17	! Copyright 1997-2017 Leibniz Universitaet Hannover
18	!------------------------------------------------------------------------------!
19	!
20	! Current revisions:
21	! -----------------
22	! OpenACC directives removed
23	!
24	! Former revisions:
25	! -----------------
26	! $Id: transpose.f90 2118 2017-01-17 16:38:49Z raasch $
27	!
28	! 2000 2016-08-20 18:09:15Z knoop
29	! Forced header and separation lines into 80 columns
30	!
31	! 1682 2015-10-07 23:56:08Z knoop
32	! Code annotations made doxygen readable
33	!
34	! 1324 2014-03-21 09:13:16Z suehring
35	! Bugfix: ONLY statement for module pegrid removed
36	!
37	! 1320 2014-03-20 08:40:49Z raasch
38	! ONLY-attribute added to USE-statements,
39	! kind-parameters added to all INTEGER and REAL declaration statements,
40	! kinds are defined in new module kinds,
41	! old module precision_kind is removed,
42	! revision history before 2012 removed,
43	! comment fields (!:) to be used for variable explanations added to
44	! all variable declaration statements
45	!
46	! 1318 2014-03-17 13:35:16Z raasch
47	! cpu_log_nowait parameter added to cpu measurements of the transpositions
48	! required for solving the Poisson equation (poisfft),
49	! module interfaces removed
50	!
51	! 1257 2013-11-08 15:18:40Z raasch
52	! openacc loop and loop vector clauses removed
53	!
54	! 1216 2013-08-26 09:31:42Z raasch
55	! re-sorting of the transposed / to be transposed arrays moved to separate
56	! routines resort_for_...
57	!
58	! 1111 2013-03-08 23:54:10Z raasch
59	! openACC directives added,
60	! resorting data from/to work changed, work got 4 dimensions instead of 1
61	!
62	! 1106 2013-03-04 05:31:38Z raasch
63	! preprocessor lines rearranged so that routines can also be used in serial
64	! (non-parallel) mode
65	!
66	! 1092 2013-02-02 11:24:22Z raasch
67	! unused variables removed
68	!
69	! 1036 2012-10-22 13:43:42Z raasch
70	! code put under GPL (PALM 3.9)
71	!
72	! 1003 2012-09-14 14:35:53Z raasch
73	! indices nxa, nya, etc. replaced by nx, ny, etc.
74	!
75	! Revision 1.1 1997/07/24 11:25:18 raasch
76	! Initial revision
77	!
78	!------------------------------------------------------------------------------!
79	! Description:
80	! ------------
81	!> Resorting data for the transposition from x to y. The transposition itself
82	!> is carried out in transpose_xy
83	!------------------------------------------------------------------------------!
84	SUBROUTINE resort_for_xy( f_in, f_inv )
85
86
87	USE indices, &
88	ONLY: nx
89
90	USE kinds
91
92	USE transpose_indices, &
93	ONLY: nxl_z, nxr_z, nyn_x, nyn_z, nys_x, nys_z, nzb_x, nzt_x
94
95	IMPLICIT NONE
96
97	REAL(wp) :: f_in(0:nx,nys_x:nyn_x,nzb_x:nzt_x) !<
98	REAL(wp) :: f_inv(nys_x:nyn_x,nzb_x:nzt_x,0:nx) !<
99
100
101	INTEGER(iwp) :: i !<
102	INTEGER(iwp) :: j !<
103	INTEGER(iwp) :: k !<
104	!
105	!-- Rearrange indices of input array in order to make data to be send
106	!-- by MPI contiguous
107	!$OMP PARALLEL PRIVATE ( i, j, k )
108	!$OMP DO
109	DO i = 0, nx
110	DO k = nzb_x, nzt_x
111	DO j = nys_x, nyn_x
112	f_inv(j,k,i) = f_in(i,j,k)
113	ENDDO
114	ENDDO
115	ENDDO
116	!$OMP END PARALLEL
117
118	END SUBROUTINE resort_for_xy
119
120
121	!------------------------------------------------------------------------------!
122	! Description:
123	! ------------
124	!> Transposition of input array (f_in) from x to y. For the input array, all
125	!> elements along x reside on the same PE, while after transposition, all
126	!> elements along y reside on the same PE.
127	!------------------------------------------------------------------------------!
128	SUBROUTINE transpose_xy( f_inv, f_out )
129
130
131	USE cpulog, &
132	ONLY: cpu_log, cpu_log_nowait, log_point_s
133
134	USE indices, &
135	ONLY: nx, ny
136
137	USE kinds
138
139	USE pegrid
140
141	USE transpose_indices, &
142	ONLY: nxl_y, nxr_y, nyn_x, nys_x, nzb_x, nzb_y, nzt_x, nzt_y
143
144	IMPLICIT NONE
145
146	INTEGER(iwp) :: i !<
147	INTEGER(iwp) :: j !<
148	INTEGER(iwp) :: k !<
149	INTEGER(iwp) :: l !<
150	INTEGER(iwp) :: ys !<
151
152	REAL(wp) :: f_inv(nys_x:nyn_x,nzb_x:nzt_x,0:nx) !<
153	REAL(wp) :: f_out(0:ny,nxl_y:nxr_y,nzb_y:nzt_y) !<
154
155	REAL(wp), DIMENSION(nyn_x-nys_x+1,nzb_y:nzt_y,nxl_y:nxr_y,0:pdims(2)-1) :: work !<
156
157
158	IF ( numprocs /= 1 ) THEN
159
160	#if defined( __parallel )
161	!
162	!-- Transpose array
163	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start', cpu_log_nowait )
164	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
165	CALL MPI_ALLTOALL( f_inv(nys_x,nzb_x,0), sendrecvcount_xy, MPI_REAL, &
166	work(1,nzb_y,nxl_y,0), sendrecvcount_xy, MPI_REAL, &
167	comm1dy, ierr )
168	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
169
170	!
171	!-- Reorder transposed array
172	!$OMP PARALLEL PRIVATE ( i, j, k, l, ys )
173	!$OMP DO
174	DO l = 0, pdims(2) - 1
175	ys = 0 + l * ( nyn_x - nys_x + 1 )
176	DO i = nxl_y, nxr_y
177	DO k = nzb_y, nzt_y
178	DO j = ys, ys + nyn_x - nys_x
179	f_out(j,i,k) = work(j-ys+1,k,i,l)
180	ENDDO
181	ENDDO
182	ENDDO
183	ENDDO
184	!$OMP END PARALLEL
185	#endif
186
187	ELSE
188
189	!
190	!-- Reorder transposed array
191	!$OMP PARALLEL PRIVATE ( i, j, k )
192	!$OMP DO
193	DO k = nzb_y, nzt_y
194	DO i = nxl_y, nxr_y
195	DO j = 0, ny
196	f_out(j,i,k) = f_inv(j,k,i)
197	ENDDO
198	ENDDO
199	ENDDO
200	!$OMP END PARALLEL
201
202	ENDIF
203
204	END SUBROUTINE transpose_xy
205
206
207	!------------------------------------------------------------------------------!
208	! Description:
209	! ------------
210	!> Resorting data after the transposition from x to z. The transposition itself
211	!> is carried out in transpose_xz
212	!------------------------------------------------------------------------------!
213	SUBROUTINE resort_for_xz( f_inv, f_out )
214
215
216	USE indices, &
217	ONLY: nxl, nxr, nyn, nys, nz
218
219	USE kinds
220
221	IMPLICIT NONE
222
223	REAL(wp) :: f_inv(nys:nyn,nxl:nxr,1:nz) !<
224	REAL(wp) :: f_out(1:nz,nys:nyn,nxl:nxr) !<
225
226	INTEGER(iwp) :: i !<
227	INTEGER(iwp) :: j !<
228	INTEGER(iwp) :: k !<
229	!
230	!-- Rearrange indices of input array in order to make data to be send
231	!-- by MPI contiguous.
232	!-- In case of parallel fft/transposition, scattered store is faster in
233	!-- backward direction!!!
234	!$OMP PARALLEL PRIVATE ( i, j, k )
235	!$OMP DO
236	DO k = 1, nz
237	DO i = nxl, nxr
238	DO j = nys, nyn
239	f_out(k,j,i) = f_inv(j,i,k)
240	ENDDO
241	ENDDO
242	ENDDO
243	!$OMP END PARALLEL
244
245	END SUBROUTINE resort_for_xz
246
247
248	!------------------------------------------------------------------------------!
249	! Description:
250	! ------------
251	!> Transposition of input array (f_in) from x to z. For the input array, all
252	!> elements along x reside on the same PE, while after transposition, all
253	!> elements along z reside on the same PE.
254	!------------------------------------------------------------------------------!
255	SUBROUTINE transpose_xz( f_in, f_inv )
256
257
258	USE cpulog, &
259	ONLY: cpu_log, cpu_log_nowait, log_point_s
260
261	USE indices, &
262	ONLY: nnx, nx, nxl, nxr, ny, nyn, nys, nz
263
264	USE kinds
265
266	USE pegrid
267
268	USE transpose_indices, &
269	ONLY: nyn_x, nys_x, nzb_x, nzt_x
270
271	IMPLICIT NONE
272
273	INTEGER(iwp) :: i !<
274	INTEGER(iwp) :: j !<
275	INTEGER(iwp) :: k !<
276	INTEGER(iwp) :: l !<
277	INTEGER(iwp) :: xs !<
278
279	REAL(wp) :: f_in(0:nx,nys_x:nyn_x,nzb_x:nzt_x) !<
280	REAL(wp) :: f_inv(nys:nyn,nxl:nxr,1:nz) !<
281
282	REAL(wp), DIMENSION(nys_x:nyn_x,nnx,nzb_x:nzt_x,0:pdims(1)-1) :: work !<
283
284
285	!
286	!-- If the PE grid is one-dimensional along y, the array has only to be
287	!-- reordered locally and therefore no transposition has to be done.
288	IF ( pdims(1) /= 1 ) THEN
289
290	#if defined( __parallel )
291	!
292	!-- Reorder input array for transposition
293	!$OMP PARALLEL PRIVATE ( i, j, k, l, xs )
294	!$OMP DO
295	DO l = 0, pdims(1) - 1
296	xs = 0 + l * nnx
297	DO k = nzb_x, nzt_x
298	DO i = xs, xs + nnx - 1
299	DO j = nys_x, nyn_x
300	work(j,i-xs+1,k,l) = f_in(i,j,k)
301	ENDDO
302	ENDDO
303	ENDDO
304	ENDDO
305	!$OMP END PARALLEL
306
307	!
308	!-- Transpose array
309	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start', cpu_log_nowait )
310	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
311	CALL MPI_ALLTOALL( work(nys_x,1,nzb_x,0), sendrecvcount_zx, MPI_REAL, &
312	f_inv(nys,nxl,1), sendrecvcount_zx, MPI_REAL, &
313	comm1dx, ierr )
314	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
315	#endif
316
317	ELSE
318
319	!
320	!-- Reorder the array in a way that the z index is in first position
321	!$OMP PARALLEL PRIVATE ( i, j, k )
322	!$OMP DO
323	DO i = nxl, nxr
324	DO j = nys, nyn
325	DO k = 1, nz
326	f_inv(j,i,k) = f_in(i,j,k)
327	ENDDO
328	ENDDO
329	ENDDO
330	!$OMP END PARALLEL
331
332	ENDIF
333
334	END SUBROUTINE transpose_xz
335
336
337	!------------------------------------------------------------------------------!
338	! Description:
339	! ------------
340	!> Resorting data after the transposition from y to x. The transposition itself
341	!> is carried out in transpose_yx
342	!------------------------------------------------------------------------------!
343	SUBROUTINE resort_for_yx( f_inv, f_out )
344
345
346	USE indices, &
347	ONLY: nx
348
349	USE kinds
350
351	USE transpose_indices, &
352	ONLY: nyn_x, nys_x, nzb_x, nzt_x
353
354	IMPLICIT NONE
355
356	REAL(wp) :: f_inv(nys_x:nyn_x,nzb_x:nzt_x,0:nx) !<
357	REAL(wp) :: f_out(0:nx,nys_x:nyn_x,nzb_x:nzt_x) !<
358
359
360	INTEGER(iwp) :: i !<
361	INTEGER(iwp) :: j !<
362	INTEGER(iwp) :: k !<
363	!
364	!-- Rearrange indices of input array in order to make data to be send
365	!-- by MPI contiguous
366	!$OMP PARALLEL PRIVATE ( i, j, k )
367	!$OMP DO
368	DO i = 0, nx
369	DO k = nzb_x, nzt_x
370	DO j = nys_x, nyn_x
371	f_out(i,j,k) = f_inv(j,k,i)
372	ENDDO
373	ENDDO
374	ENDDO
375	!$OMP END PARALLEL
376
377	END SUBROUTINE resort_for_yx
378
379
380	!------------------------------------------------------------------------------!
381	! Description:
382	! ------------
383	!> Transposition of input array (f_in) from y to x. For the input array, all
384	!> elements along y reside on the same PE, while after transposition, all
385	!> elements along x reside on the same PE.
386	!------------------------------------------------------------------------------!
387	SUBROUTINE transpose_yx( f_in, f_inv )
388
389
390	USE cpulog, &
391	ONLY: cpu_log, cpu_log_nowait, log_point_s
392
393	USE indices, &
394	ONLY: nx, ny
395
396	USE kinds
397
398	USE pegrid
399
400	USE transpose_indices, &
401	ONLY: nxl_y, nxr_y, nyn_x, nys_x, nzb_x, nzb_y, nzt_x, nzt_y
402
403	IMPLICIT NONE
404
405	INTEGER(iwp) :: i !<
406	INTEGER(iwp) :: j !<
407	INTEGER(iwp) :: k !<
408	INTEGER(iwp) :: l !<
409	INTEGER(iwp) :: ys !<
410
411	REAL(wp) :: f_in(0:ny,nxl_y:nxr_y,nzb_y:nzt_y) !<
412	REAL(wp) :: f_inv(nys_x:nyn_x,nzb_x:nzt_x,0:nx) !<
413
414	REAL(wp), DIMENSION(nyn_x-nys_x+1,nzb_y:nzt_y,nxl_y:nxr_y,0:pdims(2)-1) :: work !<
415
416
417	IF ( numprocs /= 1 ) THEN
418
419	#if defined( __parallel )
420	!
421	!-- Reorder input array for transposition
422	!$OMP PARALLEL PRIVATE ( i, j, k, l, ys )
423	!$OMP DO
424	DO l = 0, pdims(2) - 1
425	ys = 0 + l * ( nyn_x - nys_x + 1 )
426	DO i = nxl_y, nxr_y
427	DO k = nzb_y, nzt_y
428	DO j = ys, ys + nyn_x - nys_x
429	work(j-ys+1,k,i,l) = f_in(j,i,k)
430	ENDDO
431	ENDDO
432	ENDDO
433	ENDDO
434	!$OMP END PARALLEL
435
436	!
437	!-- Transpose array
438	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start', cpu_log_nowait )
439	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
440	CALL MPI_ALLTOALL( work(1,nzb_y,nxl_y,0), sendrecvcount_xy, MPI_REAL, &
441	f_inv(nys_x,nzb_x,0), sendrecvcount_xy, MPI_REAL, &
442	comm1dy, ierr )
443	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
444	#endif
445
446	ELSE
447
448	!
449	!-- Reorder array f_in the same way as ALLTOALL did it
450	!$OMP PARALLEL PRIVATE ( i, j, k )
451	!$OMP DO
452	DO i = nxl_y, nxr_y
453	DO k = nzb_y, nzt_y
454	DO j = 0, ny
455	f_inv(j,k,i) = f_in(j,i,k)
456	ENDDO
457	ENDDO
458	ENDDO
459	!$OMP END PARALLEL
460
461	ENDIF
462
463	END SUBROUTINE transpose_yx
464
465
466	!------------------------------------------------------------------------------!
467	! Description:
468	! ------------
469	!> Transposition of input array (f_in) from y to x. For the input array, all
470	!> elements along y reside on the same PE, while after transposition, all
471	!> elements along x reside on the same PE.
472	!> This is a direct transposition for arrays with indices in regular order
473	!> (k,j,i) (cf. transpose_yx).
474	!------------------------------------------------------------------------------!
475	SUBROUTINE transpose_yxd( f_in, f_out )
476
477
478	USE cpulog, &
479	ONLY: cpu_log, cpu_log_nowait, log_point_s
480
481	USE indices, &
482	ONLY: nnx, nny, nnz, nx, nxl, nxr, nyn, nys, nz
483
484	USE kinds
485
486	USE pegrid
487
488	USE transpose_indices, &
489	ONLY: nyn_x, nys_x, nzb_x, nzt_x
490
491	IMPLICIT NONE
492
493	INTEGER(iwp) :: i !<
494	INTEGER(iwp) :: j !<
495	INTEGER(iwp) :: k !<
496	INTEGER(iwp) :: l !<
497	INTEGER(iwp) :: m !<
498	INTEGER(iwp) :: xs !<
499
500	REAL(wp) :: f_in(1:nz,nys:nyn,nxl:nxr) !<
501	REAL(wp) :: f_inv(nxl:nxr,1:nz,nys:nyn) !<
502	REAL(wp) :: f_out(0:nx,nys_x:nyn_x,nzb_x:nzt_x) !<
503	REAL(wp) :: work(nnxnnynnz) !<
504	#if defined( __parallel )
505
506	!
507	!-- Rearrange indices of input array in order to make data to be send
508	!-- by MPI contiguous
509	DO k = 1, nz
510	DO j = nys, nyn
511	DO i = nxl, nxr
512	f_inv(i,k,j) = f_in(k,j,i)
513	ENDDO
514	ENDDO
515	ENDDO
516
517	!
518	!-- Transpose array
519	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
520	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
521	CALL MPI_ALLTOALL( f_inv(nxl,1,nys), sendrecvcount_xy, MPI_REAL, &
522	work(1), sendrecvcount_xy, MPI_REAL, &
523	comm1dx, ierr )
524	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
525
526	!
527	!-- Reorder transposed array
528	m = 0
529	DO l = 0, pdims(1) - 1
530	xs = 0 + l * nnx
531	DO j = nys_x, nyn_x
532	DO k = 1, nz
533	DO i = xs, xs + nnx - 1
534	m = m + 1
535	f_out(i,j,k) = work(m)
536	ENDDO
537	ENDDO
538	ENDDO
539	ENDDO
540
541	#endif
542
543	END SUBROUTINE transpose_yxd
544
545
546	!------------------------------------------------------------------------------!
547	! Description:
548	! ------------
549	!> Resorting data for the transposition from y to z. The transposition itself
550	!> is carried out in transpose_yz
551	!------------------------------------------------------------------------------!
552	SUBROUTINE resort_for_yz( f_in, f_inv )
553
554
555	USE indices, &
556	ONLY: ny
557
558	USE kinds
559
560	USE transpose_indices, &
561	ONLY: nxl_y, nxr_y, nzb_y, nzt_y
562
563	IMPLICIT NONE
564
565	REAL(wp) :: f_in(0:ny,nxl_y:nxr_y,nzb_y:nzt_y) !<
566	REAL(wp) :: f_inv(nxl_y:nxr_y,nzb_y:nzt_y,0:ny) !<
567
568	INTEGER(iwp) :: i !<
569	INTEGER(iwp) :: j !<
570	INTEGER(iwp) :: k !<
571
572	!
573	!-- Rearrange indices of input array in order to make data to be send
574	!-- by MPI contiguous
575	!$OMP PARALLEL PRIVATE ( i, j, k )
576	!$OMP DO
577	DO j = 0, ny
578	DO k = nzb_y, nzt_y
579	DO i = nxl_y, nxr_y
580	f_inv(i,k,j) = f_in(j,i,k)
581	ENDDO
582	ENDDO
583	ENDDO
584	!$OMP END PARALLEL
585
586	END SUBROUTINE resort_for_yz
587
588
589	!------------------------------------------------------------------------------!
590	! Description:
591	! ------------
592	!> Transposition of input array (f_in) from y to z. For the input array, all
593	!> elements along y reside on the same PE, while after transposition, all
594	!> elements along z reside on the same PE.
595	!------------------------------------------------------------------------------!
596	SUBROUTINE transpose_yz( f_inv, f_out )
597
598
599	USE cpulog, &
600	ONLY: cpu_log, cpu_log_nowait, log_point_s
601
602	USE indices, &
603	ONLY: ny, nz
604
605	USE kinds
606
607	USE pegrid
608
609	USE transpose_indices, &
610	ONLY: nxl_y, nxl_z, nxr_y, nxr_z, nyn_z, nys_z, nzb_y, nzt_y
611
612	IMPLICIT NONE
613
614	INTEGER(iwp) :: i !<
615	INTEGER(iwp) :: j !<
616	INTEGER(iwp) :: k !<
617	INTEGER(iwp) :: l !<
618	INTEGER(iwp) :: zs !<
619
620	REAL(wp) :: f_inv(nxl_y:nxr_y,nzb_y:nzt_y,0:ny) !<
621	REAL(wp) :: f_out(nxl_z:nxr_z,nys_z:nyn_z,1:nz) !<
622
623	REAL(wp), DIMENSION(nxl_z:nxr_z,nzt_y-nzb_y+1,nys_z:nyn_z,0:pdims(1)-1) :: work !<
624
625
626	!
627	!-- If the PE grid is one-dimensional along y, only local reordering
628	!-- of the data is necessary and no transposition has to be done.
629	IF ( pdims(1) == 1 ) THEN
630
631	!$OMP PARALLEL PRIVATE ( i, j, k )
632	!$OMP DO
633	DO j = 0, ny
634	DO k = nzb_y, nzt_y
635	DO i = nxl_y, nxr_y
636	f_out(i,j,k) = f_inv(i,k,j)
637	ENDDO
638	ENDDO
639	ENDDO
640	!$OMP END PARALLEL
641
642	ELSE
643
644	#if defined( __parallel )
645	!
646	!-- Transpose array
647	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start', cpu_log_nowait )
648	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
649	CALL MPI_ALLTOALL( f_inv(nxl_y,nzb_y,0), sendrecvcount_yz, MPI_REAL, &
650	work(nxl_z,1,nys_z,0), sendrecvcount_yz, MPI_REAL, &
651	comm1dx, ierr )
652	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
653
654	!
655	!-- Reorder transposed array
656	!$OMP PARALLEL PRIVATE ( i, j, k, l, zs )
657	!$OMP DO
658	DO l = 0, pdims(1) - 1
659	zs = 1 + l * ( nzt_y - nzb_y + 1 )
660	DO j = nys_z, nyn_z
661	DO k = zs, zs + nzt_y - nzb_y
662	DO i = nxl_z, nxr_z
663	f_out(i,j,k) = work(i,k-zs+1,j,l)
664	ENDDO
665	ENDDO
666	ENDDO
667	ENDDO
668	!$OMP END PARALLEL
669	#endif
670
671	ENDIF
672
673	END SUBROUTINE transpose_yz
674
675
676	!------------------------------------------------------------------------------!
677	! Description:
678	! ------------
679	!> Resorting data for the transposition from z to x. The transposition itself
680	!> is carried out in transpose_zx
681	!------------------------------------------------------------------------------!
682	SUBROUTINE resort_for_zx( f_in, f_inv )
683
684
685	USE indices, &
686	ONLY: nxl, nxr, nyn, nys, nz
687
688	USE kinds
689
690	IMPLICIT NONE
691
692	REAL(wp) :: f_in(1:nz,nys:nyn,nxl:nxr) !<
693	REAL(wp) :: f_inv(nys:nyn,nxl:nxr,1:nz) !<
694
695	INTEGER(iwp) :: i !<
696	INTEGER(iwp) :: j !<
697	INTEGER(iwp) :: k !<
698
699	!
700	!-- Rearrange indices of input array in order to make data to be send
701	!-- by MPI contiguous
702	!$OMP PARALLEL PRIVATE ( i, j, k )
703	!$OMP DO
704	DO k = 1,nz
705	DO i = nxl, nxr
706	DO j = nys, nyn
707	f_inv(j,i,k) = f_in(k,j,i)
708	ENDDO
709	ENDDO
710	ENDDO
711	!$OMP END PARALLEL
712
713	END SUBROUTINE resort_for_zx
714
715
716	!------------------------------------------------------------------------------!
717	! Description:
718	! ------------
719	!> Transposition of input array (f_in) from z to x. For the input array, all
720	!> elements along z reside on the same PE, while after transposition, all
721	!> elements along x reside on the same PE.
722	!------------------------------------------------------------------------------!
723	SUBROUTINE transpose_zx( f_inv, f_out )
724
725
726	USE cpulog, &
727	ONLY: cpu_log, cpu_log_nowait, log_point_s
728
729	USE indices, &
730	ONLY: nnx, nx, nxl, nxr, nyn, nys, nz
731
732	USE kinds
733
734	USE pegrid
735
736	USE transpose_indices, &
737	ONLY: nyn_x, nys_x, nzb_x, nzt_x
738
739	IMPLICIT NONE
740
741	INTEGER(iwp) :: i !<
742	INTEGER(iwp) :: j !<
743	INTEGER(iwp) :: k !<
744	INTEGER(iwp) :: l !<
745	INTEGER(iwp) :: xs !<
746
747	REAL(wp) :: f_inv(nys:nyn,nxl:nxr,1:nz) !<
748	REAL(wp) :: f_out(0:nx,nys_x:nyn_x,nzb_x:nzt_x) !<
749
750	REAL(wp), DIMENSION(nys_x:nyn_x,nnx,nzb_x:nzt_x,0:pdims(1)-1) :: work !<
751
752
753	!
754	!-- If the PE grid is one-dimensional along y, only local reordering
755	!-- of the data is necessary and no transposition has to be done.
756	IF ( pdims(1) == 1 ) THEN
757
758	!$OMP PARALLEL PRIVATE ( i, j, k )
759	!$OMP DO
760	DO k = 1, nz
761	DO i = nxl, nxr
762	DO j = nys, nyn
763	f_out(i,j,k) = f_inv(j,i,k)
764	ENDDO
765	ENDDO
766	ENDDO
767	!$OMP END PARALLEL
768
769	ELSE
770
771	#if defined( __parallel )
772	!
773	!-- Transpose array
774	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start', cpu_log_nowait )
775	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
776	CALL MPI_ALLTOALL( f_inv(nys,nxl,1), sendrecvcount_zx, MPI_REAL, &
777	work(nys_x,1,nzb_x,0), sendrecvcount_zx, MPI_REAL, &
778	comm1dx, ierr )
779	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
780
781	!
782	!-- Reorder transposed array
783	!$OMP PARALLEL PRIVATE ( i, j, k, l, xs )
784	!$OMP DO
785	DO l = 0, pdims(1) - 1
786	xs = 0 + l * nnx
787	DO k = nzb_x, nzt_x
788	DO i = xs, xs + nnx - 1
789	DO j = nys_x, nyn_x
790	f_out(i,j,k) = work(j,i-xs+1,k,l)
791	ENDDO
792	ENDDO
793	ENDDO
794	ENDDO
795	!$OMP END PARALLEL
796	#endif
797
798	ENDIF
799
800	END SUBROUTINE transpose_zx
801
802
803	!------------------------------------------------------------------------------!
804	! Description:
805	! ------------
806	!> Resorting data after the transposition from z to y. The transposition itself
807	!> is carried out in transpose_zy
808	!------------------------------------------------------------------------------!
809	SUBROUTINE resort_for_zy( f_inv, f_out )
810
811
812	USE indices, &
813	ONLY: ny
814
815	USE kinds
816
817	USE transpose_indices, &
818	ONLY: nxl_y, nxr_y, nzb_y, nzt_y
819
820	IMPLICIT NONE
821
822	REAL(wp) :: f_inv(nxl_y:nxr_y,nzb_y:nzt_y,0:ny) !<
823	REAL(wp) :: f_out(0:ny,nxl_y:nxr_y,nzb_y:nzt_y) !<
824
825
826	INTEGER(iwp) :: i !<
827	INTEGER(iwp) :: j !<
828	INTEGER(iwp) :: k !<
829
830	!
831	!-- Rearrange indices of input array in order to make data to be send
832	!-- by MPI contiguous
833	!$OMP PARALLEL PRIVATE ( i, j, k )
834	!$OMP DO
835	DO k = nzb_y, nzt_y
836	DO j = 0, ny
837	DO i = nxl_y, nxr_y
838	f_out(j,i,k) = f_inv(i,k,j)
839	ENDDO
840	ENDDO
841	ENDDO
842	!$OMP END PARALLEL
843
844	END SUBROUTINE resort_for_zy
845
846
847	!------------------------------------------------------------------------------!
848	! Description:
849	! ------------
850	!> Transposition of input array (f_in) from z to y. For the input array, all
851	!> elements along z reside on the same PE, while after transposition, all
852	!> elements along y reside on the same PE.
853	!------------------------------------------------------------------------------!
854	SUBROUTINE transpose_zy( f_in, f_inv )
855
856
857	USE cpulog, &
858	ONLY: cpu_log, cpu_log_nowait, log_point_s
859
860	USE indices, &
861	ONLY: ny, nz
862
863	USE kinds
864
865	USE pegrid
866
867	USE transpose_indices, &
868	ONLY: nxl_y, nxl_z, nxr_y, nxr_z, nyn_z, nys_z, nzb_y, nzt_y
869
870	IMPLICIT NONE
871
872	INTEGER(iwp) :: i !<
873	INTEGER(iwp) :: j !<
874	INTEGER(iwp) :: k !<
875	INTEGER(iwp) :: l !<
876	INTEGER(iwp) :: zs !<
877
878	REAL(wp) :: f_in(nxl_z:nxr_z,nys_z:nyn_z,1:nz) !<
879	REAL(wp) :: f_inv(nxl_y:nxr_y,nzb_y:nzt_y,0:ny) !<
880
881	REAL(wp), DIMENSION(nxl_z:nxr_z,nzt_y-nzb_y+1,nys_z:nyn_z,0:pdims(1)-1) :: work !<
882
883	!
884	!-- If the PE grid is one-dimensional along y, the array has only to be
885	!-- reordered locally and therefore no transposition has to be done.
886	IF ( pdims(1) /= 1 ) THEN
887
888	#if defined( __parallel )
889	!
890	!-- Reorder input array for transposition
891	!$OMP PARALLEL PRIVATE ( i, j, k, l, zs )
892	!$OMP DO
893	DO l = 0, pdims(1) - 1
894	zs = 1 + l * ( nzt_y - nzb_y + 1 )
895	DO j = nys_z, nyn_z
896	DO k = zs, zs + nzt_y - nzb_y
897	DO i = nxl_z, nxr_z
898	work(i,k-zs+1,j,l) = f_in(i,j,k)
899	ENDDO
900	ENDDO
901	ENDDO
902	ENDDO
903	!$OMP END PARALLEL
904
905	!
906	!-- Transpose array
907	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start', cpu_log_nowait )
908	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
909	CALL MPI_ALLTOALL( work(nxl_z,1,nys_z,0), sendrecvcount_yz, MPI_REAL, &
910	f_inv(nxl_y,nzb_y,0), sendrecvcount_yz, MPI_REAL, &
911	comm1dx, ierr )
912	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
913	#endif
914
915	ELSE
916	!
917	!-- Reorder the array in the same way like ALLTOALL did it
918	!$OMP PARALLEL PRIVATE ( i, j, k )
919	!$OMP DO
920	DO k = nzb_y, nzt_y
921	DO j = 0, ny
922	DO i = nxl_y, nxr_y
923	f_inv(i,k,j) = f_in(i,j,k)
924	ENDDO
925	ENDDO
926	ENDDO
927	!$OMP END PARALLEL
928
929	ENDIF
930
931	END SUBROUTINE transpose_zy
932
933
934	!------------------------------------------------------------------------------!
935	! Description:
936	! ------------
937	!> Transposition of input array (f_in) from z to y. For the input array, all
938	!> elements along z reside on the same PE, while after transposition, all
939	!> elements along y reside on the same PE.
940	!> This is a direct transposition for arrays with indices in regular order
941	!> (k,j,i) (cf. transpose_zy).
942	!------------------------------------------------------------------------------!
943	SUBROUTINE transpose_zyd( f_in, f_out )
944
945
946	USE cpulog, &
947	ONLY: cpu_log, cpu_log_nowait, log_point_s
948
949	USE indices, &
950	ONLY: nnx, nny, nnz, nxl, nxr, nyn, nys, ny, nz
951
952	USE kinds
953
954	USE pegrid
955
956	USE transpose_indices, &
957	ONLY: nxl_y, nxl_yd, nxr_y, nxr_yd, nzb_y, nzb_yd, nzt_y, nzt_yd
958
959	IMPLICIT NONE
960
961	INTEGER(iwp) :: i !<
962	INTEGER(iwp) :: j !<
963	INTEGER(iwp) :: k !<
964	INTEGER(iwp) :: l !<
965	INTEGER(iwp) :: m !<
966	INTEGER(iwp) :: ys !<
967
968	REAL(wp) :: f_in(1:nz,nys:nyn,nxl:nxr) !<
969	REAL(wp) :: f_inv(nys:nyn,nxl:nxr,1:nz) !<
970	REAL(wp) :: f_out(0:ny,nxl_yd:nxr_yd,nzb_yd:nzt_yd) !<
971	REAL(wp) :: work(nnxnnynnz) !<
972
973	#if defined( __parallel )
974
975	!
976	!-- Rearrange indices of input array in order to make data to be send
977	!-- by MPI contiguous
978	DO i = nxl, nxr
979	DO j = nys, nyn
980	DO k = 1, nz
981	f_inv(j,i,k) = f_in(k,j,i)
982	ENDDO
983	ENDDO
984	ENDDO
985
986	!
987	!-- Move data to different array, because memory location of work1 is
988	!-- needed further below (work1 = work2).
989	!-- If the PE grid is one-dimensional along x, only local reordering
990	!-- of the data is necessary and no transposition has to be done.
991	IF ( pdims(2) == 1 ) THEN
992	DO k = 1, nz
993	DO i = nxl, nxr
994	DO j = nys, nyn
995	f_out(j,i,k) = f_inv(j,i,k)
996	ENDDO
997	ENDDO
998	ENDDO
999	RETURN
1000	ENDIF
1001
1002	!
1003	!-- Transpose array
1004	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
1005	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
1006	CALL MPI_ALLTOALL( f_inv(nys,nxl,1), sendrecvcount_zyd, MPI_REAL, &
1007	work(1), sendrecvcount_zyd, MPI_REAL, &
1008	comm1dy, ierr )
1009	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
1010
1011	!
1012	!-- Reorder transposed array
1013	m = 0
1014	DO l = 0, pdims(2) - 1
1015	ys = 0 + l * nny
1016	DO k = nzb_yd, nzt_yd
1017	DO i = nxl_yd, nxr_yd
1018	DO j = ys, ys + nny - 1
1019	m = m + 1
1020	f_out(j,i,k) = work(m)
1021	ENDDO
1022	ENDDO
1023	ENDDO
1024	ENDDO
1025
1026	#endif
1027
1028	END SUBROUTINE transpose_zyd

Note: See TracBrowser for help on using the repository browser.

Download in other formats:

| Impressum | ©Leibniz Universität Hannover |