Home

Context Navigation

source: palm/trunk/SOURCE/transpose.f90 @ 1108

Last change on this file since 1108 was 1107, checked in by raasch, 12 years ago
last commit documented
Property svn:keywords set to `Id`
File size: 21.6 KB

Line
1	SUBROUTINE transpose_xy( f_in, work, f_out )
2
3	!--------------------------------------------------------------------------------!
4	! This file is part of PALM.
5	!
6	! PALM is free software: you can redistribute it and/or modify it under the terms
7	! of the GNU General Public License as published by the Free Software Foundation,
8	! either version 3 of the License, or (at your option) any later version.
9	!
10	! PALM is distributed in the hope that it will be useful, but WITHOUT ANY
11	! WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
12	! A PARTICULAR PURPOSE. See the GNU General Public License for more details.
13	!
14	! You should have received a copy of the GNU General Public License along with
15	! PALM. If not, see <http://www.gnu.org/licenses/>.
16	!
17	! Copyright 1997-2012 Leibniz University Hannover
18	!--------------------------------------------------------------------------------!
19	!
20	! Current revisions:
21	! -----------------
22	!
23	!
24	! Former revisions:
25	! -----------------
26	! $Id: transpose.f90 1107 2013-03-04 06:23:14Z raasch $
27	!
28	! 1106 2013-03-04 05:31:38Z raasch
29	! preprocessor lines rearranged so that routines can also be used in serial
30	! (non-parallel) mode
31	!
32	! 1092 2013-02-02 11:24:22Z raasch
33	! unused variables removed
34	!
35	! 1036 2012-10-22 13:43:42Z raasch
36	! code put under GPL (PALM 3.9)
37	!
38	! 1003 2012-09-14 14:35:53Z raasch
39	! indices nxa, nya, etc. replaced by nx, ny, etc.
40	!
41	! 683 2011-02-09 14:25:15Z raasch
42	! openMP parallelization of transpositions for 2d-domain-decomposition
43	!
44	! 622 2010-12-10 08:08:13Z raasch
45	! optional barriers included in order to speed up collective operations
46	!
47	! 164 2008-05-15 08:46:15Z raasch
48	! f_inv changed from subroutine argument to automatic array in order to do
49	! re-ordering from f_in to f_inv in one step, one array work is needed instead
50	! of work1 and work2
51	!
52	! February 2007
53	! RCS Log replace by Id keyword, revision history cleaned up
54	!
55	! Revision 1.2 2004/04/30 13:12:17 raasch
56	! Switched from mpi_alltoallv to the simpler mpi_alltoall,
57	! all former transpose-routine files collected in this file, enlarged
58	! transposition arrays introduced
59	!
60	! Revision 1.1 2004/04/30 13:08:16 raasch
61	! Initial revision (collection of former routines transpose_xy, transpose_xz,
62	! transpose_yx, transpose_yz, transpose_zx, transpose_zy)
63	!
64	! Revision 1.1 1997/07/24 11:25:18 raasch
65	! Initial revision
66	!
67	!
68	! Description:
69	! ------------
70	! Transposition of input array (f_in) from x to y. For the input array, all
71	! elements along x reside on the same PE, while after transposition, all
72	! elements along y reside on the same PE.
73	!------------------------------------------------------------------------------!
74
75	USE cpulog
76	USE indices
77	USE interfaces
78	USE pegrid
79	USE transpose_indices
80
81	IMPLICIT NONE
82
83	INTEGER :: i, j, k, l, m, ys
84
85	REAL :: f_in(0:nx,nys_x:nyn_x,nzb_x:nzt_x), &
86	f_inv(nys_x:nyn_x,nzb_x:nzt_x,0:nx), &
87	f_out(0:ny,nxl_y:nxr_y,nzb_y:nzt_y), &
88	work(nnxnnynnz)
89
90	!
91	!-- Rearrange indices of input array in order to make data to be send
92	!-- by MPI contiguous
93	!$OMP PARALLEL PRIVATE ( i, j, k )
94	!$OMP DO
95	DO i = 0, nx
96	DO k = nzb_x, nzt_x
97	DO j = nys_x, nyn_x
98	f_inv(j,k,i) = f_in(i,j,k)
99	ENDDO
100	ENDDO
101	ENDDO
102	!$OMP END PARALLEL
103
104	IF ( numprocs /= 1 ) THEN
105
106	#if defined( __parallel )
107	!
108	!-- Transpose array
109	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
110	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
111	CALL MPI_ALLTOALL( f_inv(nys_x,nzb_x,0), sendrecvcount_xy, MPI_REAL, &
112	work(1), sendrecvcount_xy, MPI_REAL, &
113	comm1dy, ierr )
114	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
115
116	!
117	!-- Reorder transposed array
118	!$OMP PARALLEL PRIVATE ( i, j, k, l, m, ys )
119	!$OMP DO
120	DO l = 0, pdims(2) - 1
121	m = l * ( nxr_y - nxl_y + 1 ) * ( nzt_y - nzb_y + 1 ) * &
122	( nyn_x - nys_x + 1 )
123	ys = 0 + l * ( nyn_x - nys_x + 1 )
124	DO i = nxl_y, nxr_y
125	DO k = nzb_y, nzt_y
126	DO j = ys, ys + nyn_x - nys_x
127	m = m + 1
128	f_out(j,i,k) = work(m)
129	ENDDO
130	ENDDO
131	ENDDO
132	ENDDO
133	!$OMP END PARALLEL
134	#endif
135
136	ELSE
137
138	!
139	!-- Reorder transposed array
140	!$OMP PARALLEL PRIVATE ( i, j, k )
141	!$OMP DO
142	DO k = nzb_y, nzt_y
143	DO i = nxl_y, nxr_y
144	DO j = 0, ny
145	f_out(j,i,k) = f_inv(j,k,i)
146	ENDDO
147	ENDDO
148	ENDDO
149	!$OMP END PARALLEL
150
151	ENDIF
152
153	END SUBROUTINE transpose_xy
154
155
156	SUBROUTINE transpose_xz( f_in, work, f_out )
157
158	!------------------------------------------------------------------------------!
159	! Description:
160	! ------------
161	! Transposition of input array (f_in) from x to z. For the input array, all
162	! elements along x reside on the same PE, while after transposition, all
163	! elements along z reside on the same PE.
164	!------------------------------------------------------------------------------!
165
166	USE cpulog
167	USE indices
168	USE interfaces
169	USE pegrid
170	USE transpose_indices
171
172	IMPLICIT NONE
173
174	INTEGER :: i, j, k, l, m, xs
175
176	REAL :: f_in(0:nx,nys_x:nyn_x,nzb_x:nzt_x), &
177	f_inv(nys:nyn,nxl:nxr,1:nz), &
178	f_out(1:nz,nys:nyn,nxl:nxr), &
179	work(nnxnnynnz)
180
181
182	!
183	!-- If the PE grid is one-dimensional along y, the array has only to be
184	!-- reordered locally and therefore no transposition has to be done.
185	IF ( pdims(1) /= 1 ) THEN
186
187	#if defined( __parallel )
188	!
189	!-- Reorder input array for transposition
190	!$OMP PARALLEL PRIVATE ( i, j, k, l, m, xs )
191	!$OMP DO
192	DO l = 0, pdims(1) - 1
193	m = l * ( nzt_x - nzb_x + 1 ) * nnx * ( nyn_x - nys_x + 1 )
194	xs = 0 + l * nnx
195	DO k = nzb_x, nzt_x
196	DO i = xs, xs + nnx - 1
197	DO j = nys_x, nyn_x
198	m = m + 1
199	work(m) = f_in(i,j,k)
200	ENDDO
201	ENDDO
202	ENDDO
203	ENDDO
204	!$OMP END PARALLEL
205
206	!
207	!-- Transpose array
208	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
209	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
210	CALL MPI_ALLTOALL( work(1), sendrecvcount_zx, MPI_REAL, &
211	f_inv(nys,nxl,1), sendrecvcount_zx, MPI_REAL, &
212	comm1dx, ierr )
213	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
214
215	!
216	!-- Reorder transposed array in a way that the z index is in first position
217	!$OMP PARALLEL PRIVATE ( i, j, k )
218	!$OMP DO
219	DO k = 1, nz
220	DO i = nxl, nxr
221	DO j = nys, nyn
222	f_out(k,j,i) = f_inv(j,i,k)
223	ENDDO
224	ENDDO
225	ENDDO
226	!$OMP END PARALLEL
227	#endif
228
229	ELSE
230
231	!
232	!-- Reorder the array in a way that the z index is in first position
233	!$OMP PARALLEL PRIVATE ( i, j, k )
234	!$OMP DO
235	DO i = nxl, nxr
236	DO j = nys, nyn
237	DO k = 1, nz
238	f_inv(j,i,k) = f_in(i,j,k)
239	ENDDO
240	ENDDO
241	ENDDO
242	!$OMP END PARALLEL
243
244	!$OMP PARALLEL PRIVATE ( i, j, k )
245	!$OMP DO
246	DO k = 1, nz
247	DO i = nxl, nxr
248	DO j = nys, nyn
249	f_out(k,j,i) = f_inv(j,i,k)
250	ENDDO
251	ENDDO
252	ENDDO
253	!$OMP END PARALLEL
254
255	ENDIF
256
257	END SUBROUTINE transpose_xz
258
259
260	SUBROUTINE transpose_yx( f_in, work, f_out )
261
262	!------------------------------------------------------------------------------!
263	! Description:
264	! ------------
265	! Transposition of input array (f_in) from y to x. For the input array, all
266	! elements along y reside on the same PE, while after transposition, all
267	! elements along x reside on the same PE.
268	!------------------------------------------------------------------------------!
269
270	USE cpulog
271	USE indices
272	USE interfaces
273	USE pegrid
274	USE transpose_indices
275
276	IMPLICIT NONE
277
278	INTEGER :: i, j, k, l, m, ys
279
280	REAL :: f_in(0:ny,nxl_y:nxr_y,nzb_y:nzt_y), &
281	f_inv(nys_x:nyn_x,nzb_x:nzt_x,0:nx), &
282	f_out(0:nx,nys_x:nyn_x,nzb_x:nzt_x), &
283	work(nnxnnynnz)
284
285	IF ( numprocs /= 1 ) THEN
286
287	#if defined( __parallel )
288	!
289	!-- Reorder input array for transposition
290	!$OMP PARALLEL PRIVATE ( i, j, k, l, m, ys )
291	!$OMP DO
292	DO l = 0, pdims(2) - 1
293	m = l * ( nxr_y - nxl_y + 1 ) * ( nzt_y - nzb_y + 1 ) * &
294	( nyn_x - nys_x + 1 )
295	ys = 0 + l * ( nyn_x - nys_x + 1 )
296	DO i = nxl_y, nxr_y
297	DO k = nzb_y, nzt_y
298	DO j = ys, ys + nyn_x - nys_x
299	m = m + 1
300	work(m) = f_in(j,i,k)
301	ENDDO
302	ENDDO
303	ENDDO
304	ENDDO
305	!$OMP END PARALLEL
306
307	!
308	!-- Transpose array
309	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
310	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
311	CALL MPI_ALLTOALL( work(1), sendrecvcount_xy, MPI_REAL, &
312	f_inv(nys_x,nzb_x,0), sendrecvcount_xy, MPI_REAL, &
313	comm1dy, ierr )
314	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
315	#endif
316
317	ELSE
318
319	!
320	!-- Reorder array f_in the same way as ALLTOALL did it
321	!$OMP PARALLEL PRIVATE ( i, j, k )
322	!$OMP DO
323	DO i = nxl_y, nxr_y
324	DO k = nzb_y, nzt_y
325	DO j = 0, ny
326	f_inv(j,k,i) = f_in(j,i,k)
327	ENDDO
328	ENDDO
329	ENDDO
330	!$OMP END PARALLEL
331
332	ENDIF
333
334	!
335	!-- Reorder transposed array in a way that the x index is in first position
336	!$OMP PARALLEL PRIVATE ( i, j, k )
337	!$OMP DO
338	DO i = 0, nx
339	DO k = nzb_x, nzt_x
340	DO j = nys_x, nyn_x
341	f_out(i,j,k) = f_inv(j,k,i)
342	ENDDO
343	ENDDO
344	ENDDO
345	!$OMP END PARALLEL
346
347	END SUBROUTINE transpose_yx
348
349
350	SUBROUTINE transpose_yxd( f_in, work, f_out )
351
352	!------------------------------------------------------------------------------!
353	! Description:
354	! ------------
355	! Transposition of input array (f_in) from y to x. For the input array, all
356	! elements along y reside on the same PE, while after transposition, all
357	! elements along x reside on the same PE.
358	! This is a direct transposition for arrays with indices in regular order
359	! (k,j,i) (cf. transpose_yx).
360	!------------------------------------------------------------------------------!
361
362	USE cpulog
363	USE indices
364	USE interfaces
365	USE pegrid
366	USE transpose_indices
367
368	IMPLICIT NONE
369
370	INTEGER :: i, j, k, l, m, xs
371
372	REAL :: f_in(1:nz,nys:nyn,nxl:nxr), f_inv(nxl:nxr,1:nz,nys:nyn), &
373	f_out(0:nx,nys_x:nyn_x,nzb_x:nzt_x), &
374	work(nnxnnynnz)
375
376	#if defined( __parallel )
377
378	!
379	!-- Rearrange indices of input array in order to make data to be send
380	!-- by MPI contiguous
381	DO k = 1, nz
382	DO j = nys, nyn
383	DO i = nxl, nxr
384	f_inv(i,k,j) = f_in(k,j,i)
385	ENDDO
386	ENDDO
387	ENDDO
388
389	!
390	!-- Transpose array
391	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
392	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
393	CALL MPI_ALLTOALL( f_inv(nxl,1,nys), sendrecvcount_xy, MPI_REAL, &
394	work(1), sendrecvcount_xy, MPI_REAL, &
395	comm1dx, ierr )
396	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
397
398	!
399	!-- Reorder transposed array
400	m = 0
401	DO l = 0, pdims(1) - 1
402	xs = 0 + l * nnx
403	DO j = nys_x, nyn_x
404	DO k = 1, nz
405	DO i = xs, xs + nnx - 1
406	m = m + 1
407	f_out(i,j,k) = work(m)
408	ENDDO
409	ENDDO
410	ENDDO
411	ENDDO
412
413	#endif
414
415	END SUBROUTINE transpose_yxd
416
417
418	SUBROUTINE transpose_yz( f_in, work, f_out )
419
420	!------------------------------------------------------------------------------!
421	! Description:
422	! ------------
423	! Transposition of input array (f_in) from y to z. For the input array, all
424	! elements along y reside on the same PE, while after transposition, all
425	! elements along z reside on the same PE.
426	!------------------------------------------------------------------------------!
427
428	USE cpulog
429	USE indices
430	USE interfaces
431	USE pegrid
432	USE transpose_indices
433
434	IMPLICIT NONE
435
436	INTEGER :: i, j, k, l, m, zs
437
438	REAL :: f_in(0:ny,nxl_y:nxr_y,nzb_y:nzt_y), &
439	f_inv(nxl_y:nxr_y,nzb_y:nzt_y,0:ny), &
440	f_out(nxl_z:nxr_z,nys_z:nyn_z,1:nz), &
441	work(nnxnnynnz)
442
443	!
444	!-- Rearrange indices of input array in order to make data to be send
445	!-- by MPI contiguous
446	!$OMP PARALLEL PRIVATE ( i, j, k )
447	!$OMP DO
448	DO j = 0, ny
449	DO k = nzb_y, nzt_y
450	DO i = nxl_y, nxr_y
451	f_inv(i,k,j) = f_in(j,i,k)
452	ENDDO
453	ENDDO
454	ENDDO
455	!$OMP END PARALLEL
456
457	!
458	!-- Move data to different array, because memory location of work1 is
459	!-- needed further below (work1 = work2).
460	!-- If the PE grid is one-dimensional along y, only local reordering
461	!-- of the data is necessary and no transposition has to be done.
462	IF ( pdims(1) == 1 ) THEN
463
464	!$OMP PARALLEL PRIVATE ( i, j, k )
465	!$OMP DO
466	DO j = 0, ny
467	DO k = nzb_y, nzt_y
468	DO i = nxl_y, nxr_y
469	f_out(i,j,k) = f_inv(i,k,j)
470	ENDDO
471	ENDDO
472	ENDDO
473	!$OMP END PARALLEL
474
475	ELSE
476
477	#if defined( __parallel )
478	!
479	!-- Transpose array
480	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
481	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
482	CALL MPI_ALLTOALL( f_inv(nxl_y,nzb_y,0), sendrecvcount_yz, MPI_REAL, &
483	work(1), sendrecvcount_yz, MPI_REAL, &
484	comm1dx, ierr )
485	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
486
487	!
488	!-- Reorder transposed array
489	!$OMP PARALLEL PRIVATE ( i, j, k, l, m, zs )
490	!$OMP DO
491	DO l = 0, pdims(1) - 1
492	m = l * ( nyn_z - nys_z + 1 ) * ( nzt_y - nzb_y + 1 ) * &
493	( nxr_z - nxl_z + 1 )
494	zs = 1 + l * ( nzt_y - nzb_y + 1 )
495	DO j = nys_z, nyn_z
496	DO k = zs, zs + nzt_y - nzb_y
497	DO i = nxl_z, nxr_z
498	m = m + 1
499	f_out(i,j,k) = work(m)
500	ENDDO
501	ENDDO
502	ENDDO
503	ENDDO
504	!$OMP END PARALLEL
505	#endif
506
507	ENDIF
508
509	END SUBROUTINE transpose_yz
510
511
512	SUBROUTINE transpose_zx( f_in, work, f_out )
513
514	!------------------------------------------------------------------------------!
515	! Description:
516	! ------------
517	! Transposition of input array (f_in) from z to x. For the input array, all
518	! elements along z reside on the same PE, while after transposition, all
519	! elements along x reside on the same PE.
520	!------------------------------------------------------------------------------!
521
522	USE cpulog
523	USE indices
524	USE interfaces
525	USE pegrid
526	USE transpose_indices
527
528	IMPLICIT NONE
529
530	INTEGER :: i, j, k, l, m, xs
531
532	REAL :: f_in(1:nz,nys:nyn,nxl:nxr), f_out(0:nx,nys_x:nyn_x,nzb_x:nzt_x), &
533	work(nnxnnynnz)
534
535	!$acc declare create ( f_inv )
536	REAL :: f_inv(nys:nyn,nxl:nxr,1:nz)
537
538
539	!
540	!-- Rearrange indices of input array in order to make data to be send
541	!-- by MPI contiguous
542	!$OMP PARALLEL PRIVATE ( i, j, k )
543	!$OMP DO
544	!$acc kernels present( f_in )
545	!$acc loop
546	DO k = 1,nz
547	DO i = nxl, nxr
548	!$acc loop vector( 32 )
549	DO j = nys, nyn
550	f_inv(j,i,k) = f_in(k,j,i)
551	ENDDO
552	ENDDO
553	ENDDO
554	!$OMP END PARALLEL
555
556	!
557	!-- Move data to different array, because memory location of work1 is
558	!-- needed further below (work1 = work2).
559	!-- If the PE grid is one-dimensional along y, only local reordering
560	!-- of the data is necessary and no transposition has to be done.
561	IF ( pdims(1) == 1 ) THEN
562
563	!$OMP PARALLEL PRIVATE ( i, j, k )
564	!$OMP DO
565	!$acc kernels present( f_out )
566	!$acc loop
567	DO k = 1, nz
568	DO i = nxl, nxr
569	!$acc loop vector( 32 )
570	DO j = nys, nyn
571	f_out(i,j,k) = f_inv(j,i,k)
572	ENDDO
573	ENDDO
574	ENDDO
575	!$OMP END PARALLEL
576
577	ELSE
578
579	#if defined( __parallel )
580	!
581	!-- Transpose array
582	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
583	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
584	CALL MPI_ALLTOALL( f_inv(nys,nxl,1), sendrecvcount_zx, MPI_REAL, &
585	work(1), sendrecvcount_zx, MPI_REAL, &
586	comm1dx, ierr )
587	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
588
589	!
590	!-- Reorder transposed array
591	!$OMP PARALLEL PRIVATE ( i, j, k, l, m, xs )
592	!$OMP DO
593	DO l = 0, pdims(1) - 1
594	m = l * ( nzt_x - nzb_x + 1 ) * nnx * ( nyn_x - nys_x + 1 )
595	xs = 0 + l * nnx
596	DO k = nzb_x, nzt_x
597	DO i = xs, xs + nnx - 1
598	DO j = nys_x, nyn_x
599	m = m + 1
600	f_out(i,j,k) = work(m)
601	ENDDO
602	ENDDO
603	ENDDO
604	ENDDO
605	!$OMP END PARALLEL
606	#endif
607
608	ENDIF
609
610	END SUBROUTINE transpose_zx
611
612
613	SUBROUTINE transpose_zy( f_in, work, f_out )
614
615	!------------------------------------------------------------------------------!
616	! Description:
617	! ------------
618	! Transposition of input array (f_in) from z to y. For the input array, all
619	! elements along z reside on the same PE, while after transposition, all
620	! elements along y reside on the same PE.
621	!------------------------------------------------------------------------------!
622
623	USE cpulog
624	USE indices
625	USE interfaces
626	USE pegrid
627	USE transpose_indices
628
629	IMPLICIT NONE
630
631	INTEGER :: i, j, k, l, m, zs
632
633	REAL :: f_in(nxl_z:nxr_z,nys_z:nyn_z,1:nz), &
634	f_inv(nxl_y:nxr_y,nzb_y:nzt_y,0:ny), &
635	f_out(0:ny,nxl_y:nxr_y,nzb_y:nzt_y), &
636	work(nnxnnynnz)
637
638	!
639	!-- If the PE grid is one-dimensional along y, the array has only to be
640	!-- reordered locally and therefore no transposition has to be done.
641	IF ( pdims(1) /= 1 ) THEN
642
643	#if defined( __parallel )
644	!
645	!-- Reorder input array for transposition
646	!$OMP PARALLEL PRIVATE ( i, j, k, l, m, zs )
647	!$OMP DO
648	DO l = 0, pdims(1) - 1
649	m = l * ( nyn_z - nys_z + 1 ) * ( nzt_y - nzb_y + 1 ) * &
650	( nxr_z - nxl_z + 1 )
651	zs = 1 + l * ( nzt_y - nzb_y + 1 )
652	DO j = nys_z, nyn_z
653	DO k = zs, zs + nzt_y - nzb_y
654	DO i = nxl_z, nxr_z
655	m = m + 1
656	work(m) = f_in(i,j,k)
657	ENDDO
658	ENDDO
659	ENDDO
660	ENDDO
661	!$OMP END PARALLEL
662
663	!
664	!-- Transpose array
665	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
666	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
667	CALL MPI_ALLTOALL( work(1), sendrecvcount_yz, MPI_REAL, &
668	f_inv(nxl_y,nzb_y,0), sendrecvcount_yz, MPI_REAL, &
669	comm1dx, ierr )
670	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
671	#endif
672
673	ELSE
674	!
675	!-- Reorder the array in the same way like ALLTOALL did it
676	!$OMP PARALLEL PRIVATE ( i, j, k )
677	!$OMP DO
678	DO k = nzb_y, nzt_y
679	DO j = 0, ny
680	DO i = nxl_y, nxr_y
681	f_inv(i,k,j) = f_in(i,j,k)
682	ENDDO
683	ENDDO
684	ENDDO
685	!$OMP END PARALLEL
686
687	ENDIF
688
689	!
690	!-- Reorder transposed array in a way that the y index is in first position
691	!$OMP PARALLEL PRIVATE ( i, j, k )
692	!$OMP DO
693	DO k = nzb_y, nzt_y
694	DO i = nxl_y, nxr_y
695	DO j = 0, ny
696	f_out(j,i,k) = f_inv(i,k,j)
697	ENDDO
698	ENDDO
699	ENDDO
700	!$OMP END PARALLEL
701
702	END SUBROUTINE transpose_zy
703
704
705	SUBROUTINE transpose_zyd( f_in, work, f_out )
706
707	!------------------------------------------------------------------------------!
708	! Description:
709	! ------------
710	! Transposition of input array (f_in) from z to y. For the input array, all
711	! elements along z reside on the same PE, while after transposition, all
712	! elements along y reside on the same PE.
713	! This is a direct transposition for arrays with indices in regular order
714	! (k,j,i) (cf. transpose_zy).
715	!------------------------------------------------------------------------------!
716
717	USE cpulog
718	USE indices
719	USE interfaces
720	USE pegrid
721	USE transpose_indices
722
723	IMPLICIT NONE
724
725	INTEGER :: i, j, k, l, m, ys
726
727	REAL :: f_in(1:nz,nys:nyn,nxl:nxr), f_inv(nys:nyn,nxl:nxr,1:nz), &
728	f_out(0:ny,nxl_yd:nxr_yd,nzb_yd:nzt_yd), &
729	work(nnxnnynnz)
730
731	#if defined( __parallel )
732
733	!
734	!-- Rearrange indices of input array in order to make data to be send
735	!-- by MPI contiguous
736	DO i = nxl, nxr
737	DO j = nys, nyn
738	DO k = 1, nz
739	f_inv(j,i,k) = f_in(k,j,i)
740	ENDDO
741	ENDDO
742	ENDDO
743
744	!
745	!-- Move data to different array, because memory location of work1 is
746	!-- needed further below (work1 = work2).
747	!-- If the PE grid is one-dimensional along x, only local reordering
748	!-- of the data is necessary and no transposition has to be done.
749	IF ( pdims(2) == 1 ) THEN
750	DO k = 1, nz
751	DO i = nxl, nxr
752	DO j = nys, nyn
753	f_out(j,i,k) = f_inv(j,i,k)
754	ENDDO
755	ENDDO
756	ENDDO
757	RETURN
758	ENDIF
759
760	!
761	!-- Transpose array
762	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
763	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
764	CALL MPI_ALLTOALL( f_inv(nys,nxl,1), sendrecvcount_zyd, MPI_REAL, &
765	work(1), sendrecvcount_zyd, MPI_REAL, &
766	comm1dy, ierr )
767	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
768
769	!
770	!-- Reorder transposed array
771	m = 0
772	DO l = 0, pdims(2) - 1
773	ys = 0 + l * nny
774	DO k = nzb_yd, nzt_yd
775	DO i = nxl_yd, nxr_yd
776	DO j = ys, ys + nny - 1
777	m = m + 1
778	f_out(j,i,k) = work(m)
779	ENDDO
780	ENDDO
781	ENDDO
782	ENDDO
783
784	#endif
785
786	END SUBROUTINE transpose_zyd

Note: See TracBrowser for help on using the repository browser.

Download in other formats:

| Impressum | ©Leibniz Universität Hannover |