Home

Context Navigation

source: palm/trunk/SOURCE/transpose.f90 @ 1036

Last change on this file since 1036 was 1036, checked in by raasch, 12 years ago
code has been put under the GNU General Public License (v3)
Property svn:keywords set to `Id`
File size: 20.6 KB

Line
1	SUBROUTINE transpose_xy( f_in, work, f_out )
2
3	!--------------------------------------------------------------------------------!
4	! This file is part of PALM.
5	!
6	! PALM is free software: you can redistribute it and/or modify it under the terms
7	! of the GNU General Public License as published by the Free Software Foundation,
8	! either version 3 of the License, or (at your option) any later version.
9	!
10	! PALM is distributed in the hope that it will be useful, but WITHOUT ANY
11	! WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
12	! A PARTICULAR PURPOSE. See the GNU General Public License for more details.
13	!
14	! You should have received a copy of the GNU General Public License along with
15	! PALM. If not, see <http://www.gnu.org/licenses/>.
16	!
17	! Copyright 1997-2012 Leibniz University Hannover
18	!--------------------------------------------------------------------------------!
19	!
20	! Current revisions:
21	! -----------------
22	!
23	!
24	! Former revisions:
25	! -----------------
26	! $Id: transpose.f90 1036 2012-10-22 13:43:42Z raasch $
27	!
28	! 1003 2012-09-14 14:35:53Z raasch
29	! indices nxa, nya, etc. replaced by nx, ny, etc.
30	!
31	! 683 2011-02-09 14:25:15Z raasch
32	! openMP parallelization of transpositions for 2d-domain-decomposition
33	!
34	! 622 2010-12-10 08:08:13Z raasch
35	! optional barriers included in order to speed up collective operations
36	!
37	! 164 2008-05-15 08:46:15Z raasch
38	! f_inv changed from subroutine argument to automatic array in order to do
39	! re-ordering from f_in to f_inv in one step, one array work is needed instead
40	! of work1 and work2
41	!
42	! February 2007
43	! RCS Log replace by Id keyword, revision history cleaned up
44	!
45	! Revision 1.2 2004/04/30 13:12:17 raasch
46	! Switched from mpi_alltoallv to the simpler mpi_alltoall,
47	! all former transpose-routine files collected in this file, enlarged
48	! transposition arrays introduced
49	!
50	! Revision 1.1 2004/04/30 13:08:16 raasch
51	! Initial revision (collection of former routines transpose_xy, transpose_xz,
52	! transpose_yx, transpose_yz, transpose_zx, transpose_zy)
53	!
54	! Revision 1.1 1997/07/24 11:25:18 raasch
55	! Initial revision
56	!
57	!
58	! Description:
59	! ------------
60	! Transposition of input array (f_in) from x to y. For the input array, all
61	! elements along x reside on the same PE, while after transposition, all
62	! elements along y reside on the same PE.
63	!------------------------------------------------------------------------------!
64
65	USE cpulog
66	USE indices
67	USE interfaces
68	USE pegrid
69	USE transpose_indices
70
71	IMPLICIT NONE
72
73	INTEGER :: i, j, k, l, m, ys
74
75	REAL :: f_in(0:nx,nys_x:nyn_x,nzb_x:nzt_x), &
76	f_inv(nys_x:nyn_x,nzb_x:nzt_x,0:nx), &
77	f_out(0:ny,nxl_y:nxr_y,nzb_y:nzt_y), &
78	work(nnxnnynnz)
79
80	#if defined( __parallel )
81
82	!
83	!-- Rearrange indices of input array in order to make data to be send
84	!-- by MPI contiguous
85	!$OMP PARALLEL PRIVATE ( i, j, k )
86	!$OMP DO
87	DO i = 0, nx
88	DO k = nzb_x, nzt_x
89	DO j = nys_x, nyn_x
90	f_inv(j,k,i) = f_in(i,j,k)
91	ENDDO
92	ENDDO
93	ENDDO
94	!$OMP END PARALLEL
95
96	!
97	!-- Transpose array
98	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
99	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
100	CALL MPI_ALLTOALL( f_inv(nys_x,nzb_x,0), sendrecvcount_xy, MPI_REAL, &
101	work(1), sendrecvcount_xy, MPI_REAL, &
102	comm1dy, ierr )
103	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
104
105	!
106	!-- Reorder transposed array
107	!$OMP PARALLEL PRIVATE ( i, j, k, l, m, ys )
108	!$OMP DO
109	DO l = 0, pdims(2) - 1
110	m = l * ( nxr_y - nxl_y + 1 ) * ( nzt_y - nzb_y + 1 ) * &
111	( nyn_x - nys_x + 1 )
112	ys = 0 + l * ( nyn_x - nys_x + 1 )
113	DO i = nxl_y, nxr_y
114	DO k = nzb_y, nzt_y
115	DO j = ys, ys + nyn_x - nys_x
116	m = m + 1
117	f_out(j,i,k) = work(m)
118	ENDDO
119	ENDDO
120	ENDDO
121	ENDDO
122	!$OMP END PARALLEL
123
124	#endif
125
126	END SUBROUTINE transpose_xy
127
128
129	SUBROUTINE transpose_xz( f_in, work, f_out )
130
131	!------------------------------------------------------------------------------!
132	! Description:
133	! ------------
134	! Transposition of input array (f_in) from x to z. For the input array, all
135	! elements along x reside on the same PE, while after transposition, all
136	! elements along z reside on the same PE.
137	!------------------------------------------------------------------------------!
138
139	USE cpulog
140	USE indices
141	USE interfaces
142	USE pegrid
143	USE transpose_indices
144
145	IMPLICIT NONE
146
147	INTEGER :: i, j, k, l, m, xs
148
149	REAL :: f_in(0:nx,nys_x:nyn_x,nzb_x:nzt_x), &
150	f_inv(nys:nyn,nxl:nxr,1:nz), &
151	f_out(1:nz,nys:nyn,nxl:nxr), &
152	work(nnxnnynnz)
153
154	#if defined( __parallel )
155
156	!
157	!-- If the PE grid is one-dimensional along y, the array has only to be
158	!-- reordered locally and therefore no transposition has to be done.
159	IF ( pdims(1) /= 1 ) THEN
160	!
161	!-- Reorder input array for transposition
162	!$OMP PARALLEL PRIVATE ( i, j, k, l, m, xs )
163	!$OMP DO
164	DO l = 0, pdims(1) - 1
165	m = l * ( nzt_x - nzb_x + 1 ) * nnx * ( nyn_x - nys_x + 1 )
166	xs = 0 + l * nnx
167	DO k = nzb_x, nzt_x
168	DO i = xs, xs + nnx - 1
169	DO j = nys_x, nyn_x
170	m = m + 1
171	work(m) = f_in(i,j,k)
172	ENDDO
173	ENDDO
174	ENDDO
175	ENDDO
176	!$OMP END PARALLEL
177
178	!
179	!-- Transpose array
180	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
181	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
182	CALL MPI_ALLTOALL( work(1), sendrecvcount_zx, MPI_REAL, &
183	f_inv(nys,nxl,1), sendrecvcount_zx, MPI_REAL, &
184	comm1dx, ierr )
185	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
186
187	!
188	!-- Reorder transposed array in a way that the z index is in first position
189	!$OMP PARALLEL PRIVATE ( i, j, k )
190	!$OMP DO
191	DO k = 1, nz
192	DO i = nxl, nxr
193	DO j = nys, nyn
194	f_out(k,j,i) = f_inv(j,i,k)
195	ENDDO
196	ENDDO
197	ENDDO
198	!$OMP END PARALLEL
199	ELSE
200	!
201	!-- Reorder the array in a way that the z index is in first position
202	!$OMP PARALLEL PRIVATE ( i, j, k )
203	!$OMP DO
204	DO i = nxl, nxr
205	DO j = nys, nyn
206	DO k = 1, nz
207	f_inv(j,i,k) = f_in(i,j,k)
208	ENDDO
209	ENDDO
210	ENDDO
211	!$OMP END PARALLEL
212
213	!$OMP PARALLEL PRIVATE ( i, j, k )
214	!$OMP DO
215	DO k = 1, nz
216	DO i = nxl, nxr
217	DO j = nys, nyn
218	f_out(k,j,i) = f_inv(j,i,k)
219	ENDDO
220	ENDDO
221	ENDDO
222	!$OMP END PARALLEL
223
224	ENDIF
225
226
227	#endif
228
229	END SUBROUTINE transpose_xz
230
231
232	SUBROUTINE transpose_yx( f_in, work, f_out )
233
234	!------------------------------------------------------------------------------!
235	! Description:
236	! ------------
237	! Transposition of input array (f_in) from y to x. For the input array, all
238	! elements along y reside on the same PE, while after transposition, all
239	! elements along x reside on the same PE.
240	!------------------------------------------------------------------------------!
241
242	USE cpulog
243	USE indices
244	USE interfaces
245	USE pegrid
246	USE transpose_indices
247
248	IMPLICIT NONE
249
250	INTEGER :: i, j, k, l, m, ys
251
252	REAL :: f_in(0:ny,nxl_y:nxr_y,nzb_y:nzt_y), &
253	f_inv(nys_x:nyn_x,nzb_x:nzt_x,0:nx), &
254	f_out(0:nx,nys_x:nyn_x,nzb_x:nzt_x), &
255	work(nnxnnynnz)
256
257	#if defined( __parallel )
258
259	!
260	!-- Reorder input array for transposition
261	!$OMP PARALLEL PRIVATE ( i, j, k, l, m, ys )
262	!$OMP DO
263	DO l = 0, pdims(2) - 1
264	m = l * ( nxr_y - nxl_y + 1 ) * ( nzt_y - nzb_y + 1 ) * &
265	( nyn_x - nys_x + 1 )
266	ys = 0 + l * ( nyn_x - nys_x + 1 )
267	DO i = nxl_y, nxr_y
268	DO k = nzb_y, nzt_y
269	DO j = ys, ys + nyn_x - nys_x
270	m = m + 1
271	work(m) = f_in(j,i,k)
272	ENDDO
273	ENDDO
274	ENDDO
275	ENDDO
276	!$OMP END PARALLEL
277
278	!
279	!-- Transpose array
280	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
281	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
282	CALL MPI_ALLTOALL( work(1), sendrecvcount_xy, MPI_REAL, &
283	f_inv(nys_x,nzb_x,0), sendrecvcount_xy, MPI_REAL, &
284	comm1dy, ierr )
285	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
286
287	!
288	!-- Reorder transposed array in a way that the x index is in first position
289	!$OMP PARALLEL PRIVATE ( i, j, k )
290	!$OMP DO
291	DO i = 0, nx
292	DO k = nzb_x, nzt_x
293	DO j = nys_x, nyn_x
294	f_out(i,j,k) = f_inv(j,k,i)
295	ENDDO
296	ENDDO
297	ENDDO
298	!$OMP END PARALLEL
299
300	#endif
301
302	END SUBROUTINE transpose_yx
303
304
305	SUBROUTINE transpose_yxd( f_in, work, f_out )
306
307	!------------------------------------------------------------------------------!
308	! Description:
309	! ------------
310	! Transposition of input array (f_in) from y to x. For the input array, all
311	! elements along y reside on the same PE, while after transposition, all
312	! elements along x reside on the same PE.
313	! This is a direct transposition for arrays with indices in regular order
314	! (k,j,i) (cf. transpose_yx).
315	!------------------------------------------------------------------------------!
316
317	USE cpulog
318	USE indices
319	USE interfaces
320	USE pegrid
321	USE transpose_indices
322
323	IMPLICIT NONE
324
325	INTEGER :: i, j, k, l, m, recvcount_yx, sendcount_yx, xs
326
327	REAL :: f_in(1:nz,nys:nyn,nxl:nxr), f_inv(nxl:nxr,1:nz,nys:nyn), &
328	f_out(0:nx,nys_x:nyn_x,nzb_x:nzt_x), &
329	work(nnxnnynnz)
330
331	#if defined( __parallel )
332
333	!
334	!-- Rearrange indices of input array in order to make data to be send
335	!-- by MPI contiguous
336	DO k = 1, nz
337	DO j = nys, nyn
338	DO i = nxl, nxr
339	f_inv(i,k,j) = f_in(k,j,i)
340	ENDDO
341	ENDDO
342	ENDDO
343
344	!
345	!-- Transpose array
346	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
347	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
348	CALL MPI_ALLTOALL( f_inv(nxl,1,nys), sendrecvcount_xy, MPI_REAL, &
349	work(1), sendrecvcount_xy, MPI_REAL, &
350	comm1dx, ierr )
351	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
352
353	!
354	!-- Reorder transposed array
355	m = 0
356	DO l = 0, pdims(1) - 1
357	xs = 0 + l * nnx
358	DO j = nys_x, nyn_x
359	DO k = 1, nz
360	DO i = xs, xs + nnx - 1
361	m = m + 1
362	f_out(i,j,k) = work(m)
363	ENDDO
364	ENDDO
365	ENDDO
366	ENDDO
367
368	#endif
369
370	END SUBROUTINE transpose_yxd
371
372
373	SUBROUTINE transpose_yz( f_in, work, f_out )
374
375	!------------------------------------------------------------------------------!
376	! Description:
377	! ------------
378	! Transposition of input array (f_in) from y to z. For the input array, all
379	! elements along y reside on the same PE, while after transposition, all
380	! elements along z reside on the same PE.
381	!------------------------------------------------------------------------------!
382
383	USE cpulog
384	USE indices
385	USE interfaces
386	USE pegrid
387	USE transpose_indices
388
389	IMPLICIT NONE
390
391	INTEGER :: i, j, k, l, m, zs
392
393	REAL :: f_in(0:ny,nxl_y:nxr_y,nzb_y:nzt_y), &
394	f_inv(nxl_y:nxr_y,nzb_y:nzt_y,0:ny), &
395	f_out(nxl_z:nxr_z,nys_z:nyn_z,1:nz), &
396	work(nnxnnynnz)
397
398	#if defined( __parallel )
399
400	!
401	!-- Rearrange indices of input array in order to make data to be send
402	!-- by MPI contiguous
403	!$OMP PARALLEL PRIVATE ( i, j, k )
404	!$OMP DO
405	DO j = 0, ny
406	DO k = nzb_y, nzt_y
407	DO i = nxl_y, nxr_y
408	f_inv(i,k,j) = f_in(j,i,k)
409	ENDDO
410	ENDDO
411	ENDDO
412	!$OMP END PARALLEL
413
414	!
415	!-- Move data to different array, because memory location of work1 is
416	!-- needed further below (work1 = work2).
417	!-- If the PE grid is one-dimensional along y, only local reordering
418	!-- of the data is necessary and no transposition has to be done.
419	IF ( pdims(1) == 1 ) THEN
420	!$OMP PARALLEL PRIVATE ( i, j, k )
421	!$OMP DO
422	DO j = 0, ny
423	DO k = nzb_y, nzt_y
424	DO i = nxl_y, nxr_y
425	f_out(i,j,k) = f_inv(i,k,j)
426	ENDDO
427	ENDDO
428	ENDDO
429	!$OMP END PARALLEL
430	RETURN
431	ENDIF
432
433	!
434	!-- Transpose array
435	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
436	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
437	CALL MPI_ALLTOALL( f_inv(nxl_y,nzb_y,0), sendrecvcount_yz, MPI_REAL, &
438	work(1), sendrecvcount_yz, MPI_REAL, &
439	comm1dx, ierr )
440	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
441
442	!
443	!-- Reorder transposed array
444	!$OMP PARALLEL PRIVATE ( i, j, k, l, m, zs )
445	!$OMP DO
446	DO l = 0, pdims(1) - 1
447	m = l * ( nyn_z - nys_z + 1 ) * ( nzt_y - nzb_y + 1 ) * &
448	( nxr_z - nxl_z + 1 )
449	zs = 1 + l * ( nzt_y - nzb_y + 1 )
450	DO j = nys_z, nyn_z
451	DO k = zs, zs + nzt_y - nzb_y
452	DO i = nxl_z, nxr_z
453	m = m + 1
454	f_out(i,j,k) = work(m)
455	ENDDO
456	ENDDO
457	ENDDO
458	ENDDO
459	!$OMP END PARALLEL
460
461	#endif
462
463	END SUBROUTINE transpose_yz
464
465
466	SUBROUTINE transpose_zx( f_in, work, f_out )
467
468	!------------------------------------------------------------------------------!
469	! Description:
470	! ------------
471	! Transposition of input array (f_in) from z to x. For the input array, all
472	! elements along z reside on the same PE, while after transposition, all
473	! elements along x reside on the same PE.
474	!------------------------------------------------------------------------------!
475
476	USE cpulog
477	USE indices
478	USE interfaces
479	USE pegrid
480	USE transpose_indices
481
482	IMPLICIT NONE
483
484	INTEGER :: i, j, k, l, m, xs
485
486	REAL :: f_in(1:nz,nys:nyn,nxl:nxr), f_inv(nys:nyn,nxl:nxr,1:nz), &
487	f_out(0:nx,nys_x:nyn_x,nzb_x:nzt_x), &
488	work(nnxnnynnz)
489
490	#if defined( __parallel )
491
492	!
493	!-- Rearrange indices of input array in order to make data to be send
494	!-- by MPI contiguous
495	!$OMP PARALLEL PRIVATE ( i, j, k )
496	!$OMP DO
497	DO k = 1,nz
498	DO i = nxl, nxr
499	DO j = nys, nyn
500	f_inv(j,i,k) = f_in(k,j,i)
501	ENDDO
502	ENDDO
503	ENDDO
504	!$OMP END PARALLEL
505
506	!
507	!-- Move data to different array, because memory location of work1 is
508	!-- needed further below (work1 = work2).
509	!-- If the PE grid is one-dimensional along y, only local reordering
510	!-- of the data is necessary and no transposition has to be done.
511	IF ( pdims(1) == 1 ) THEN
512	!$OMP PARALLEL PRIVATE ( i, j, k )
513	!$OMP DO
514	DO k = 1, nz
515	DO i = nxl, nxr
516	DO j = nys, nyn
517	f_out(i,j,k) = f_inv(j,i,k)
518	ENDDO
519	ENDDO
520	ENDDO
521	!$OMP END PARALLEL
522	RETURN
523	ENDIF
524
525	!
526	!-- Transpose array
527	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
528	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
529	CALL MPI_ALLTOALL( f_inv(nys,nxl,1), sendrecvcount_zx, MPI_REAL, &
530	work(1), sendrecvcount_zx, MPI_REAL, &
531	comm1dx, ierr )
532	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
533
534	!
535	!-- Reorder transposed array
536	!$OMP PARALLEL PRIVATE ( i, j, k, l, m, xs )
537	!$OMP DO
538	DO l = 0, pdims(1) - 1
539	m = l * ( nzt_x - nzb_x + 1 ) * nnx * ( nyn_x - nys_x + 1 )
540	xs = 0 + l * nnx
541	DO k = nzb_x, nzt_x
542	DO i = xs, xs + nnx - 1
543	DO j = nys_x, nyn_x
544	m = m + 1
545	f_out(i,j,k) = work(m)
546	ENDDO
547	ENDDO
548	ENDDO
549	ENDDO
550	!$OMP END PARALLEL
551
552	#endif
553
554	END SUBROUTINE transpose_zx
555
556
557	SUBROUTINE transpose_zy( f_in, work, f_out )
558
559	!------------------------------------------------------------------------------!
560	! Description:
561	! ------------
562	! Transposition of input array (f_in) from z to y. For the input array, all
563	! elements along z reside on the same PE, while after transposition, all
564	! elements along y reside on the same PE.
565	!------------------------------------------------------------------------------!
566
567	USE cpulog
568	USE indices
569	USE interfaces
570	USE pegrid
571	USE transpose_indices
572
573	IMPLICIT NONE
574
575	INTEGER :: i, j, k, l, m, zs
576
577	REAL :: f_in(nxl_z:nxr_z,nys_z:nyn_z,1:nz), &
578	f_inv(nxl_y:nxr_y,nzb_y:nzt_y,0:ny), &
579	f_out(0:ny,nxl_y:nxr_y,nzb_y:nzt_y), &
580	work(nnxnnynnz)
581
582	#if defined( __parallel )
583
584	!
585	!-- If the PE grid is one-dimensional along y, the array has only to be
586	!-- reordered locally and therefore no transposition has to be done.
587	IF ( pdims(1) /= 1 ) THEN
588	!
589	!-- Reorder input array for transposition
590	!$OMP PARALLEL PRIVATE ( i, j, k, l, m, zs )
591	!$OMP DO
592	DO l = 0, pdims(1) - 1
593	m = l * ( nyn_z - nys_z + 1 ) * ( nzt_y - nzb_y + 1 ) * &
594	( nxr_z - nxl_z + 1 )
595	zs = 1 + l * ( nzt_y - nzb_y + 1 )
596	DO j = nys_z, nyn_z
597	DO k = zs, zs + nzt_y - nzb_y
598	DO i = nxl_z, nxr_z
599	m = m + 1
600	work(m) = f_in(i,j,k)
601	ENDDO
602	ENDDO
603	ENDDO
604	ENDDO
605	!$OMP END PARALLEL
606
607	!
608	!-- Transpose array
609	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
610	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
611	CALL MPI_ALLTOALL( work(1), sendrecvcount_yz, MPI_REAL, &
612	f_inv(nxl_y,nzb_y,0), sendrecvcount_yz, MPI_REAL, &
613	comm1dx, ierr )
614	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
615
616	!
617	!-- Reorder transposed array in a way that the y index is in first position
618	!$OMP PARALLEL PRIVATE ( i, j, k )
619	!$OMP DO
620	DO j = 0, ny
621	DO k = nzb_y, nzt_y
622	DO i = nxl_y, nxr_y
623	f_out(j,i,k) = f_inv(i,k,j)
624	ENDDO
625	ENDDO
626	ENDDO
627	!$OMP END PARALLEL
628	ELSE
629	!
630	!-- Reorder the array in a way that the y index is in first position
631	!$OMP PARALLEL PRIVATE ( i, j, k )
632	!$OMP DO
633	DO k = nzb_y, nzt_y
634	DO j = 0, ny
635	DO i = nxl_y, nxr_y
636	f_inv(i,k,j) = f_in(i,j,k)
637	ENDDO
638	ENDDO
639	ENDDO
640	!$OMP END PARALLEL
641	!
642	!-- Move data to output array
643	!$OMP PARALLEL PRIVATE ( i, j, k )
644	!$OMP DO
645	DO k = nzb_y, nzt_y
646	DO i = nxl_y, nxr_y
647	DO j = 0, ny
648	f_out(j,i,k) = f_inv(i,k,j)
649	ENDDO
650	ENDDO
651	ENDDO
652	!$OMP END PARALLEL
653
654	ENDIF
655
656	#endif
657
658	END SUBROUTINE transpose_zy
659
660
661	SUBROUTINE transpose_zyd( f_in, work, f_out )
662
663	!------------------------------------------------------------------------------!
664	! Description:
665	! ------------
666	! Transposition of input array (f_in) from z to y. For the input array, all
667	! elements along z reside on the same PE, while after transposition, all
668	! elements along y reside on the same PE.
669	! This is a direct transposition for arrays with indices in regular order
670	! (k,j,i) (cf. transpose_zy).
671	!------------------------------------------------------------------------------!
672
673	USE cpulog
674	USE indices
675	USE interfaces
676	USE pegrid
677	USE transpose_indices
678
679	IMPLICIT NONE
680
681	INTEGER :: i, j, k, l, m, ys
682
683	REAL :: f_in(1:nz,nys:nyn,nxl:nxr), f_inv(nys:nyn,nxl:nxr,1:nz), &
684	f_out(0:ny,nxl_yd:nxr_yd,nzb_yd:nzt_yd), &
685	work(nnxnnynnz)
686
687	#if defined( __parallel )
688
689	!
690	!-- Rearrange indices of input array in order to make data to be send
691	!-- by MPI contiguous
692	DO i = nxl, nxr
693	DO j = nys, nyn
694	DO k = 1, nz
695	f_inv(j,i,k) = f_in(k,j,i)
696	ENDDO
697	ENDDO
698	ENDDO
699
700	!
701	!-- Move data to different array, because memory location of work1 is
702	!-- needed further below (work1 = work2).
703	!-- If the PE grid is one-dimensional along x, only local reordering
704	!-- of the data is necessary and no transposition has to be done.
705	IF ( pdims(2) == 1 ) THEN
706	DO k = 1, nz
707	DO i = nxl, nxr
708	DO j = nys, nyn
709	f_out(j,i,k) = f_inv(j,i,k)
710	ENDDO
711	ENDDO
712	ENDDO
713	RETURN
714	ENDIF
715
716	!
717	!-- Transpose array
718	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
719	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
720	CALL MPI_ALLTOALL( f_inv(nys,nxl,1), sendrecvcount_zyd, MPI_REAL, &
721	work(1), sendrecvcount_zyd, MPI_REAL, &
722	comm1dy, ierr )
723	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
724
725	!
726	!-- Reorder transposed array
727	m = 0
728	DO l = 0, pdims(2) - 1
729	ys = 0 + l * nny
730	DO k = nzb_yd, nzt_yd
731	DO i = nxl_yd, nxr_yd
732	DO j = ys, ys + nny - 1
733	m = m + 1
734	f_out(j,i,k) = work(m)
735	ENDDO
736	ENDDO
737	ENDDO
738	ENDDO
739
740	#endif
741
742	END SUBROUTINE transpose_zyd

Note: See TracBrowser for help on using the repository browser.

Download in other formats:

| Impressum | ©Leibniz Universität Hannover |