Home

Context Navigation

source: palm/trunk/SOURCE/transpose.f90 @ 1076

Last change on this file since 1076 was 1037, checked in by raasch, 12 years ago
last commit documented
Property svn:keywords set to `Id`
File size: 20.7 KB

Line
1	SUBROUTINE transpose_xy( f_in, work, f_out )
2
3	!--------------------------------------------------------------------------------!
4	! This file is part of PALM.
5	!
6	! PALM is free software: you can redistribute it and/or modify it under the terms
7	! of the GNU General Public License as published by the Free Software Foundation,
8	! either version 3 of the License, or (at your option) any later version.
9	!
10	! PALM is distributed in the hope that it will be useful, but WITHOUT ANY
11	! WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
12	! A PARTICULAR PURPOSE. See the GNU General Public License for more details.
13	!
14	! You should have received a copy of the GNU General Public License along with
15	! PALM. If not, see <http://www.gnu.org/licenses/>.
16	!
17	! Copyright 1997-2012 Leibniz University Hannover
18	!--------------------------------------------------------------------------------!
19	!
20	! Current revisions:
21	! -----------------
22	!
23	!
24	! Former revisions:
25	! -----------------
26	! $Id: transpose.f90 1037 2012-10-22 14:10:22Z hoffmann $
27	!
28	! 1036 2012-10-22 13:43:42Z raasch
29	! code put under GPL (PALM 3.9)
30	!
31	! 1003 2012-09-14 14:35:53Z raasch
32	! indices nxa, nya, etc. replaced by nx, ny, etc.
33	!
34	! 683 2011-02-09 14:25:15Z raasch
35	! openMP parallelization of transpositions for 2d-domain-decomposition
36	!
37	! 622 2010-12-10 08:08:13Z raasch
38	! optional barriers included in order to speed up collective operations
39	!
40	! 164 2008-05-15 08:46:15Z raasch
41	! f_inv changed from subroutine argument to automatic array in order to do
42	! re-ordering from f_in to f_inv in one step, one array work is needed instead
43	! of work1 and work2
44	!
45	! February 2007
46	! RCS Log replace by Id keyword, revision history cleaned up
47	!
48	! Revision 1.2 2004/04/30 13:12:17 raasch
49	! Switched from mpi_alltoallv to the simpler mpi_alltoall,
50	! all former transpose-routine files collected in this file, enlarged
51	! transposition arrays introduced
52	!
53	! Revision 1.1 2004/04/30 13:08:16 raasch
54	! Initial revision (collection of former routines transpose_xy, transpose_xz,
55	! transpose_yx, transpose_yz, transpose_zx, transpose_zy)
56	!
57	! Revision 1.1 1997/07/24 11:25:18 raasch
58	! Initial revision
59	!
60	!
61	! Description:
62	! ------------
63	! Transposition of input array (f_in) from x to y. For the input array, all
64	! elements along x reside on the same PE, while after transposition, all
65	! elements along y reside on the same PE.
66	!------------------------------------------------------------------------------!
67
68	USE cpulog
69	USE indices
70	USE interfaces
71	USE pegrid
72	USE transpose_indices
73
74	IMPLICIT NONE
75
76	INTEGER :: i, j, k, l, m, ys
77
78	REAL :: f_in(0:nx,nys_x:nyn_x,nzb_x:nzt_x), &
79	f_inv(nys_x:nyn_x,nzb_x:nzt_x,0:nx), &
80	f_out(0:ny,nxl_y:nxr_y,nzb_y:nzt_y), &
81	work(nnxnnynnz)
82
83	#if defined( __parallel )
84
85	!
86	!-- Rearrange indices of input array in order to make data to be send
87	!-- by MPI contiguous
88	!$OMP PARALLEL PRIVATE ( i, j, k )
89	!$OMP DO
90	DO i = 0, nx
91	DO k = nzb_x, nzt_x
92	DO j = nys_x, nyn_x
93	f_inv(j,k,i) = f_in(i,j,k)
94	ENDDO
95	ENDDO
96	ENDDO
97	!$OMP END PARALLEL
98
99	!
100	!-- Transpose array
101	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
102	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
103	CALL MPI_ALLTOALL( f_inv(nys_x,nzb_x,0), sendrecvcount_xy, MPI_REAL, &
104	work(1), sendrecvcount_xy, MPI_REAL, &
105	comm1dy, ierr )
106	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
107
108	!
109	!-- Reorder transposed array
110	!$OMP PARALLEL PRIVATE ( i, j, k, l, m, ys )
111	!$OMP DO
112	DO l = 0, pdims(2) - 1
113	m = l * ( nxr_y - nxl_y + 1 ) * ( nzt_y - nzb_y + 1 ) * &
114	( nyn_x - nys_x + 1 )
115	ys = 0 + l * ( nyn_x - nys_x + 1 )
116	DO i = nxl_y, nxr_y
117	DO k = nzb_y, nzt_y
118	DO j = ys, ys + nyn_x - nys_x
119	m = m + 1
120	f_out(j,i,k) = work(m)
121	ENDDO
122	ENDDO
123	ENDDO
124	ENDDO
125	!$OMP END PARALLEL
126
127	#endif
128
129	END SUBROUTINE transpose_xy
130
131
132	SUBROUTINE transpose_xz( f_in, work, f_out )
133
134	!------------------------------------------------------------------------------!
135	! Description:
136	! ------------
137	! Transposition of input array (f_in) from x to z. For the input array, all
138	! elements along x reside on the same PE, while after transposition, all
139	! elements along z reside on the same PE.
140	!------------------------------------------------------------------------------!
141
142	USE cpulog
143	USE indices
144	USE interfaces
145	USE pegrid
146	USE transpose_indices
147
148	IMPLICIT NONE
149
150	INTEGER :: i, j, k, l, m, xs
151
152	REAL :: f_in(0:nx,nys_x:nyn_x,nzb_x:nzt_x), &
153	f_inv(nys:nyn,nxl:nxr,1:nz), &
154	f_out(1:nz,nys:nyn,nxl:nxr), &
155	work(nnxnnynnz)
156
157	#if defined( __parallel )
158
159	!
160	!-- If the PE grid is one-dimensional along y, the array has only to be
161	!-- reordered locally and therefore no transposition has to be done.
162	IF ( pdims(1) /= 1 ) THEN
163	!
164	!-- Reorder input array for transposition
165	!$OMP PARALLEL PRIVATE ( i, j, k, l, m, xs )
166	!$OMP DO
167	DO l = 0, pdims(1) - 1
168	m = l * ( nzt_x - nzb_x + 1 ) * nnx * ( nyn_x - nys_x + 1 )
169	xs = 0 + l * nnx
170	DO k = nzb_x, nzt_x
171	DO i = xs, xs + nnx - 1
172	DO j = nys_x, nyn_x
173	m = m + 1
174	work(m) = f_in(i,j,k)
175	ENDDO
176	ENDDO
177	ENDDO
178	ENDDO
179	!$OMP END PARALLEL
180
181	!
182	!-- Transpose array
183	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
184	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
185	CALL MPI_ALLTOALL( work(1), sendrecvcount_zx, MPI_REAL, &
186	f_inv(nys,nxl,1), sendrecvcount_zx, MPI_REAL, &
187	comm1dx, ierr )
188	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
189
190	!
191	!-- Reorder transposed array in a way that the z index is in first position
192	!$OMP PARALLEL PRIVATE ( i, j, k )
193	!$OMP DO
194	DO k = 1, nz
195	DO i = nxl, nxr
196	DO j = nys, nyn
197	f_out(k,j,i) = f_inv(j,i,k)
198	ENDDO
199	ENDDO
200	ENDDO
201	!$OMP END PARALLEL
202	ELSE
203	!
204	!-- Reorder the array in a way that the z index is in first position
205	!$OMP PARALLEL PRIVATE ( i, j, k )
206	!$OMP DO
207	DO i = nxl, nxr
208	DO j = nys, nyn
209	DO k = 1, nz
210	f_inv(j,i,k) = f_in(i,j,k)
211	ENDDO
212	ENDDO
213	ENDDO
214	!$OMP END PARALLEL
215
216	!$OMP PARALLEL PRIVATE ( i, j, k )
217	!$OMP DO
218	DO k = 1, nz
219	DO i = nxl, nxr
220	DO j = nys, nyn
221	f_out(k,j,i) = f_inv(j,i,k)
222	ENDDO
223	ENDDO
224	ENDDO
225	!$OMP END PARALLEL
226
227	ENDIF
228
229
230	#endif
231
232	END SUBROUTINE transpose_xz
233
234
235	SUBROUTINE transpose_yx( f_in, work, f_out )
236
237	!------------------------------------------------------------------------------!
238	! Description:
239	! ------------
240	! Transposition of input array (f_in) from y to x. For the input array, all
241	! elements along y reside on the same PE, while after transposition, all
242	! elements along x reside on the same PE.
243	!------------------------------------------------------------------------------!
244
245	USE cpulog
246	USE indices
247	USE interfaces
248	USE pegrid
249	USE transpose_indices
250
251	IMPLICIT NONE
252
253	INTEGER :: i, j, k, l, m, ys
254
255	REAL :: f_in(0:ny,nxl_y:nxr_y,nzb_y:nzt_y), &
256	f_inv(nys_x:nyn_x,nzb_x:nzt_x,0:nx), &
257	f_out(0:nx,nys_x:nyn_x,nzb_x:nzt_x), &
258	work(nnxnnynnz)
259
260	#if defined( __parallel )
261
262	!
263	!-- Reorder input array for transposition
264	!$OMP PARALLEL PRIVATE ( i, j, k, l, m, ys )
265	!$OMP DO
266	DO l = 0, pdims(2) - 1
267	m = l * ( nxr_y - nxl_y + 1 ) * ( nzt_y - nzb_y + 1 ) * &
268	( nyn_x - nys_x + 1 )
269	ys = 0 + l * ( nyn_x - nys_x + 1 )
270	DO i = nxl_y, nxr_y
271	DO k = nzb_y, nzt_y
272	DO j = ys, ys + nyn_x - nys_x
273	m = m + 1
274	work(m) = f_in(j,i,k)
275	ENDDO
276	ENDDO
277	ENDDO
278	ENDDO
279	!$OMP END PARALLEL
280
281	!
282	!-- Transpose array
283	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
284	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
285	CALL MPI_ALLTOALL( work(1), sendrecvcount_xy, MPI_REAL, &
286	f_inv(nys_x,nzb_x,0), sendrecvcount_xy, MPI_REAL, &
287	comm1dy, ierr )
288	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
289
290	!
291	!-- Reorder transposed array in a way that the x index is in first position
292	!$OMP PARALLEL PRIVATE ( i, j, k )
293	!$OMP DO
294	DO i = 0, nx
295	DO k = nzb_x, nzt_x
296	DO j = nys_x, nyn_x
297	f_out(i,j,k) = f_inv(j,k,i)
298	ENDDO
299	ENDDO
300	ENDDO
301	!$OMP END PARALLEL
302
303	#endif
304
305	END SUBROUTINE transpose_yx
306
307
308	SUBROUTINE transpose_yxd( f_in, work, f_out )
309
310	!------------------------------------------------------------------------------!
311	! Description:
312	! ------------
313	! Transposition of input array (f_in) from y to x. For the input array, all
314	! elements along y reside on the same PE, while after transposition, all
315	! elements along x reside on the same PE.
316	! This is a direct transposition for arrays with indices in regular order
317	! (k,j,i) (cf. transpose_yx).
318	!------------------------------------------------------------------------------!
319
320	USE cpulog
321	USE indices
322	USE interfaces
323	USE pegrid
324	USE transpose_indices
325
326	IMPLICIT NONE
327
328	INTEGER :: i, j, k, l, m, recvcount_yx, sendcount_yx, xs
329
330	REAL :: f_in(1:nz,nys:nyn,nxl:nxr), f_inv(nxl:nxr,1:nz,nys:nyn), &
331	f_out(0:nx,nys_x:nyn_x,nzb_x:nzt_x), &
332	work(nnxnnynnz)
333
334	#if defined( __parallel )
335
336	!
337	!-- Rearrange indices of input array in order to make data to be send
338	!-- by MPI contiguous
339	DO k = 1, nz
340	DO j = nys, nyn
341	DO i = nxl, nxr
342	f_inv(i,k,j) = f_in(k,j,i)
343	ENDDO
344	ENDDO
345	ENDDO
346
347	!
348	!-- Transpose array
349	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
350	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
351	CALL MPI_ALLTOALL( f_inv(nxl,1,nys), sendrecvcount_xy, MPI_REAL, &
352	work(1), sendrecvcount_xy, MPI_REAL, &
353	comm1dx, ierr )
354	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
355
356	!
357	!-- Reorder transposed array
358	m = 0
359	DO l = 0, pdims(1) - 1
360	xs = 0 + l * nnx
361	DO j = nys_x, nyn_x
362	DO k = 1, nz
363	DO i = xs, xs + nnx - 1
364	m = m + 1
365	f_out(i,j,k) = work(m)
366	ENDDO
367	ENDDO
368	ENDDO
369	ENDDO
370
371	#endif
372
373	END SUBROUTINE transpose_yxd
374
375
376	SUBROUTINE transpose_yz( f_in, work, f_out )
377
378	!------------------------------------------------------------------------------!
379	! Description:
380	! ------------
381	! Transposition of input array (f_in) from y to z. For the input array, all
382	! elements along y reside on the same PE, while after transposition, all
383	! elements along z reside on the same PE.
384	!------------------------------------------------------------------------------!
385
386	USE cpulog
387	USE indices
388	USE interfaces
389	USE pegrid
390	USE transpose_indices
391
392	IMPLICIT NONE
393
394	INTEGER :: i, j, k, l, m, zs
395
396	REAL :: f_in(0:ny,nxl_y:nxr_y,nzb_y:nzt_y), &
397	f_inv(nxl_y:nxr_y,nzb_y:nzt_y,0:ny), &
398	f_out(nxl_z:nxr_z,nys_z:nyn_z,1:nz), &
399	work(nnxnnynnz)
400
401	#if defined( __parallel )
402
403	!
404	!-- Rearrange indices of input array in order to make data to be send
405	!-- by MPI contiguous
406	!$OMP PARALLEL PRIVATE ( i, j, k )
407	!$OMP DO
408	DO j = 0, ny
409	DO k = nzb_y, nzt_y
410	DO i = nxl_y, nxr_y
411	f_inv(i,k,j) = f_in(j,i,k)
412	ENDDO
413	ENDDO
414	ENDDO
415	!$OMP END PARALLEL
416
417	!
418	!-- Move data to different array, because memory location of work1 is
419	!-- needed further below (work1 = work2).
420	!-- If the PE grid is one-dimensional along y, only local reordering
421	!-- of the data is necessary and no transposition has to be done.
422	IF ( pdims(1) == 1 ) THEN
423	!$OMP PARALLEL PRIVATE ( i, j, k )
424	!$OMP DO
425	DO j = 0, ny
426	DO k = nzb_y, nzt_y
427	DO i = nxl_y, nxr_y
428	f_out(i,j,k) = f_inv(i,k,j)
429	ENDDO
430	ENDDO
431	ENDDO
432	!$OMP END PARALLEL
433	RETURN
434	ENDIF
435
436	!
437	!-- Transpose array
438	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
439	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
440	CALL MPI_ALLTOALL( f_inv(nxl_y,nzb_y,0), sendrecvcount_yz, MPI_REAL, &
441	work(1), sendrecvcount_yz, MPI_REAL, &
442	comm1dx, ierr )
443	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
444
445	!
446	!-- Reorder transposed array
447	!$OMP PARALLEL PRIVATE ( i, j, k, l, m, zs )
448	!$OMP DO
449	DO l = 0, pdims(1) - 1
450	m = l * ( nyn_z - nys_z + 1 ) * ( nzt_y - nzb_y + 1 ) * &
451	( nxr_z - nxl_z + 1 )
452	zs = 1 + l * ( nzt_y - nzb_y + 1 )
453	DO j = nys_z, nyn_z
454	DO k = zs, zs + nzt_y - nzb_y
455	DO i = nxl_z, nxr_z
456	m = m + 1
457	f_out(i,j,k) = work(m)
458	ENDDO
459	ENDDO
460	ENDDO
461	ENDDO
462	!$OMP END PARALLEL
463
464	#endif
465
466	END SUBROUTINE transpose_yz
467
468
469	SUBROUTINE transpose_zx( f_in, work, f_out )
470
471	!------------------------------------------------------------------------------!
472	! Description:
473	! ------------
474	! Transposition of input array (f_in) from z to x. For the input array, all
475	! elements along z reside on the same PE, while after transposition, all
476	! elements along x reside on the same PE.
477	!------------------------------------------------------------------------------!
478
479	USE cpulog
480	USE indices
481	USE interfaces
482	USE pegrid
483	USE transpose_indices
484
485	IMPLICIT NONE
486
487	INTEGER :: i, j, k, l, m, xs
488
489	REAL :: f_in(1:nz,nys:nyn,nxl:nxr), f_inv(nys:nyn,nxl:nxr,1:nz), &
490	f_out(0:nx,nys_x:nyn_x,nzb_x:nzt_x), &
491	work(nnxnnynnz)
492
493	#if defined( __parallel )
494
495	!
496	!-- Rearrange indices of input array in order to make data to be send
497	!-- by MPI contiguous
498	!$OMP PARALLEL PRIVATE ( i, j, k )
499	!$OMP DO
500	DO k = 1,nz
501	DO i = nxl, nxr
502	DO j = nys, nyn
503	f_inv(j,i,k) = f_in(k,j,i)
504	ENDDO
505	ENDDO
506	ENDDO
507	!$OMP END PARALLEL
508
509	!
510	!-- Move data to different array, because memory location of work1 is
511	!-- needed further below (work1 = work2).
512	!-- If the PE grid is one-dimensional along y, only local reordering
513	!-- of the data is necessary and no transposition has to be done.
514	IF ( pdims(1) == 1 ) THEN
515	!$OMP PARALLEL PRIVATE ( i, j, k )
516	!$OMP DO
517	DO k = 1, nz
518	DO i = nxl, nxr
519	DO j = nys, nyn
520	f_out(i,j,k) = f_inv(j,i,k)
521	ENDDO
522	ENDDO
523	ENDDO
524	!$OMP END PARALLEL
525	RETURN
526	ENDIF
527
528	!
529	!-- Transpose array
530	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
531	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
532	CALL MPI_ALLTOALL( f_inv(nys,nxl,1), sendrecvcount_zx, MPI_REAL, &
533	work(1), sendrecvcount_zx, MPI_REAL, &
534	comm1dx, ierr )
535	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
536
537	!
538	!-- Reorder transposed array
539	!$OMP PARALLEL PRIVATE ( i, j, k, l, m, xs )
540	!$OMP DO
541	DO l = 0, pdims(1) - 1
542	m = l * ( nzt_x - nzb_x + 1 ) * nnx * ( nyn_x - nys_x + 1 )
543	xs = 0 + l * nnx
544	DO k = nzb_x, nzt_x
545	DO i = xs, xs + nnx - 1
546	DO j = nys_x, nyn_x
547	m = m + 1
548	f_out(i,j,k) = work(m)
549	ENDDO
550	ENDDO
551	ENDDO
552	ENDDO
553	!$OMP END PARALLEL
554
555	#endif
556
557	END SUBROUTINE transpose_zx
558
559
560	SUBROUTINE transpose_zy( f_in, work, f_out )
561
562	!------------------------------------------------------------------------------!
563	! Description:
564	! ------------
565	! Transposition of input array (f_in) from z to y. For the input array, all
566	! elements along z reside on the same PE, while after transposition, all
567	! elements along y reside on the same PE.
568	!------------------------------------------------------------------------------!
569
570	USE cpulog
571	USE indices
572	USE interfaces
573	USE pegrid
574	USE transpose_indices
575
576	IMPLICIT NONE
577
578	INTEGER :: i, j, k, l, m, zs
579
580	REAL :: f_in(nxl_z:nxr_z,nys_z:nyn_z,1:nz), &
581	f_inv(nxl_y:nxr_y,nzb_y:nzt_y,0:ny), &
582	f_out(0:ny,nxl_y:nxr_y,nzb_y:nzt_y), &
583	work(nnxnnynnz)
584
585	#if defined( __parallel )
586
587	!
588	!-- If the PE grid is one-dimensional along y, the array has only to be
589	!-- reordered locally and therefore no transposition has to be done.
590	IF ( pdims(1) /= 1 ) THEN
591	!
592	!-- Reorder input array for transposition
593	!$OMP PARALLEL PRIVATE ( i, j, k, l, m, zs )
594	!$OMP DO
595	DO l = 0, pdims(1) - 1
596	m = l * ( nyn_z - nys_z + 1 ) * ( nzt_y - nzb_y + 1 ) * &
597	( nxr_z - nxl_z + 1 )
598	zs = 1 + l * ( nzt_y - nzb_y + 1 )
599	DO j = nys_z, nyn_z
600	DO k = zs, zs + nzt_y - nzb_y
601	DO i = nxl_z, nxr_z
602	m = m + 1
603	work(m) = f_in(i,j,k)
604	ENDDO
605	ENDDO
606	ENDDO
607	ENDDO
608	!$OMP END PARALLEL
609
610	!
611	!-- Transpose array
612	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
613	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
614	CALL MPI_ALLTOALL( work(1), sendrecvcount_yz, MPI_REAL, &
615	f_inv(nxl_y,nzb_y,0), sendrecvcount_yz, MPI_REAL, &
616	comm1dx, ierr )
617	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
618
619	!
620	!-- Reorder transposed array in a way that the y index is in first position
621	!$OMP PARALLEL PRIVATE ( i, j, k )
622	!$OMP DO
623	DO j = 0, ny
624	DO k = nzb_y, nzt_y
625	DO i = nxl_y, nxr_y
626	f_out(j,i,k) = f_inv(i,k,j)
627	ENDDO
628	ENDDO
629	ENDDO
630	!$OMP END PARALLEL
631	ELSE
632	!
633	!-- Reorder the array in a way that the y index is in first position
634	!$OMP PARALLEL PRIVATE ( i, j, k )
635	!$OMP DO
636	DO k = nzb_y, nzt_y
637	DO j = 0, ny
638	DO i = nxl_y, nxr_y
639	f_inv(i,k,j) = f_in(i,j,k)
640	ENDDO
641	ENDDO
642	ENDDO
643	!$OMP END PARALLEL
644	!
645	!-- Move data to output array
646	!$OMP PARALLEL PRIVATE ( i, j, k )
647	!$OMP DO
648	DO k = nzb_y, nzt_y
649	DO i = nxl_y, nxr_y
650	DO j = 0, ny
651	f_out(j,i,k) = f_inv(i,k,j)
652	ENDDO
653	ENDDO
654	ENDDO
655	!$OMP END PARALLEL
656
657	ENDIF
658
659	#endif
660
661	END SUBROUTINE transpose_zy
662
663
664	SUBROUTINE transpose_zyd( f_in, work, f_out )
665
666	!------------------------------------------------------------------------------!
667	! Description:
668	! ------------
669	! Transposition of input array (f_in) from z to y. For the input array, all
670	! elements along z reside on the same PE, while after transposition, all
671	! elements along y reside on the same PE.
672	! This is a direct transposition for arrays with indices in regular order
673	! (k,j,i) (cf. transpose_zy).
674	!------------------------------------------------------------------------------!
675
676	USE cpulog
677	USE indices
678	USE interfaces
679	USE pegrid
680	USE transpose_indices
681
682	IMPLICIT NONE
683
684	INTEGER :: i, j, k, l, m, ys
685
686	REAL :: f_in(1:nz,nys:nyn,nxl:nxr), f_inv(nys:nyn,nxl:nxr,1:nz), &
687	f_out(0:ny,nxl_yd:nxr_yd,nzb_yd:nzt_yd), &
688	work(nnxnnynnz)
689
690	#if defined( __parallel )
691
692	!
693	!-- Rearrange indices of input array in order to make data to be send
694	!-- by MPI contiguous
695	DO i = nxl, nxr
696	DO j = nys, nyn
697	DO k = 1, nz
698	f_inv(j,i,k) = f_in(k,j,i)
699	ENDDO
700	ENDDO
701	ENDDO
702
703	!
704	!-- Move data to different array, because memory location of work1 is
705	!-- needed further below (work1 = work2).
706	!-- If the PE grid is one-dimensional along x, only local reordering
707	!-- of the data is necessary and no transposition has to be done.
708	IF ( pdims(2) == 1 ) THEN
709	DO k = 1, nz
710	DO i = nxl, nxr
711	DO j = nys, nyn
712	f_out(j,i,k) = f_inv(j,i,k)
713	ENDDO
714	ENDDO
715	ENDDO
716	RETURN
717	ENDIF
718
719	!
720	!-- Transpose array
721	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
722	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
723	CALL MPI_ALLTOALL( f_inv(nys,nxl,1), sendrecvcount_zyd, MPI_REAL, &
724	work(1), sendrecvcount_zyd, MPI_REAL, &
725	comm1dy, ierr )
726	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
727
728	!
729	!-- Reorder transposed array
730	m = 0
731	DO l = 0, pdims(2) - 1
732	ys = 0 + l * nny
733	DO k = nzb_yd, nzt_yd
734	DO i = nxl_yd, nxr_yd
735	DO j = ys, ys + nny - 1
736	m = m + 1
737	f_out(j,i,k) = work(m)
738	ENDDO
739	ENDDO
740	ENDDO
741	ENDDO
742
743	#endif
744
745	END SUBROUTINE transpose_zyd

Note: See TracBrowser for help on using the repository browser.

Download in other formats:

| Impressum | ©Leibniz Universität Hannover |