Home

Context Navigation

source: palm/trunk/SOURCE/transpose.f90 @ 1094

Last change on this file since 1094 was 1093, checked in by raasch, 11 years ago
last commit documented
Property svn:keywords set to `Id`
File size: 20.7 KB

Line
1	SUBROUTINE transpose_xy( f_in, work, f_out )
2
3	!--------------------------------------------------------------------------------!
4	! This file is part of PALM.
5	!
6	! PALM is free software: you can redistribute it and/or modify it under the terms
7	! of the GNU General Public License as published by the Free Software Foundation,
8	! either version 3 of the License, or (at your option) any later version.
9	!
10	! PALM is distributed in the hope that it will be useful, but WITHOUT ANY
11	! WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
12	! A PARTICULAR PURPOSE. See the GNU General Public License for more details.
13	!
14	! You should have received a copy of the GNU General Public License along with
15	! PALM. If not, see <http://www.gnu.org/licenses/>.
16	!
17	! Copyright 1997-2012 Leibniz University Hannover
18	!--------------------------------------------------------------------------------!
19	!
20	! Current revisions:
21	! -----------------
22	!
23	!
24	! Former revisions:
25	! -----------------
26	! $Id: transpose.f90 1093 2013-02-02 12:58:49Z raasch $
27	!
28	! 1092 2013-02-02 11:24:22Z raasch
29	! unused variables removed
30	!
31	! 1036 2012-10-22 13:43:42Z raasch
32	! code put under GPL (PALM 3.9)
33	!
34	! 1003 2012-09-14 14:35:53Z raasch
35	! indices nxa, nya, etc. replaced by nx, ny, etc.
36	!
37	! 683 2011-02-09 14:25:15Z raasch
38	! openMP parallelization of transpositions for 2d-domain-decomposition
39	!
40	! 622 2010-12-10 08:08:13Z raasch
41	! optional barriers included in order to speed up collective operations
42	!
43	! 164 2008-05-15 08:46:15Z raasch
44	! f_inv changed from subroutine argument to automatic array in order to do
45	! re-ordering from f_in to f_inv in one step, one array work is needed instead
46	! of work1 and work2
47	!
48	! February 2007
49	! RCS Log replace by Id keyword, revision history cleaned up
50	!
51	! Revision 1.2 2004/04/30 13:12:17 raasch
52	! Switched from mpi_alltoallv to the simpler mpi_alltoall,
53	! all former transpose-routine files collected in this file, enlarged
54	! transposition arrays introduced
55	!
56	! Revision 1.1 2004/04/30 13:08:16 raasch
57	! Initial revision (collection of former routines transpose_xy, transpose_xz,
58	! transpose_yx, transpose_yz, transpose_zx, transpose_zy)
59	!
60	! Revision 1.1 1997/07/24 11:25:18 raasch
61	! Initial revision
62	!
63	!
64	! Description:
65	! ------------
66	! Transposition of input array (f_in) from x to y. For the input array, all
67	! elements along x reside on the same PE, while after transposition, all
68	! elements along y reside on the same PE.
69	!------------------------------------------------------------------------------!
70
71	USE cpulog
72	USE indices
73	USE interfaces
74	USE pegrid
75	USE transpose_indices
76
77	IMPLICIT NONE
78
79	INTEGER :: i, j, k, l, m, ys
80
81	REAL :: f_in(0:nx,nys_x:nyn_x,nzb_x:nzt_x), &
82	f_inv(nys_x:nyn_x,nzb_x:nzt_x,0:nx), &
83	f_out(0:ny,nxl_y:nxr_y,nzb_y:nzt_y), &
84	work(nnxnnynnz)
85
86	#if defined( __parallel )
87
88	!
89	!-- Rearrange indices of input array in order to make data to be send
90	!-- by MPI contiguous
91	!$OMP PARALLEL PRIVATE ( i, j, k )
92	!$OMP DO
93	DO i = 0, nx
94	DO k = nzb_x, nzt_x
95	DO j = nys_x, nyn_x
96	f_inv(j,k,i) = f_in(i,j,k)
97	ENDDO
98	ENDDO
99	ENDDO
100	!$OMP END PARALLEL
101
102	!
103	!-- Transpose array
104	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
105	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
106	CALL MPI_ALLTOALL( f_inv(nys_x,nzb_x,0), sendrecvcount_xy, MPI_REAL, &
107	work(1), sendrecvcount_xy, MPI_REAL, &
108	comm1dy, ierr )
109	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
110
111	!
112	!-- Reorder transposed array
113	!$OMP PARALLEL PRIVATE ( i, j, k, l, m, ys )
114	!$OMP DO
115	DO l = 0, pdims(2) - 1
116	m = l * ( nxr_y - nxl_y + 1 ) * ( nzt_y - nzb_y + 1 ) * &
117	( nyn_x - nys_x + 1 )
118	ys = 0 + l * ( nyn_x - nys_x + 1 )
119	DO i = nxl_y, nxr_y
120	DO k = nzb_y, nzt_y
121	DO j = ys, ys + nyn_x - nys_x
122	m = m + 1
123	f_out(j,i,k) = work(m)
124	ENDDO
125	ENDDO
126	ENDDO
127	ENDDO
128	!$OMP END PARALLEL
129
130	#endif
131
132	END SUBROUTINE transpose_xy
133
134
135	SUBROUTINE transpose_xz( f_in, work, f_out )
136
137	!------------------------------------------------------------------------------!
138	! Description:
139	! ------------
140	! Transposition of input array (f_in) from x to z. For the input array, all
141	! elements along x reside on the same PE, while after transposition, all
142	! elements along z reside on the same PE.
143	!------------------------------------------------------------------------------!
144
145	USE cpulog
146	USE indices
147	USE interfaces
148	USE pegrid
149	USE transpose_indices
150
151	IMPLICIT NONE
152
153	INTEGER :: i, j, k, l, m, xs
154
155	REAL :: f_in(0:nx,nys_x:nyn_x,nzb_x:nzt_x), &
156	f_inv(nys:nyn,nxl:nxr,1:nz), &
157	f_out(1:nz,nys:nyn,nxl:nxr), &
158	work(nnxnnynnz)
159
160	#if defined( __parallel )
161
162	!
163	!-- If the PE grid is one-dimensional along y, the array has only to be
164	!-- reordered locally and therefore no transposition has to be done.
165	IF ( pdims(1) /= 1 ) THEN
166	!
167	!-- Reorder input array for transposition
168	!$OMP PARALLEL PRIVATE ( i, j, k, l, m, xs )
169	!$OMP DO
170	DO l = 0, pdims(1) - 1
171	m = l * ( nzt_x - nzb_x + 1 ) * nnx * ( nyn_x - nys_x + 1 )
172	xs = 0 + l * nnx
173	DO k = nzb_x, nzt_x
174	DO i = xs, xs + nnx - 1
175	DO j = nys_x, nyn_x
176	m = m + 1
177	work(m) = f_in(i,j,k)
178	ENDDO
179	ENDDO
180	ENDDO
181	ENDDO
182	!$OMP END PARALLEL
183
184	!
185	!-- Transpose array
186	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
187	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
188	CALL MPI_ALLTOALL( work(1), sendrecvcount_zx, MPI_REAL, &
189	f_inv(nys,nxl,1), sendrecvcount_zx, MPI_REAL, &
190	comm1dx, ierr )
191	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
192
193	!
194	!-- Reorder transposed array in a way that the z index is in first position
195	!$OMP PARALLEL PRIVATE ( i, j, k )
196	!$OMP DO
197	DO k = 1, nz
198	DO i = nxl, nxr
199	DO j = nys, nyn
200	f_out(k,j,i) = f_inv(j,i,k)
201	ENDDO
202	ENDDO
203	ENDDO
204	!$OMP END PARALLEL
205	ELSE
206	!
207	!-- Reorder the array in a way that the z index is in first position
208	!$OMP PARALLEL PRIVATE ( i, j, k )
209	!$OMP DO
210	DO i = nxl, nxr
211	DO j = nys, nyn
212	DO k = 1, nz
213	f_inv(j,i,k) = f_in(i,j,k)
214	ENDDO
215	ENDDO
216	ENDDO
217	!$OMP END PARALLEL
218
219	!$OMP PARALLEL PRIVATE ( i, j, k )
220	!$OMP DO
221	DO k = 1, nz
222	DO i = nxl, nxr
223	DO j = nys, nyn
224	f_out(k,j,i) = f_inv(j,i,k)
225	ENDDO
226	ENDDO
227	ENDDO
228	!$OMP END PARALLEL
229
230	ENDIF
231
232
233	#endif
234
235	END SUBROUTINE transpose_xz
236
237
238	SUBROUTINE transpose_yx( f_in, work, f_out )
239
240	!------------------------------------------------------------------------------!
241	! Description:
242	! ------------
243	! Transposition of input array (f_in) from y to x. For the input array, all
244	! elements along y reside on the same PE, while after transposition, all
245	! elements along x reside on the same PE.
246	!------------------------------------------------------------------------------!
247
248	USE cpulog
249	USE indices
250	USE interfaces
251	USE pegrid
252	USE transpose_indices
253
254	IMPLICIT NONE
255
256	INTEGER :: i, j, k, l, m, ys
257
258	REAL :: f_in(0:ny,nxl_y:nxr_y,nzb_y:nzt_y), &
259	f_inv(nys_x:nyn_x,nzb_x:nzt_x,0:nx), &
260	f_out(0:nx,nys_x:nyn_x,nzb_x:nzt_x), &
261	work(nnxnnynnz)
262
263	#if defined( __parallel )
264
265	!
266	!-- Reorder input array for transposition
267	!$OMP PARALLEL PRIVATE ( i, j, k, l, m, ys )
268	!$OMP DO
269	DO l = 0, pdims(2) - 1
270	m = l * ( nxr_y - nxl_y + 1 ) * ( nzt_y - nzb_y + 1 ) * &
271	( nyn_x - nys_x + 1 )
272	ys = 0 + l * ( nyn_x - nys_x + 1 )
273	DO i = nxl_y, nxr_y
274	DO k = nzb_y, nzt_y
275	DO j = ys, ys + nyn_x - nys_x
276	m = m + 1
277	work(m) = f_in(j,i,k)
278	ENDDO
279	ENDDO
280	ENDDO
281	ENDDO
282	!$OMP END PARALLEL
283
284	!
285	!-- Transpose array
286	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
287	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
288	CALL MPI_ALLTOALL( work(1), sendrecvcount_xy, MPI_REAL, &
289	f_inv(nys_x,nzb_x,0), sendrecvcount_xy, MPI_REAL, &
290	comm1dy, ierr )
291	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
292
293	!
294	!-- Reorder transposed array in a way that the x index is in first position
295	!$OMP PARALLEL PRIVATE ( i, j, k )
296	!$OMP DO
297	DO i = 0, nx
298	DO k = nzb_x, nzt_x
299	DO j = nys_x, nyn_x
300	f_out(i,j,k) = f_inv(j,k,i)
301	ENDDO
302	ENDDO
303	ENDDO
304	!$OMP END PARALLEL
305
306	#endif
307
308	END SUBROUTINE transpose_yx
309
310
311	SUBROUTINE transpose_yxd( f_in, work, f_out )
312
313	!------------------------------------------------------------------------------!
314	! Description:
315	! ------------
316	! Transposition of input array (f_in) from y to x. For the input array, all
317	! elements along y reside on the same PE, while after transposition, all
318	! elements along x reside on the same PE.
319	! This is a direct transposition for arrays with indices in regular order
320	! (k,j,i) (cf. transpose_yx).
321	!------------------------------------------------------------------------------!
322
323	USE cpulog
324	USE indices
325	USE interfaces
326	USE pegrid
327	USE transpose_indices
328
329	IMPLICIT NONE
330
331	INTEGER :: i, j, k, l, m, xs
332
333	REAL :: f_in(1:nz,nys:nyn,nxl:nxr), f_inv(nxl:nxr,1:nz,nys:nyn), &
334	f_out(0:nx,nys_x:nyn_x,nzb_x:nzt_x), &
335	work(nnxnnynnz)
336
337	#if defined( __parallel )
338
339	!
340	!-- Rearrange indices of input array in order to make data to be send
341	!-- by MPI contiguous
342	DO k = 1, nz
343	DO j = nys, nyn
344	DO i = nxl, nxr
345	f_inv(i,k,j) = f_in(k,j,i)
346	ENDDO
347	ENDDO
348	ENDDO
349
350	!
351	!-- Transpose array
352	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
353	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
354	CALL MPI_ALLTOALL( f_inv(nxl,1,nys), sendrecvcount_xy, MPI_REAL, &
355	work(1), sendrecvcount_xy, MPI_REAL, &
356	comm1dx, ierr )
357	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
358
359	!
360	!-- Reorder transposed array
361	m = 0
362	DO l = 0, pdims(1) - 1
363	xs = 0 + l * nnx
364	DO j = nys_x, nyn_x
365	DO k = 1, nz
366	DO i = xs, xs + nnx - 1
367	m = m + 1
368	f_out(i,j,k) = work(m)
369	ENDDO
370	ENDDO
371	ENDDO
372	ENDDO
373
374	#endif
375
376	END SUBROUTINE transpose_yxd
377
378
379	SUBROUTINE transpose_yz( f_in, work, f_out )
380
381	!------------------------------------------------------------------------------!
382	! Description:
383	! ------------
384	! Transposition of input array (f_in) from y to z. For the input array, all
385	! elements along y reside on the same PE, while after transposition, all
386	! elements along z reside on the same PE.
387	!------------------------------------------------------------------------------!
388
389	USE cpulog
390	USE indices
391	USE interfaces
392	USE pegrid
393	USE transpose_indices
394
395	IMPLICIT NONE
396
397	INTEGER :: i, j, k, l, m, zs
398
399	REAL :: f_in(0:ny,nxl_y:nxr_y,nzb_y:nzt_y), &
400	f_inv(nxl_y:nxr_y,nzb_y:nzt_y,0:ny), &
401	f_out(nxl_z:nxr_z,nys_z:nyn_z,1:nz), &
402	work(nnxnnynnz)
403
404	#if defined( __parallel )
405
406	!
407	!-- Rearrange indices of input array in order to make data to be send
408	!-- by MPI contiguous
409	!$OMP PARALLEL PRIVATE ( i, j, k )
410	!$OMP DO
411	DO j = 0, ny
412	DO k = nzb_y, nzt_y
413	DO i = nxl_y, nxr_y
414	f_inv(i,k,j) = f_in(j,i,k)
415	ENDDO
416	ENDDO
417	ENDDO
418	!$OMP END PARALLEL
419
420	!
421	!-- Move data to different array, because memory location of work1 is
422	!-- needed further below (work1 = work2).
423	!-- If the PE grid is one-dimensional along y, only local reordering
424	!-- of the data is necessary and no transposition has to be done.
425	IF ( pdims(1) == 1 ) THEN
426	!$OMP PARALLEL PRIVATE ( i, j, k )
427	!$OMP DO
428	DO j = 0, ny
429	DO k = nzb_y, nzt_y
430	DO i = nxl_y, nxr_y
431	f_out(i,j,k) = f_inv(i,k,j)
432	ENDDO
433	ENDDO
434	ENDDO
435	!$OMP END PARALLEL
436	RETURN
437	ENDIF
438
439	!
440	!-- Transpose array
441	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
442	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
443	CALL MPI_ALLTOALL( f_inv(nxl_y,nzb_y,0), sendrecvcount_yz, MPI_REAL, &
444	work(1), sendrecvcount_yz, MPI_REAL, &
445	comm1dx, ierr )
446	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
447
448	!
449	!-- Reorder transposed array
450	!$OMP PARALLEL PRIVATE ( i, j, k, l, m, zs )
451	!$OMP DO
452	DO l = 0, pdims(1) - 1
453	m = l * ( nyn_z - nys_z + 1 ) * ( nzt_y - nzb_y + 1 ) * &
454	( nxr_z - nxl_z + 1 )
455	zs = 1 + l * ( nzt_y - nzb_y + 1 )
456	DO j = nys_z, nyn_z
457	DO k = zs, zs + nzt_y - nzb_y
458	DO i = nxl_z, nxr_z
459	m = m + 1
460	f_out(i,j,k) = work(m)
461	ENDDO
462	ENDDO
463	ENDDO
464	ENDDO
465	!$OMP END PARALLEL
466
467	#endif
468
469	END SUBROUTINE transpose_yz
470
471
472	SUBROUTINE transpose_zx( f_in, work, f_out )
473
474	!------------------------------------------------------------------------------!
475	! Description:
476	! ------------
477	! Transposition of input array (f_in) from z to x. For the input array, all
478	! elements along z reside on the same PE, while after transposition, all
479	! elements along x reside on the same PE.
480	!------------------------------------------------------------------------------!
481
482	USE cpulog
483	USE indices
484	USE interfaces
485	USE pegrid
486	USE transpose_indices
487
488	IMPLICIT NONE
489
490	INTEGER :: i, j, k, l, m, xs
491
492	REAL :: f_in(1:nz,nys:nyn,nxl:nxr), f_inv(nys:nyn,nxl:nxr,1:nz), &
493	f_out(0:nx,nys_x:nyn_x,nzb_x:nzt_x), &
494	work(nnxnnynnz)
495
496	#if defined( __parallel )
497
498	!
499	!-- Rearrange indices of input array in order to make data to be send
500	!-- by MPI contiguous
501	!$OMP PARALLEL PRIVATE ( i, j, k )
502	!$OMP DO
503	DO k = 1,nz
504	DO i = nxl, nxr
505	DO j = nys, nyn
506	f_inv(j,i,k) = f_in(k,j,i)
507	ENDDO
508	ENDDO
509	ENDDO
510	!$OMP END PARALLEL
511
512	!
513	!-- Move data to different array, because memory location of work1 is
514	!-- needed further below (work1 = work2).
515	!-- If the PE grid is one-dimensional along y, only local reordering
516	!-- of the data is necessary and no transposition has to be done.
517	IF ( pdims(1) == 1 ) THEN
518	!$OMP PARALLEL PRIVATE ( i, j, k )
519	!$OMP DO
520	DO k = 1, nz
521	DO i = nxl, nxr
522	DO j = nys, nyn
523	f_out(i,j,k) = f_inv(j,i,k)
524	ENDDO
525	ENDDO
526	ENDDO
527	!$OMP END PARALLEL
528	RETURN
529	ENDIF
530
531	!
532	!-- Transpose array
533	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
534	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
535	CALL MPI_ALLTOALL( f_inv(nys,nxl,1), sendrecvcount_zx, MPI_REAL, &
536	work(1), sendrecvcount_zx, MPI_REAL, &
537	comm1dx, ierr )
538	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
539
540	!
541	!-- Reorder transposed array
542	!$OMP PARALLEL PRIVATE ( i, j, k, l, m, xs )
543	!$OMP DO
544	DO l = 0, pdims(1) - 1
545	m = l * ( nzt_x - nzb_x + 1 ) * nnx * ( nyn_x - nys_x + 1 )
546	xs = 0 + l * nnx
547	DO k = nzb_x, nzt_x
548	DO i = xs, xs + nnx - 1
549	DO j = nys_x, nyn_x
550	m = m + 1
551	f_out(i,j,k) = work(m)
552	ENDDO
553	ENDDO
554	ENDDO
555	ENDDO
556	!$OMP END PARALLEL
557
558	#endif
559
560	END SUBROUTINE transpose_zx
561
562
563	SUBROUTINE transpose_zy( f_in, work, f_out )
564
565	!------------------------------------------------------------------------------!
566	! Description:
567	! ------------
568	! Transposition of input array (f_in) from z to y. For the input array, all
569	! elements along z reside on the same PE, while after transposition, all
570	! elements along y reside on the same PE.
571	!------------------------------------------------------------------------------!
572
573	USE cpulog
574	USE indices
575	USE interfaces
576	USE pegrid
577	USE transpose_indices
578
579	IMPLICIT NONE
580
581	INTEGER :: i, j, k, l, m, zs
582
583	REAL :: f_in(nxl_z:nxr_z,nys_z:nyn_z,1:nz), &
584	f_inv(nxl_y:nxr_y,nzb_y:nzt_y,0:ny), &
585	f_out(0:ny,nxl_y:nxr_y,nzb_y:nzt_y), &
586	work(nnxnnynnz)
587
588	#if defined( __parallel )
589
590	!
591	!-- If the PE grid is one-dimensional along y, the array has only to be
592	!-- reordered locally and therefore no transposition has to be done.
593	IF ( pdims(1) /= 1 ) THEN
594	!
595	!-- Reorder input array for transposition
596	!$OMP PARALLEL PRIVATE ( i, j, k, l, m, zs )
597	!$OMP DO
598	DO l = 0, pdims(1) - 1
599	m = l * ( nyn_z - nys_z + 1 ) * ( nzt_y - nzb_y + 1 ) * &
600	( nxr_z - nxl_z + 1 )
601	zs = 1 + l * ( nzt_y - nzb_y + 1 )
602	DO j = nys_z, nyn_z
603	DO k = zs, zs + nzt_y - nzb_y
604	DO i = nxl_z, nxr_z
605	m = m + 1
606	work(m) = f_in(i,j,k)
607	ENDDO
608	ENDDO
609	ENDDO
610	ENDDO
611	!$OMP END PARALLEL
612
613	!
614	!-- Transpose array
615	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
616	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
617	CALL MPI_ALLTOALL( work(1), sendrecvcount_yz, MPI_REAL, &
618	f_inv(nxl_y,nzb_y,0), sendrecvcount_yz, MPI_REAL, &
619	comm1dx, ierr )
620	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
621
622	!
623	!-- Reorder transposed array in a way that the y index is in first position
624	!$OMP PARALLEL PRIVATE ( i, j, k )
625	!$OMP DO
626	DO j = 0, ny
627	DO k = nzb_y, nzt_y
628	DO i = nxl_y, nxr_y
629	f_out(j,i,k) = f_inv(i,k,j)
630	ENDDO
631	ENDDO
632	ENDDO
633	!$OMP END PARALLEL
634	ELSE
635	!
636	!-- Reorder the array in a way that the y index is in first position
637	!$OMP PARALLEL PRIVATE ( i, j, k )
638	!$OMP DO
639	DO k = nzb_y, nzt_y
640	DO j = 0, ny
641	DO i = nxl_y, nxr_y
642	f_inv(i,k,j) = f_in(i,j,k)
643	ENDDO
644	ENDDO
645	ENDDO
646	!$OMP END PARALLEL
647	!
648	!-- Move data to output array
649	!$OMP PARALLEL PRIVATE ( i, j, k )
650	!$OMP DO
651	DO k = nzb_y, nzt_y
652	DO i = nxl_y, nxr_y
653	DO j = 0, ny
654	f_out(j,i,k) = f_inv(i,k,j)
655	ENDDO
656	ENDDO
657	ENDDO
658	!$OMP END PARALLEL
659
660	ENDIF
661
662	#endif
663
664	END SUBROUTINE transpose_zy
665
666
667	SUBROUTINE transpose_zyd( f_in, work, f_out )
668
669	!------------------------------------------------------------------------------!
670	! Description:
671	! ------------
672	! Transposition of input array (f_in) from z to y. For the input array, all
673	! elements along z reside on the same PE, while after transposition, all
674	! elements along y reside on the same PE.
675	! This is a direct transposition for arrays with indices in regular order
676	! (k,j,i) (cf. transpose_zy).
677	!------------------------------------------------------------------------------!
678
679	USE cpulog
680	USE indices
681	USE interfaces
682	USE pegrid
683	USE transpose_indices
684
685	IMPLICIT NONE
686
687	INTEGER :: i, j, k, l, m, ys
688
689	REAL :: f_in(1:nz,nys:nyn,nxl:nxr), f_inv(nys:nyn,nxl:nxr,1:nz), &
690	f_out(0:ny,nxl_yd:nxr_yd,nzb_yd:nzt_yd), &
691	work(nnxnnynnz)
692
693	#if defined( __parallel )
694
695	!
696	!-- Rearrange indices of input array in order to make data to be send
697	!-- by MPI contiguous
698	DO i = nxl, nxr
699	DO j = nys, nyn
700	DO k = 1, nz
701	f_inv(j,i,k) = f_in(k,j,i)
702	ENDDO
703	ENDDO
704	ENDDO
705
706	!
707	!-- Move data to different array, because memory location of work1 is
708	!-- needed further below (work1 = work2).
709	!-- If the PE grid is one-dimensional along x, only local reordering
710	!-- of the data is necessary and no transposition has to be done.
711	IF ( pdims(2) == 1 ) THEN
712	DO k = 1, nz
713	DO i = nxl, nxr
714	DO j = nys, nyn
715	f_out(j,i,k) = f_inv(j,i,k)
716	ENDDO
717	ENDDO
718	ENDDO
719	RETURN
720	ENDIF
721
722	!
723	!-- Transpose array
724	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
725	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
726	CALL MPI_ALLTOALL( f_inv(nys,nxl,1), sendrecvcount_zyd, MPI_REAL, &
727	work(1), sendrecvcount_zyd, MPI_REAL, &
728	comm1dy, ierr )
729	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
730
731	!
732	!-- Reorder transposed array
733	m = 0
734	DO l = 0, pdims(2) - 1
735	ys = 0 + l * nny
736	DO k = nzb_yd, nzt_yd
737	DO i = nxl_yd, nxr_yd
738	DO j = ys, ys + nny - 1
739	m = m + 1
740	f_out(j,i,k) = work(m)
741	ENDDO
742	ENDDO
743	ENDDO
744	ENDDO
745
746	#endif
747
748	END SUBROUTINE transpose_zyd

Note: See TracBrowser for help on using the repository browser.

Download in other formats:

| Impressum | ©Leibniz Universität Hannover |