Home

Context Navigation

source: palm/trunk/SOURCE/transpose.f90 @ 182

Last change on this file since 182 was 164, checked in by raasch, 17 years ago
optimization of transpositions for 2D decompositions, workaround for using -env option with mpiexec, adjustments for lcxt4
Property svn:keywords set to `Id`
File size: 17.7 KB

Line
1	SUBROUTINE transpose_xy( f_in, work, f_out )
2
3	!------------------------------------------------------------------------------!
4	! Actual revisions:
5	! -----------------
6	! f_inv changed from subroutine argument to automatic array in order to do
7	! re-ordering from f_in to f_inv in one step, one array work is needed instead
8	! of work1 and work2
9	!
10	! Former revisions:
11	! -----------------
12	! $Id: transpose.f90 164 2008-05-15 08:46:15Z raasch $
13	! RCS Log replace by Id keyword, revision history cleaned up
14	!
15	! Revision 1.2 2004/04/30 13:12:17 raasch
16	! Switched from mpi_alltoallv to the simpler mpi_alltoall,
17	! all former transpose-routine files collected in this file, enlarged
18	! transposition arrays introduced
19	!
20	! Revision 1.1 2004/04/30 13:08:16 raasch
21	! Initial revision (collection of former routines transpose_xy, transpose_xz,
22	! transpose_yx, transpose_yz, transpose_zx, transpose_zy)
23	!
24	! Revision 1.1 1997/07/24 11:25:18 raasch
25	! Initial revision
26	!
27	!
28	! Description:
29	! ------------
30	! Transposition of input array (f_in) from x to y. For the input array, all
31	! elements along x reside on the same PE, while after transposition, all
32	! elements along y reside on the same PE.
33	!------------------------------------------------------------------------------!
34
35	USE cpulog
36	USE indices
37	USE interfaces
38	USE pegrid
39	USE transpose_indices
40
41	IMPLICIT NONE
42
43	INTEGER :: i, j, k, l, m, ys
44
45	REAL :: f_in(0:nxa,nys_x:nyn_xa,nzb_x:nzt_xa), &
46	f_inv(nys_x:nyn_xa,nzb_x:nzt_xa,0:nxa), &
47	f_out(0:nya,nxl_y:nxr_ya,nzb_y:nzt_ya), &
48	work(nnxnnynnz)
49
50	#if defined( __parallel )
51
52	!
53	!-- Rearrange indices of input array in order to make data to be send
54	!-- by MPI contiguous
55	DO i = 0, nxa
56	DO k = nzb_x, nzt_xa
57	DO j = nys_x, nyn_xa
58	f_inv(j,k,i) = f_in(i,j,k)
59	ENDDO
60	ENDDO
61	ENDDO
62
63	!
64	!-- Transpose array
65	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
66	CALL MPI_ALLTOALL( f_inv(nys_x,nzb_x,0), sendrecvcount_xy, MPI_REAL, &
67	work(1), sendrecvcount_xy, MPI_REAL, &
68	comm1dy, ierr )
69	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
70
71	!
72	!-- Reorder transposed array
73	m = 0
74	DO l = 0, pdims(2) - 1
75	ys = 0 + l * ( nyn_xa - nys_x + 1 )
76	DO i = nxl_y, nxr_ya
77	DO k = nzb_y, nzt_ya
78	DO j = ys, ys + nyn_xa - nys_x
79	m = m + 1
80	f_out(j,i,k) = work(m)
81	ENDDO
82	ENDDO
83	ENDDO
84	ENDDO
85
86	#endif
87
88	END SUBROUTINE transpose_xy
89
90
91	SUBROUTINE transpose_xz( f_in, work, f_out )
92
93	!------------------------------------------------------------------------------!
94	! Description:
95	! ------------
96	! Transposition of input array (f_in) from x to z. For the input array, all
97	! elements along x reside on the same PE, while after transposition, all
98	! elements along z reside on the same PE.
99	!------------------------------------------------------------------------------!
100
101	USE cpulog
102	USE indices
103	USE interfaces
104	USE pegrid
105	USE transpose_indices
106
107	IMPLICIT NONE
108
109	INTEGER :: i, j, k, l, m, xs
110
111	REAL :: f_in(0:nxa,nys_x:nyn_xa,nzb_x:nzt_xa), &
112	f_inv(nys:nyna,nxl:nxra,1:nza), &
113	f_out(1:nza,nys:nyna,nxl:nxra), &
114	work(nnxnnynnz)
115
116	#if defined( __parallel )
117
118	!
119	!-- If the PE grid is one-dimensional along y, the array has only to be
120	!-- reordered locally and therefore no transposition has to be done.
121	IF ( pdims(1) /= 1 ) THEN
122	!
123	!-- Reorder input array for transposition
124	m = 0
125	DO l = 0, pdims(1) - 1
126	xs = 0 + l * nnx
127	DO k = nzb_x, nzt_xa
128	DO i = xs, xs + nnx - 1
129	DO j = nys_x, nyn_xa
130	m = m + 1
131	work(m) = f_in(i,j,k)
132	ENDDO
133	ENDDO
134	ENDDO
135	ENDDO
136
137	!
138	!-- Transpose array
139	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
140	CALL MPI_ALLTOALL( work(1), sendrecvcount_zx, MPI_REAL, &
141	f_inv(nys,nxl,1), sendrecvcount_zx, MPI_REAL, &
142	comm1dx, ierr )
143	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
144
145	!
146	!-- Reorder transposed array in a way that the z index is in first position
147	DO k = 1, nza
148	DO i = nxl, nxra
149	DO j = nys, nyna
150	f_out(k,j,i) = f_inv(j,i,k)
151	ENDDO
152	ENDDO
153	ENDDO
154	ELSE
155	!
156	!-- Reorder the array in a way that the z index is in first position
157	DO i = nxl, nxra
158	DO j = nys, nyna
159	DO k = 1, nza
160	f_inv(j,i,k) = f_in(i,j,k)
161	ENDDO
162	ENDDO
163	ENDDO
164
165	DO k = 1, nza
166	DO i = nxl, nxra
167	DO j = nys, nyna
168	f_out(k,j,i) = f_inv(j,i,k)
169	ENDDO
170	ENDDO
171	ENDDO
172
173	ENDIF
174
175
176	#endif
177
178	END SUBROUTINE transpose_xz
179
180
181	SUBROUTINE transpose_yx( f_in, work, f_out )
182
183	!------------------------------------------------------------------------------!
184	! Description:
185	! ------------
186	! Transposition of input array (f_in) from y to x. For the input array, all
187	! elements along y reside on the same PE, while after transposition, all
188	! elements along x reside on the same PE.
189	!------------------------------------------------------------------------------!
190
191	USE cpulog
192	USE indices
193	USE interfaces
194	USE pegrid
195	USE transpose_indices
196
197	IMPLICIT NONE
198
199	INTEGER :: i, j, k, l, m, ys
200
201	REAL :: f_in(0:nya,nxl_y:nxr_ya,nzb_y:nzt_ya), &
202	f_inv(nys_x:nyn_xa,nzb_x:nzt_xa,0:nxa), &
203	f_out(0:nxa,nys_x:nyn_xa,nzb_x:nzt_xa), &
204	work(nnxnnynnz)
205
206	#if defined( __parallel )
207
208	!
209	!-- Reorder input array for transposition
210	m = 0
211	DO l = 0, pdims(2) - 1
212	ys = 0 + l * ( nyn_xa - nys_x + 1 )
213	DO i = nxl_y, nxr_ya
214	DO k = nzb_y, nzt_ya
215	DO j = ys, ys + nyn_xa - nys_x
216	m = m + 1
217	work(m) = f_in(j,i,k)
218	ENDDO
219	ENDDO
220	ENDDO
221	ENDDO
222
223	!
224	!-- Transpose array
225	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
226	CALL MPI_ALLTOALL( work(1), sendrecvcount_xy, MPI_REAL, &
227	f_inv(nys_x,nzb_x,0), sendrecvcount_xy, MPI_REAL, &
228	comm1dy, ierr )
229	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
230
231	!
232	!-- Reorder transposed array in a way that the x index is in first position
233	DO i = 0, nxa
234	DO k = nzb_x, nzt_xa
235	DO j = nys_x, nyn_xa
236	f_out(i,j,k) = f_inv(j,k,i)
237	ENDDO
238	ENDDO
239	ENDDO
240
241	#endif
242
243	END SUBROUTINE transpose_yx
244
245
246	SUBROUTINE transpose_yxd( f_in, work, f_out )
247
248	!------------------------------------------------------------------------------!
249	! Description:
250	! ------------
251	! Transposition of input array (f_in) from y to x. For the input array, all
252	! elements along y reside on the same PE, while after transposition, all
253	! elements along x reside on the same PE.
254	! This is a direct transposition for arrays with indices in regular order
255	! (k,j,i) (cf. transpose_yx).
256	!------------------------------------------------------------------------------!
257
258	USE cpulog
259	USE indices
260	USE interfaces
261	USE pegrid
262	USE transpose_indices
263
264	IMPLICIT NONE
265
266	INTEGER :: i, j, k, l, m, recvcount_yx, sendcount_yx, xs
267
268	REAL :: f_in(1:nza,nys:nyna,nxl:nxra), f_inv(nxl:nxra,1:nza,nys:nyna), &
269	f_out(0:nxa,nys_x:nyn_xa,nzb_x:nzt_xa), &
270	work(nnxnnynnz)
271
272	#if defined( __parallel )
273
274	!
275	!-- Rearrange indices of input array in order to make data to be send
276	!-- by MPI contiguous
277	DO k = 1, nza
278	DO j = nys, nyna
279	DO i = nxl, nxra
280	f_inv(i,k,j) = f_in(k,j,i)
281	ENDDO
282	ENDDO
283	ENDDO
284
285	!
286	!-- Transpose array
287	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
288	CALL MPI_ALLTOALL( f_inv(nxl,1,nys), sendrecvcount_xy, MPI_REAL, &
289	work(1), sendrecvcount_xy, MPI_REAL, &
290	comm1dx, ierr )
291	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
292
293	!
294	!-- Reorder transposed array
295	m = 0
296	DO l = 0, pdims(1) - 1
297	xs = 0 + l * nnx
298	DO j = nys_x, nyn_xa
299	DO k = 1, nza
300	DO i = xs, xs + nnx - 1
301	m = m + 1
302	f_out(i,j,k) = work(m)
303	ENDDO
304	ENDDO
305	ENDDO
306	ENDDO
307
308	#endif
309
310	END SUBROUTINE transpose_yxd
311
312
313	SUBROUTINE transpose_yz( f_in, work, f_out )
314
315	!------------------------------------------------------------------------------!
316	! Description:
317	! ------------
318	! Transposition of input array (f_in) from y to z. For the input array, all
319	! elements along y reside on the same PE, while after transposition, all
320	! elements along z reside on the same PE.
321	!------------------------------------------------------------------------------!
322
323	USE cpulog
324	USE indices
325	USE interfaces
326	USE pegrid
327	USE transpose_indices
328
329	IMPLICIT NONE
330
331	INTEGER :: i, j, k, l, m, zs
332
333	REAL :: f_in(0:nya,nxl_y:nxr_ya,nzb_y:nzt_ya), &
334	f_inv(nxl_y:nxr_ya,nzb_y:nzt_ya,0:nya), &
335	f_out(nxl_z:nxr_za,nys_z:nyn_za,1:nza), &
336	work(nnxnnynnz)
337
338	#if defined( __parallel )
339
340	!
341	!-- Rearrange indices of input array in order to make data to be send
342	!-- by MPI contiguous
343	DO j = 0, nya
344	DO k = nzb_y, nzt_ya
345	DO i = nxl_y, nxr_ya
346	f_inv(i,k,j) = f_in(j,i,k)
347	ENDDO
348	ENDDO
349	ENDDO
350
351	!
352	!-- Move data to different array, because memory location of work1 is
353	!-- needed further below (work1 = work2).
354	!-- If the PE grid is one-dimensional along y, only local reordering
355	!-- of the data is necessary and no transposition has to be done.
356	IF ( pdims(1) == 1 ) THEN
357	DO j = 0, nya
358	DO k = nzb_y, nzt_ya
359	DO i = nxl_y, nxr_ya
360	f_out(i,j,k) = f_inv(i,k,j)
361	ENDDO
362	ENDDO
363	ENDDO
364	RETURN
365	ENDIF
366
367	!
368	!-- Transpose array
369	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
370	CALL MPI_ALLTOALL( f_inv(nxl_y,nzb_y,0), sendrecvcount_yz, MPI_REAL, &
371	work(1), sendrecvcount_yz, MPI_REAL, &
372	comm1dx, ierr )
373	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
374
375	!
376	!-- Reorder transposed array
377	m = 0
378	DO l = 0, pdims(1) - 1
379	zs = 1 + l * ( nzt_ya - nzb_y + 1 )
380	DO j = nys_z, nyn_za
381	DO k = zs, zs + nzt_ya - nzb_y
382	DO i = nxl_z, nxr_za
383	m = m + 1
384	f_out(i,j,k) = work(m)
385	ENDDO
386	ENDDO
387	ENDDO
388	ENDDO
389
390	#endif
391
392	END SUBROUTINE transpose_yz
393
394
395	SUBROUTINE transpose_zx( f_in, work, f_out )
396
397	!------------------------------------------------------------------------------!
398	! Description:
399	! ------------
400	! Transposition of input array (f_in) from z to x. For the input array, all
401	! elements along z reside on the same PE, while after transposition, all
402	! elements along x reside on the same PE.
403	!------------------------------------------------------------------------------!
404
405	USE cpulog
406	USE indices
407	USE interfaces
408	USE pegrid
409	USE transpose_indices
410
411	IMPLICIT NONE
412
413	INTEGER :: i, j, k, l, m, xs
414
415	REAL :: f_in(1:nza,nys:nyna,nxl:nxra), f_inv(nys:nyna,nxl:nxra,1:nza), &
416	f_out(0:nxa,nys_x:nyn_xa,nzb_x:nzt_xa), &
417	work(nnxnnynnz)
418
419	#if defined( __parallel )
420
421	!
422	!-- Rearrange indices of input array in order to make data to be send
423	!-- by MPI contiguous
424	DO k = 1,nza
425	DO i = nxl, nxra
426	DO j = nys, nyna
427	f_inv(j,i,k) = f_in(k,j,i)
428	ENDDO
429	ENDDO
430	ENDDO
431
432	!
433	!-- Move data to different array, because memory location of work1 is
434	!-- needed further below (work1 = work2).
435	!-- If the PE grid is one-dimensional along y, only local reordering
436	!-- of the data is necessary and no transposition has to be done.
437	IF ( pdims(1) == 1 ) THEN
438	DO k = 1, nza
439	DO i = nxl, nxra
440	DO j = nys, nyna
441	f_out(i,j,k) = f_inv(j,i,k)
442	ENDDO
443	ENDDO
444	ENDDO
445	RETURN
446	ENDIF
447
448	!
449	!-- Transpose array
450	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
451	CALL MPI_ALLTOALL( f_inv(nys,nxl,1), sendrecvcount_zx, MPI_REAL, &
452	work(1), sendrecvcount_zx, MPI_REAL, &
453	comm1dx, ierr )
454	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
455
456	!
457	!-- Reorder transposed array
458	m = 0
459	DO l = 0, pdims(1) - 1
460	xs = 0 + l * nnx
461	DO k = nzb_x, nzt_xa
462	DO i = xs, xs + nnx - 1
463	DO j = nys_x, nyn_xa
464	m = m + 1
465	f_out(i,j,k) = work(m)
466	ENDDO
467	ENDDO
468	ENDDO
469	ENDDO
470
471	#endif
472
473	END SUBROUTINE transpose_zx
474
475
476	SUBROUTINE transpose_zy( f_in, work, f_out )
477
478	!------------------------------------------------------------------------------!
479	! Description:
480	! ------------
481	! Transposition of input array (f_in) from z to y. For the input array, all
482	! elements along z reside on the same PE, while after transposition, all
483	! elements along y reside on the same PE.
484	!------------------------------------------------------------------------------!
485
486	USE cpulog
487	USE indices
488	USE interfaces
489	USE pegrid
490	USE transpose_indices
491
492	IMPLICIT NONE
493
494	INTEGER :: i, j, k, l, m, zs
495
496	REAL :: f_in(nxl_z:nxr_za,nys_z:nyn_za,1:nza), &
497	f_inv(nxl_y:nxr_ya,nzb_y:nzt_ya,0:nya), &
498	f_out(0:nya,nxl_y:nxr_ya,nzb_y:nzt_ya), &
499	work(nnxnnynnz)
500
501	#if defined( __parallel )
502
503	!
504	!-- If the PE grid is one-dimensional along y, the array has only to be
505	!-- reordered locally and therefore no transposition has to be done.
506	IF ( pdims(1) /= 1 ) THEN
507	!
508	!-- Reorder input array for transposition
509	m = 0
510	DO l = 0, pdims(1) - 1
511	zs = 1 + l * ( nzt_ya - nzb_y + 1 )
512	DO j = nys_z, nyn_za
513	DO k = zs, zs + nzt_ya - nzb_y
514	DO i = nxl_z, nxr_za
515	m = m + 1
516	work(m) = f_in(i,j,k)
517	ENDDO
518	ENDDO
519	ENDDO
520	ENDDO
521
522	!
523	!-- Transpose array
524	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
525	CALL MPI_ALLTOALL( work(1), sendrecvcount_yz, MPI_REAL, &
526	f_inv(nxl_y,nzb_y,0), sendrecvcount_yz, MPI_REAL, &
527	comm1dx, ierr )
528	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
529
530	!
531	!-- Reorder transposed array in a way that the y index is in first position
532	DO j = 0, nya
533	DO k = nzb_y, nzt_ya
534	DO i = nxl_y, nxr_ya
535	f_out(j,i,k) = f_inv(i,k,j)
536	ENDDO
537	ENDDO
538	ENDDO
539	ELSE
540	!
541	!-- Reorder the array in a way that the y index is in first position
542	DO k = nzb_y, nzt_ya
543	DO j = 0, nya
544	DO i = nxl_y, nxr_ya
545	f_inv(i,k,j) = f_in(i,j,k)
546	ENDDO
547	ENDDO
548	ENDDO
549	!
550	!-- Move data to output array
551	DO k = nzb_y, nzt_ya
552	DO i = nxl_y, nxr_ya
553	DO j = 0, nya
554	f_out(j,i,k) = f_inv(i,k,j)
555	ENDDO
556	ENDDO
557	ENDDO
558
559	ENDIF
560
561	#endif
562
563	END SUBROUTINE transpose_zy
564
565
566	SUBROUTINE transpose_zyd( f_in, work, f_out )
567
568	!------------------------------------------------------------------------------!
569	! Description:
570	! ------------
571	! Transposition of input array (f_in) from z to y. For the input array, all
572	! elements along z reside on the same PE, while after transposition, all
573	! elements along y reside on the same PE.
574	! This is a direct transposition for arrays with indices in regular order
575	! (k,j,i) (cf. transpose_zy).
576	!------------------------------------------------------------------------------!
577
578	USE cpulog
579	USE indices
580	USE interfaces
581	USE pegrid
582	USE transpose_indices
583
584	IMPLICIT NONE
585
586	INTEGER :: i, j, k, l, m, ys
587
588	REAL :: f_in(1:nza,nys:nyna,nxl:nxra), f_inv(nys:nyna,nxl:nxra,1:nza), &
589	f_out(0:nya,nxl_yd:nxr_yda,nzb_yd:nzt_yda), &
590	work(nnxnnynnz)
591
592	#if defined( __parallel )
593
594	!
595	!-- Rearrange indices of input array in order to make data to be send
596	!-- by MPI contiguous
597	DO i = nxl, nxra
598	DO j = nys, nyna
599	DO k = 1, nza
600	f_inv(j,i,k) = f_in(k,j,i)
601	ENDDO
602	ENDDO
603	ENDDO
604
605	!
606	!-- Move data to different array, because memory location of work1 is
607	!-- needed further below (work1 = work2).
608	!-- If the PE grid is one-dimensional along x, only local reordering
609	!-- of the data is necessary and no transposition has to be done.
610	IF ( pdims(2) == 1 ) THEN
611	DO k = 1, nza
612	DO i = nxl, nxra
613	DO j = nys, nyna
614	f_out(j,i,k) = f_inv(j,i,k)
615	ENDDO
616	ENDDO
617	ENDDO
618	RETURN
619	ENDIF
620
621	!
622	!-- Transpose array
623	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
624	CALL MPI_ALLTOALL( f_inv(nys,nxl,1), sendrecvcount_zyd, MPI_REAL, &
625	work(1), sendrecvcount_zyd, MPI_REAL, &
626	comm1dy, ierr )
627	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
628
629	!
630	!-- Reorder transposed array
631	m = 0
632	DO l = 0, pdims(2) - 1
633	ys = 0 + l * nny
634	DO k = nzb_yd, nzt_yda
635	DO i = nxl_yd, nxr_yda
636	DO j = ys, ys + nny - 1
637	m = m + 1
638	f_out(j,i,k) = work(m)
639	ENDDO
640	ENDDO
641	ENDDO
642	ENDDO
643
644	#endif
645
646	END SUBROUTINE transpose_zyd

Note: See TracBrowser for help on using the repository browser.

Download in other formats:

| Impressum | ©Leibniz Universität Hannover |