Home

Context Navigation

transpose.f90 @ 1106

Last change on this file since 1106 was 1106, checked in by raasch, 11 years ago

New:
---

Porting of FFT-solver for serial runs to GPU using CUDA FFT,
preprocessor lines in transpose routines rearranged, so that routines can also
be used in serial (non-parallel) mode,
transpositions also carried out in serial mode, routines fftx, fftxp replaced
by calls of fft_x, fft_x replaced by fft_x_1d in the 1D-decomposition routines
(Makefile, Makefile_check, fft_xy, poisfft, poisfft_hybrid, transpose, new: cuda_fft_interfaces)

--stdin argument for mpiexec on lckyuh, -y and -Y settings output to header (mrun)

Changed:

Module array_kind renamed precision_kind
(check_open, data_output_3d, fft_xy, modules, user_data_output_3d)

some format changes for coupled atmosphere-ocean runs (header)
small changes in code formatting (microphysics, prognostic_equations)

Errors:

bugfix: default value (0) assigned to coupling_start_time (modules)
bugfix: initial time for preruns of coupled runs is output as -coupling_start_time (data_output_profiles)

Property svn:keywords set to Id

File size: 21.6 KB

Line
1	SUBROUTINE transpose_xy( f_in, work, f_out )
2
3	!--------------------------------------------------------------------------------!
4	! This file is part of PALM.
5	!
6	! PALM is free software: you can redistribute it and/or modify it under the terms
7	! of the GNU General Public License as published by the Free Software Foundation,
8	! either version 3 of the License, or (at your option) any later version.
9	!
10	! PALM is distributed in the hope that it will be useful, but WITHOUT ANY
11	! WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
12	! A PARTICULAR PURPOSE. See the GNU General Public License for more details.
13	!
14	! You should have received a copy of the GNU General Public License along with
15	! PALM. If not, see <http://www.gnu.org/licenses/>.
16	!
17	! Copyright 1997-2012 Leibniz University Hannover
18	!--------------------------------------------------------------------------------!
19	!
20	! Current revisions:
21	! -----------------
22	! preprocessor lines rearranged so that routines can also be used in serial
23	! (non-parallel) mode
24	!
25	! Former revisions:
26	! -----------------
27	! $Id: transpose.f90 1106 2013-03-04 05:31:38Z raasch $
28	!
29	! 1092 2013-02-02 11:24:22Z raasch
30	! unused variables removed
31	!
32	! 1036 2012-10-22 13:43:42Z raasch
33	! code put under GPL (PALM 3.9)
34	!
35	! 1003 2012-09-14 14:35:53Z raasch
36	! indices nxa, nya, etc. replaced by nx, ny, etc.
37	!
38	! 683 2011-02-09 14:25:15Z raasch
39	! openMP parallelization of transpositions for 2d-domain-decomposition
40	!
41	! 622 2010-12-10 08:08:13Z raasch
42	! optional barriers included in order to speed up collective operations
43	!
44	! 164 2008-05-15 08:46:15Z raasch
45	! f_inv changed from subroutine argument to automatic array in order to do
46	! re-ordering from f_in to f_inv in one step, one array work is needed instead
47	! of work1 and work2
48	!
49	! February 2007
50	! RCS Log replace by Id keyword, revision history cleaned up
51	!
52	! Revision 1.2 2004/04/30 13:12:17 raasch
53	! Switched from mpi_alltoallv to the simpler mpi_alltoall,
54	! all former transpose-routine files collected in this file, enlarged
55	! transposition arrays introduced
56	!
57	! Revision 1.1 2004/04/30 13:08:16 raasch
58	! Initial revision (collection of former routines transpose_xy, transpose_xz,
59	! transpose_yx, transpose_yz, transpose_zx, transpose_zy)
60	!
61	! Revision 1.1 1997/07/24 11:25:18 raasch
62	! Initial revision
63	!
64	!
65	! Description:
66	! ------------
67	! Transposition of input array (f_in) from x to y. For the input array, all
68	! elements along x reside on the same PE, while after transposition, all
69	! elements along y reside on the same PE.
70	!------------------------------------------------------------------------------!
71
72	USE cpulog
73	USE indices
74	USE interfaces
75	USE pegrid
76	USE transpose_indices
77
78	IMPLICIT NONE
79
80	INTEGER :: i, j, k, l, m, ys
81
82	REAL :: f_in(0:nx,nys_x:nyn_x,nzb_x:nzt_x), &
83	f_inv(nys_x:nyn_x,nzb_x:nzt_x,0:nx), &
84	f_out(0:ny,nxl_y:nxr_y,nzb_y:nzt_y), &
85	work(nnxnnynnz)
86
87	!
88	!-- Rearrange indices of input array in order to make data to be send
89	!-- by MPI contiguous
90	!$OMP PARALLEL PRIVATE ( i, j, k )
91	!$OMP DO
92	DO i = 0, nx
93	DO k = nzb_x, nzt_x
94	DO j = nys_x, nyn_x
95	f_inv(j,k,i) = f_in(i,j,k)
96	ENDDO
97	ENDDO
98	ENDDO
99	!$OMP END PARALLEL
100
101	IF ( numprocs /= 1 ) THEN
102
103	#if defined( __parallel )
104	!
105	!-- Transpose array
106	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
107	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
108	CALL MPI_ALLTOALL( f_inv(nys_x,nzb_x,0), sendrecvcount_xy, MPI_REAL, &
109	work(1), sendrecvcount_xy, MPI_REAL, &
110	comm1dy, ierr )
111	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
112
113	!
114	!-- Reorder transposed array
115	!$OMP PARALLEL PRIVATE ( i, j, k, l, m, ys )
116	!$OMP DO
117	DO l = 0, pdims(2) - 1
118	m = l * ( nxr_y - nxl_y + 1 ) * ( nzt_y - nzb_y + 1 ) * &
119	( nyn_x - nys_x + 1 )
120	ys = 0 + l * ( nyn_x - nys_x + 1 )
121	DO i = nxl_y, nxr_y
122	DO k = nzb_y, nzt_y
123	DO j = ys, ys + nyn_x - nys_x
124	m = m + 1
125	f_out(j,i,k) = work(m)
126	ENDDO
127	ENDDO
128	ENDDO
129	ENDDO
130	!$OMP END PARALLEL
131	#endif
132
133	ELSE
134
135	!
136	!-- Reorder transposed array
137	!$OMP PARALLEL PRIVATE ( i, j, k )
138	!$OMP DO
139	DO k = nzb_y, nzt_y
140	DO i = nxl_y, nxr_y
141	DO j = 0, ny
142	f_out(j,i,k) = f_inv(j,k,i)
143	ENDDO
144	ENDDO
145	ENDDO
146	!$OMP END PARALLEL
147
148	ENDIF
149
150	END SUBROUTINE transpose_xy
151
152
153	SUBROUTINE transpose_xz( f_in, work, f_out )
154
155	!------------------------------------------------------------------------------!
156	! Description:
157	! ------------
158	! Transposition of input array (f_in) from x to z. For the input array, all
159	! elements along x reside on the same PE, while after transposition, all
160	! elements along z reside on the same PE.
161	!------------------------------------------------------------------------------!
162
163	USE cpulog
164	USE indices
165	USE interfaces
166	USE pegrid
167	USE transpose_indices
168
169	IMPLICIT NONE
170
171	INTEGER :: i, j, k, l, m, xs
172
173	REAL :: f_in(0:nx,nys_x:nyn_x,nzb_x:nzt_x), &
174	f_inv(nys:nyn,nxl:nxr,1:nz), &
175	f_out(1:nz,nys:nyn,nxl:nxr), &
176	work(nnxnnynnz)
177
178
179	!
180	!-- If the PE grid is one-dimensional along y, the array has only to be
181	!-- reordered locally and therefore no transposition has to be done.
182	IF ( pdims(1) /= 1 ) THEN
183
184	#if defined( __parallel )
185	!
186	!-- Reorder input array for transposition
187	!$OMP PARALLEL PRIVATE ( i, j, k, l, m, xs )
188	!$OMP DO
189	DO l = 0, pdims(1) - 1
190	m = l * ( nzt_x - nzb_x + 1 ) * nnx * ( nyn_x - nys_x + 1 )
191	xs = 0 + l * nnx
192	DO k = nzb_x, nzt_x
193	DO i = xs, xs + nnx - 1
194	DO j = nys_x, nyn_x
195	m = m + 1
196	work(m) = f_in(i,j,k)
197	ENDDO
198	ENDDO
199	ENDDO
200	ENDDO
201	!$OMP END PARALLEL
202
203	!
204	!-- Transpose array
205	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
206	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
207	CALL MPI_ALLTOALL( work(1), sendrecvcount_zx, MPI_REAL, &
208	f_inv(nys,nxl,1), sendrecvcount_zx, MPI_REAL, &
209	comm1dx, ierr )
210	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
211
212	!
213	!-- Reorder transposed array in a way that the z index is in first position
214	!$OMP PARALLEL PRIVATE ( i, j, k )
215	!$OMP DO
216	DO k = 1, nz
217	DO i = nxl, nxr
218	DO j = nys, nyn
219	f_out(k,j,i) = f_inv(j,i,k)
220	ENDDO
221	ENDDO
222	ENDDO
223	!$OMP END PARALLEL
224	#endif
225
226	ELSE
227
228	!
229	!-- Reorder the array in a way that the z index is in first position
230	!$OMP PARALLEL PRIVATE ( i, j, k )
231	!$OMP DO
232	DO i = nxl, nxr
233	DO j = nys, nyn
234	DO k = 1, nz
235	f_inv(j,i,k) = f_in(i,j,k)
236	ENDDO
237	ENDDO
238	ENDDO
239	!$OMP END PARALLEL
240
241	!$OMP PARALLEL PRIVATE ( i, j, k )
242	!$OMP DO
243	DO k = 1, nz
244	DO i = nxl, nxr
245	DO j = nys, nyn
246	f_out(k,j,i) = f_inv(j,i,k)
247	ENDDO
248	ENDDO
249	ENDDO
250	!$OMP END PARALLEL
251
252	ENDIF
253
254	END SUBROUTINE transpose_xz
255
256
257	SUBROUTINE transpose_yx( f_in, work, f_out )
258
259	!------------------------------------------------------------------------------!
260	! Description:
261	! ------------
262	! Transposition of input array (f_in) from y to x. For the input array, all
263	! elements along y reside on the same PE, while after transposition, all
264	! elements along x reside on the same PE.
265	!------------------------------------------------------------------------------!
266
267	USE cpulog
268	USE indices
269	USE interfaces
270	USE pegrid
271	USE transpose_indices
272
273	IMPLICIT NONE
274
275	INTEGER :: i, j, k, l, m, ys
276
277	REAL :: f_in(0:ny,nxl_y:nxr_y,nzb_y:nzt_y), &
278	f_inv(nys_x:nyn_x,nzb_x:nzt_x,0:nx), &
279	f_out(0:nx,nys_x:nyn_x,nzb_x:nzt_x), &
280	work(nnxnnynnz)
281
282	IF ( numprocs /= 1 ) THEN
283
284	#if defined( __parallel )
285	!
286	!-- Reorder input array for transposition
287	!$OMP PARALLEL PRIVATE ( i, j, k, l, m, ys )
288	!$OMP DO
289	DO l = 0, pdims(2) - 1
290	m = l * ( nxr_y - nxl_y + 1 ) * ( nzt_y - nzb_y + 1 ) * &
291	( nyn_x - nys_x + 1 )
292	ys = 0 + l * ( nyn_x - nys_x + 1 )
293	DO i = nxl_y, nxr_y
294	DO k = nzb_y, nzt_y
295	DO j = ys, ys + nyn_x - nys_x
296	m = m + 1
297	work(m) = f_in(j,i,k)
298	ENDDO
299	ENDDO
300	ENDDO
301	ENDDO
302	!$OMP END PARALLEL
303
304	!
305	!-- Transpose array
306	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
307	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
308	CALL MPI_ALLTOALL( work(1), sendrecvcount_xy, MPI_REAL, &
309	f_inv(nys_x,nzb_x,0), sendrecvcount_xy, MPI_REAL, &
310	comm1dy, ierr )
311	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
312	#endif
313
314	ELSE
315
316	!
317	!-- Reorder array f_in the same way as ALLTOALL did it
318	!$OMP PARALLEL PRIVATE ( i, j, k )
319	!$OMP DO
320	DO i = nxl_y, nxr_y
321	DO k = nzb_y, nzt_y
322	DO j = 0, ny
323	f_inv(j,k,i) = f_in(j,i,k)
324	ENDDO
325	ENDDO
326	ENDDO
327	!$OMP END PARALLEL
328
329	ENDIF
330
331	!
332	!-- Reorder transposed array in a way that the x index is in first position
333	!$OMP PARALLEL PRIVATE ( i, j, k )
334	!$OMP DO
335	DO i = 0, nx
336	DO k = nzb_x, nzt_x
337	DO j = nys_x, nyn_x
338	f_out(i,j,k) = f_inv(j,k,i)
339	ENDDO
340	ENDDO
341	ENDDO
342	!$OMP END PARALLEL
343
344	END SUBROUTINE transpose_yx
345
346
347	SUBROUTINE transpose_yxd( f_in, work, f_out )
348
349	!------------------------------------------------------------------------------!
350	! Description:
351	! ------------
352	! Transposition of input array (f_in) from y to x. For the input array, all
353	! elements along y reside on the same PE, while after transposition, all
354	! elements along x reside on the same PE.
355	! This is a direct transposition for arrays with indices in regular order
356	! (k,j,i) (cf. transpose_yx).
357	!------------------------------------------------------------------------------!
358
359	USE cpulog
360	USE indices
361	USE interfaces
362	USE pegrid
363	USE transpose_indices
364
365	IMPLICIT NONE
366
367	INTEGER :: i, j, k, l, m, xs
368
369	REAL :: f_in(1:nz,nys:nyn,nxl:nxr), f_inv(nxl:nxr,1:nz,nys:nyn), &
370	f_out(0:nx,nys_x:nyn_x,nzb_x:nzt_x), &
371	work(nnxnnynnz)
372
373	#if defined( __parallel )
374
375	!
376	!-- Rearrange indices of input array in order to make data to be send
377	!-- by MPI contiguous
378	DO k = 1, nz
379	DO j = nys, nyn
380	DO i = nxl, nxr
381	f_inv(i,k,j) = f_in(k,j,i)
382	ENDDO
383	ENDDO
384	ENDDO
385
386	!
387	!-- Transpose array
388	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
389	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
390	CALL MPI_ALLTOALL( f_inv(nxl,1,nys), sendrecvcount_xy, MPI_REAL, &
391	work(1), sendrecvcount_xy, MPI_REAL, &
392	comm1dx, ierr )
393	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
394
395	!
396	!-- Reorder transposed array
397	m = 0
398	DO l = 0, pdims(1) - 1
399	xs = 0 + l * nnx
400	DO j = nys_x, nyn_x
401	DO k = 1, nz
402	DO i = xs, xs + nnx - 1
403	m = m + 1
404	f_out(i,j,k) = work(m)
405	ENDDO
406	ENDDO
407	ENDDO
408	ENDDO
409
410	#endif
411
412	END SUBROUTINE transpose_yxd
413
414
415	SUBROUTINE transpose_yz( f_in, work, f_out )
416
417	!------------------------------------------------------------------------------!
418	! Description:
419	! ------------
420	! Transposition of input array (f_in) from y to z. For the input array, all
421	! elements along y reside on the same PE, while after transposition, all
422	! elements along z reside on the same PE.
423	!------------------------------------------------------------------------------!
424
425	USE cpulog
426	USE indices
427	USE interfaces
428	USE pegrid
429	USE transpose_indices
430
431	IMPLICIT NONE
432
433	INTEGER :: i, j, k, l, m, zs
434
435	REAL :: f_in(0:ny,nxl_y:nxr_y,nzb_y:nzt_y), &
436	f_inv(nxl_y:nxr_y,nzb_y:nzt_y,0:ny), &
437	f_out(nxl_z:nxr_z,nys_z:nyn_z,1:nz), &
438	work(nnxnnynnz)
439
440	!
441	!-- Rearrange indices of input array in order to make data to be send
442	!-- by MPI contiguous
443	!$OMP PARALLEL PRIVATE ( i, j, k )
444	!$OMP DO
445	DO j = 0, ny
446	DO k = nzb_y, nzt_y
447	DO i = nxl_y, nxr_y
448	f_inv(i,k,j) = f_in(j,i,k)
449	ENDDO
450	ENDDO
451	ENDDO
452	!$OMP END PARALLEL
453
454	!
455	!-- Move data to different array, because memory location of work1 is
456	!-- needed further below (work1 = work2).
457	!-- If the PE grid is one-dimensional along y, only local reordering
458	!-- of the data is necessary and no transposition has to be done.
459	IF ( pdims(1) == 1 ) THEN
460
461	!$OMP PARALLEL PRIVATE ( i, j, k )
462	!$OMP DO
463	DO j = 0, ny
464	DO k = nzb_y, nzt_y
465	DO i = nxl_y, nxr_y
466	f_out(i,j,k) = f_inv(i,k,j)
467	ENDDO
468	ENDDO
469	ENDDO
470	!$OMP END PARALLEL
471
472	ELSE
473
474	#if defined( __parallel )
475	!
476	!-- Transpose array
477	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
478	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
479	CALL MPI_ALLTOALL( f_inv(nxl_y,nzb_y,0), sendrecvcount_yz, MPI_REAL, &
480	work(1), sendrecvcount_yz, MPI_REAL, &
481	comm1dx, ierr )
482	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
483
484	!
485	!-- Reorder transposed array
486	!$OMP PARALLEL PRIVATE ( i, j, k, l, m, zs )
487	!$OMP DO
488	DO l = 0, pdims(1) - 1
489	m = l * ( nyn_z - nys_z + 1 ) * ( nzt_y - nzb_y + 1 ) * &
490	( nxr_z - nxl_z + 1 )
491	zs = 1 + l * ( nzt_y - nzb_y + 1 )
492	DO j = nys_z, nyn_z
493	DO k = zs, zs + nzt_y - nzb_y
494	DO i = nxl_z, nxr_z
495	m = m + 1
496	f_out(i,j,k) = work(m)
497	ENDDO
498	ENDDO
499	ENDDO
500	ENDDO
501	!$OMP END PARALLEL
502	#endif
503
504	ENDIF
505
506	END SUBROUTINE transpose_yz
507
508
509	SUBROUTINE transpose_zx( f_in, work, f_out )
510
511	!------------------------------------------------------------------------------!
512	! Description:
513	! ------------
514	! Transposition of input array (f_in) from z to x. For the input array, all
515	! elements along z reside on the same PE, while after transposition, all
516	! elements along x reside on the same PE.
517	!------------------------------------------------------------------------------!
518
519	USE cpulog
520	USE indices
521	USE interfaces
522	USE pegrid
523	USE transpose_indices
524
525	IMPLICIT NONE
526
527	INTEGER :: i, j, k, l, m, xs
528
529	REAL :: f_in(1:nz,nys:nyn,nxl:nxr), f_out(0:nx,nys_x:nyn_x,nzb_x:nzt_x), &
530	work(nnxnnynnz)
531
532	!$acc declare create ( f_inv )
533	REAL :: f_inv(nys:nyn,nxl:nxr,1:nz)
534
535
536	!
537	!-- Rearrange indices of input array in order to make data to be send
538	!-- by MPI contiguous
539	!$OMP PARALLEL PRIVATE ( i, j, k )
540	!$OMP DO
541	!$acc kernels present( f_in )
542	!$acc loop
543	DO k = 1,nz
544	DO i = nxl, nxr
545	!$acc loop vector( 32 )
546	DO j = nys, nyn
547	f_inv(j,i,k) = f_in(k,j,i)
548	ENDDO
549	ENDDO
550	ENDDO
551	!$OMP END PARALLEL
552
553	!
554	!-- Move data to different array, because memory location of work1 is
555	!-- needed further below (work1 = work2).
556	!-- If the PE grid is one-dimensional along y, only local reordering
557	!-- of the data is necessary and no transposition has to be done.
558	IF ( pdims(1) == 1 ) THEN
559
560	!$OMP PARALLEL PRIVATE ( i, j, k )
561	!$OMP DO
562	!$acc kernels present( f_out )
563	!$acc loop
564	DO k = 1, nz
565	DO i = nxl, nxr
566	!$acc loop vector( 32 )
567	DO j = nys, nyn
568	f_out(i,j,k) = f_inv(j,i,k)
569	ENDDO
570	ENDDO
571	ENDDO
572	!$OMP END PARALLEL
573
574	ELSE
575
576	#if defined( __parallel )
577	!
578	!-- Transpose array
579	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
580	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
581	CALL MPI_ALLTOALL( f_inv(nys,nxl,1), sendrecvcount_zx, MPI_REAL, &
582	work(1), sendrecvcount_zx, MPI_REAL, &
583	comm1dx, ierr )
584	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
585
586	!
587	!-- Reorder transposed array
588	!$OMP PARALLEL PRIVATE ( i, j, k, l, m, xs )
589	!$OMP DO
590	DO l = 0, pdims(1) - 1
591	m = l * ( nzt_x - nzb_x + 1 ) * nnx * ( nyn_x - nys_x + 1 )
592	xs = 0 + l * nnx
593	DO k = nzb_x, nzt_x
594	DO i = xs, xs + nnx - 1
595	DO j = nys_x, nyn_x
596	m = m + 1
597	f_out(i,j,k) = work(m)
598	ENDDO
599	ENDDO
600	ENDDO
601	ENDDO
602	!$OMP END PARALLEL
603	#endif
604
605	ENDIF
606
607	END SUBROUTINE transpose_zx
608
609
610	SUBROUTINE transpose_zy( f_in, work, f_out )
611
612	!------------------------------------------------------------------------------!
613	! Description:
614	! ------------
615	! Transposition of input array (f_in) from z to y. For the input array, all
616	! elements along z reside on the same PE, while after transposition, all
617	! elements along y reside on the same PE.
618	!------------------------------------------------------------------------------!
619
620	USE cpulog
621	USE indices
622	USE interfaces
623	USE pegrid
624	USE transpose_indices
625
626	IMPLICIT NONE
627
628	INTEGER :: i, j, k, l, m, zs
629
630	REAL :: f_in(nxl_z:nxr_z,nys_z:nyn_z,1:nz), &
631	f_inv(nxl_y:nxr_y,nzb_y:nzt_y,0:ny), &
632	f_out(0:ny,nxl_y:nxr_y,nzb_y:nzt_y), &
633	work(nnxnnynnz)
634
635	!
636	!-- If the PE grid is one-dimensional along y, the array has only to be
637	!-- reordered locally and therefore no transposition has to be done.
638	IF ( pdims(1) /= 1 ) THEN
639
640	#if defined( __parallel )
641	!
642	!-- Reorder input array for transposition
643	!$OMP PARALLEL PRIVATE ( i, j, k, l, m, zs )
644	!$OMP DO
645	DO l = 0, pdims(1) - 1
646	m = l * ( nyn_z - nys_z + 1 ) * ( nzt_y - nzb_y + 1 ) * &
647	( nxr_z - nxl_z + 1 )
648	zs = 1 + l * ( nzt_y - nzb_y + 1 )
649	DO j = nys_z, nyn_z
650	DO k = zs, zs + nzt_y - nzb_y
651	DO i = nxl_z, nxr_z
652	m = m + 1
653	work(m) = f_in(i,j,k)
654	ENDDO
655	ENDDO
656	ENDDO
657	ENDDO
658	!$OMP END PARALLEL
659
660	!
661	!-- Transpose array
662	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
663	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
664	CALL MPI_ALLTOALL( work(1), sendrecvcount_yz, MPI_REAL, &
665	f_inv(nxl_y,nzb_y,0), sendrecvcount_yz, MPI_REAL, &
666	comm1dx, ierr )
667	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
668	#endif
669
670	ELSE
671	!
672	!-- Reorder the array in the same way like ALLTOALL did it
673	!$OMP PARALLEL PRIVATE ( i, j, k )
674	!$OMP DO
675	DO k = nzb_y, nzt_y
676	DO j = 0, ny
677	DO i = nxl_y, nxr_y
678	f_inv(i,k,j) = f_in(i,j,k)
679	ENDDO
680	ENDDO
681	ENDDO
682	!$OMP END PARALLEL
683
684	ENDIF
685
686	!
687	!-- Reorder transposed array in a way that the y index is in first position
688	!$OMP PARALLEL PRIVATE ( i, j, k )
689	!$OMP DO
690	DO k = nzb_y, nzt_y
691	DO i = nxl_y, nxr_y
692	DO j = 0, ny
693	f_out(j,i,k) = f_inv(i,k,j)
694	ENDDO
695	ENDDO
696	ENDDO
697	!$OMP END PARALLEL
698
699	END SUBROUTINE transpose_zy
700
701
702	SUBROUTINE transpose_zyd( f_in, work, f_out )
703
704	!------------------------------------------------------------------------------!
705	! Description:
706	! ------------
707	! Transposition of input array (f_in) from z to y. For the input array, all
708	! elements along z reside on the same PE, while after transposition, all
709	! elements along y reside on the same PE.
710	! This is a direct transposition for arrays with indices in regular order
711	! (k,j,i) (cf. transpose_zy).
712	!------------------------------------------------------------------------------!
713
714	USE cpulog
715	USE indices
716	USE interfaces
717	USE pegrid
718	USE transpose_indices
719
720	IMPLICIT NONE
721
722	INTEGER :: i, j, k, l, m, ys
723
724	REAL :: f_in(1:nz,nys:nyn,nxl:nxr), f_inv(nys:nyn,nxl:nxr,1:nz), &
725	f_out(0:ny,nxl_yd:nxr_yd,nzb_yd:nzt_yd), &
726	work(nnxnnynnz)
727
728	#if defined( __parallel )
729
730	!
731	!-- Rearrange indices of input array in order to make data to be send
732	!-- by MPI contiguous
733	DO i = nxl, nxr
734	DO j = nys, nyn
735	DO k = 1, nz
736	f_inv(j,i,k) = f_in(k,j,i)
737	ENDDO
738	ENDDO
739	ENDDO
740
741	!
742	!-- Move data to different array, because memory location of work1 is
743	!-- needed further below (work1 = work2).
744	!-- If the PE grid is one-dimensional along x, only local reordering
745	!-- of the data is necessary and no transposition has to be done.
746	IF ( pdims(2) == 1 ) THEN
747	DO k = 1, nz
748	DO i = nxl, nxr
749	DO j = nys, nyn
750	f_out(j,i,k) = f_inv(j,i,k)
751	ENDDO
752	ENDDO
753	ENDDO
754	RETURN
755	ENDIF
756
757	!
758	!-- Transpose array
759	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
760	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
761	CALL MPI_ALLTOALL( f_inv(nys,nxl,1), sendrecvcount_zyd, MPI_REAL, &
762	work(1), sendrecvcount_zyd, MPI_REAL, &
763	comm1dy, ierr )
764	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
765
766	!
767	!-- Reorder transposed array
768	m = 0
769	DO l = 0, pdims(2) - 1
770	ys = 0 + l * nny
771	DO k = nzb_yd, nzt_yd
772	DO i = nxl_yd, nxr_yd
773	DO j = ys, ys + nny - 1
774	m = m + 1
775	f_out(j,i,k) = work(m)
776	ENDDO
777	ENDDO
778	ENDDO
779	ENDDO
780
781	#endif
782
783	END SUBROUTINE transpose_zyd

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: palm/trunk/SOURCE/transpose.f90 @ 1106

Download in other formats: