Home

Context Navigation

transpose.f90 @ 622

Last change on this file since 622 was 622, checked in by raasch, 13 years ago

New:
---

Optional barriers included in order to speed up collective operations
MPI_ALLTOALL and MPI_ALLREDUCE. This feature is controlled with new initial
parameter collective_wait. Default is .FALSE, but .TRUE. on SGI-type
systems. (advec_particles, advec_s_bc, buoyancy, check_for_restart,
cpu_statistics, data_output_2d, data_output_ptseries, flow_statistics,
global_min_max, inflow_turbulence, init_3d_model, init_particles, init_pegrid,
init_slope, parin, pres, poismg, set_particle_attributes, timestep,
read_var_list, user_statistics, write_compressed, write_var_list)

Adjustments for Kyushu Univ. (lcrte, ibmku). Concerning hybrid
(MPI/openMP) runs, the number of openMP threads per MPI tasks can now
be given as an argument to mrun-option -O. (mbuild, mrun, subjob)

Changed:

Initialization of the module command changed for SGI-ICE/lcsgi (mbuild, subjob)

Errors:

Property svn:keywords set to Id

File size: 18.3 KB

Line
1	SUBROUTINE transpose_xy( f_in, work, f_out )
2
3	!------------------------------------------------------------------------------!
4	! Current revisions:
5	! -----------------
6	! optional barriers included in order to speed up collective operations
7	!
8	! Former revisions:
9	! -----------------
10	! $Id: transpose.f90 622 2010-12-10 08:08:13Z raasch $
11	!
12	! 164 2008-05-15 08:46:15Z raasch
13	! f_inv changed from subroutine argument to automatic array in order to do
14	! re-ordering from f_in to f_inv in one step, one array work is needed instead
15	! of work1 and work2
16	!
17	! February 2007
18	! RCS Log replace by Id keyword, revision history cleaned up
19	!
20	! Revision 1.2 2004/04/30 13:12:17 raasch
21	! Switched from mpi_alltoallv to the simpler mpi_alltoall,
22	! all former transpose-routine files collected in this file, enlarged
23	! transposition arrays introduced
24	!
25	! Revision 1.1 2004/04/30 13:08:16 raasch
26	! Initial revision (collection of former routines transpose_xy, transpose_xz,
27	! transpose_yx, transpose_yz, transpose_zx, transpose_zy)
28	!
29	! Revision 1.1 1997/07/24 11:25:18 raasch
30	! Initial revision
31	!
32	!
33	! Description:
34	! ------------
35	! Transposition of input array (f_in) from x to y. For the input array, all
36	! elements along x reside on the same PE, while after transposition, all
37	! elements along y reside on the same PE.
38	!------------------------------------------------------------------------------!
39
40	USE cpulog
41	USE indices
42	USE interfaces
43	USE pegrid
44	USE transpose_indices
45
46	IMPLICIT NONE
47
48	INTEGER :: i, j, k, l, m, ys
49
50	REAL :: f_in(0:nxa,nys_x:nyn_xa,nzb_x:nzt_xa), &
51	f_inv(nys_x:nyn_xa,nzb_x:nzt_xa,0:nxa), &
52	f_out(0:nya,nxl_y:nxr_ya,nzb_y:nzt_ya), &
53	work(nnxnnynnz)
54
55	#if defined( __parallel )
56
57	!
58	!-- Rearrange indices of input array in order to make data to be send
59	!-- by MPI contiguous
60	DO i = 0, nxa
61	DO k = nzb_x, nzt_xa
62	DO j = nys_x, nyn_xa
63	f_inv(j,k,i) = f_in(i,j,k)
64	ENDDO
65	ENDDO
66	ENDDO
67
68	!
69	!-- Transpose array
70	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
71	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
72	CALL MPI_ALLTOALL( f_inv(nys_x,nzb_x,0), sendrecvcount_xy, MPI_REAL, &
73	work(1), sendrecvcount_xy, MPI_REAL, &
74	comm1dy, ierr )
75	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
76
77	!
78	!-- Reorder transposed array
79	m = 0
80	DO l = 0, pdims(2) - 1
81	ys = 0 + l * ( nyn_xa - nys_x + 1 )
82	DO i = nxl_y, nxr_ya
83	DO k = nzb_y, nzt_ya
84	DO j = ys, ys + nyn_xa - nys_x
85	m = m + 1
86	f_out(j,i,k) = work(m)
87	ENDDO
88	ENDDO
89	ENDDO
90	ENDDO
91
92	#endif
93
94	END SUBROUTINE transpose_xy
95
96
97	SUBROUTINE transpose_xz( f_in, work, f_out )
98
99	!------------------------------------------------------------------------------!
100	! Description:
101	! ------------
102	! Transposition of input array (f_in) from x to z. For the input array, all
103	! elements along x reside on the same PE, while after transposition, all
104	! elements along z reside on the same PE.
105	!------------------------------------------------------------------------------!
106
107	USE cpulog
108	USE indices
109	USE interfaces
110	USE pegrid
111	USE transpose_indices
112
113	IMPLICIT NONE
114
115	INTEGER :: i, j, k, l, m, xs
116
117	REAL :: f_in(0:nxa,nys_x:nyn_xa,nzb_x:nzt_xa), &
118	f_inv(nys:nyna,nxl:nxra,1:nza), &
119	f_out(1:nza,nys:nyna,nxl:nxra), &
120	work(nnxnnynnz)
121
122	#if defined( __parallel )
123
124	!
125	!-- If the PE grid is one-dimensional along y, the array has only to be
126	!-- reordered locally and therefore no transposition has to be done.
127	IF ( pdims(1) /= 1 ) THEN
128	!
129	!-- Reorder input array for transposition
130	m = 0
131	DO l = 0, pdims(1) - 1
132	xs = 0 + l * nnx
133	DO k = nzb_x, nzt_xa
134	DO i = xs, xs + nnx - 1
135	DO j = nys_x, nyn_xa
136	m = m + 1
137	work(m) = f_in(i,j,k)
138	ENDDO
139	ENDDO
140	ENDDO
141	ENDDO
142
143	!
144	!-- Transpose array
145	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
146	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
147	CALL MPI_ALLTOALL( work(1), sendrecvcount_zx, MPI_REAL, &
148	f_inv(nys,nxl,1), sendrecvcount_zx, MPI_REAL, &
149	comm1dx, ierr )
150	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
151
152	!
153	!-- Reorder transposed array in a way that the z index is in first position
154	DO k = 1, nza
155	DO i = nxl, nxra
156	DO j = nys, nyna
157	f_out(k,j,i) = f_inv(j,i,k)
158	ENDDO
159	ENDDO
160	ENDDO
161	ELSE
162	!
163	!-- Reorder the array in a way that the z index is in first position
164	DO i = nxl, nxra
165	DO j = nys, nyna
166	DO k = 1, nza
167	f_inv(j,i,k) = f_in(i,j,k)
168	ENDDO
169	ENDDO
170	ENDDO
171
172	DO k = 1, nza
173	DO i = nxl, nxra
174	DO j = nys, nyna
175	f_out(k,j,i) = f_inv(j,i,k)
176	ENDDO
177	ENDDO
178	ENDDO
179
180	ENDIF
181
182
183	#endif
184
185	END SUBROUTINE transpose_xz
186
187
188	SUBROUTINE transpose_yx( f_in, work, f_out )
189
190	!------------------------------------------------------------------------------!
191	! Description:
192	! ------------
193	! Transposition of input array (f_in) from y to x. For the input array, all
194	! elements along y reside on the same PE, while after transposition, all
195	! elements along x reside on the same PE.
196	!------------------------------------------------------------------------------!
197
198	USE cpulog
199	USE indices
200	USE interfaces
201	USE pegrid
202	USE transpose_indices
203
204	IMPLICIT NONE
205
206	INTEGER :: i, j, k, l, m, ys
207
208	REAL :: f_in(0:nya,nxl_y:nxr_ya,nzb_y:nzt_ya), &
209	f_inv(nys_x:nyn_xa,nzb_x:nzt_xa,0:nxa), &
210	f_out(0:nxa,nys_x:nyn_xa,nzb_x:nzt_xa), &
211	work(nnxnnynnz)
212
213	#if defined( __parallel )
214
215	!
216	!-- Reorder input array for transposition
217	m = 0
218	DO l = 0, pdims(2) - 1
219	ys = 0 + l * ( nyn_xa - nys_x + 1 )
220	DO i = nxl_y, nxr_ya
221	DO k = nzb_y, nzt_ya
222	DO j = ys, ys + nyn_xa - nys_x
223	m = m + 1
224	work(m) = f_in(j,i,k)
225	ENDDO
226	ENDDO
227	ENDDO
228	ENDDO
229
230	!
231	!-- Transpose array
232	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
233	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
234	CALL MPI_ALLTOALL( work(1), sendrecvcount_xy, MPI_REAL, &
235	f_inv(nys_x,nzb_x,0), sendrecvcount_xy, MPI_REAL, &
236	comm1dy, ierr )
237	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
238
239	!
240	!-- Reorder transposed array in a way that the x index is in first position
241	DO i = 0, nxa
242	DO k = nzb_x, nzt_xa
243	DO j = nys_x, nyn_xa
244	f_out(i,j,k) = f_inv(j,k,i)
245	ENDDO
246	ENDDO
247	ENDDO
248
249	#endif
250
251	END SUBROUTINE transpose_yx
252
253
254	SUBROUTINE transpose_yxd( f_in, work, f_out )
255
256	!------------------------------------------------------------------------------!
257	! Description:
258	! ------------
259	! Transposition of input array (f_in) from y to x. For the input array, all
260	! elements along y reside on the same PE, while after transposition, all
261	! elements along x reside on the same PE.
262	! This is a direct transposition for arrays with indices in regular order
263	! (k,j,i) (cf. transpose_yx).
264	!------------------------------------------------------------------------------!
265
266	USE cpulog
267	USE indices
268	USE interfaces
269	USE pegrid
270	USE transpose_indices
271
272	IMPLICIT NONE
273
274	INTEGER :: i, j, k, l, m, recvcount_yx, sendcount_yx, xs
275
276	REAL :: f_in(1:nza,nys:nyna,nxl:nxra), f_inv(nxl:nxra,1:nza,nys:nyna), &
277	f_out(0:nxa,nys_x:nyn_xa,nzb_x:nzt_xa), &
278	work(nnxnnynnz)
279
280	#if defined( __parallel )
281
282	!
283	!-- Rearrange indices of input array in order to make data to be send
284	!-- by MPI contiguous
285	DO k = 1, nza
286	DO j = nys, nyna
287	DO i = nxl, nxra
288	f_inv(i,k,j) = f_in(k,j,i)
289	ENDDO
290	ENDDO
291	ENDDO
292
293	!
294	!-- Transpose array
295	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
296	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
297	CALL MPI_ALLTOALL( f_inv(nxl,1,nys), sendrecvcount_xy, MPI_REAL, &
298	work(1), sendrecvcount_xy, MPI_REAL, &
299	comm1dx, ierr )
300	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
301
302	!
303	!-- Reorder transposed array
304	m = 0
305	DO l = 0, pdims(1) - 1
306	xs = 0 + l * nnx
307	DO j = nys_x, nyn_xa
308	DO k = 1, nza
309	DO i = xs, xs + nnx - 1
310	m = m + 1
311	f_out(i,j,k) = work(m)
312	ENDDO
313	ENDDO
314	ENDDO
315	ENDDO
316
317	#endif
318
319	END SUBROUTINE transpose_yxd
320
321
322	SUBROUTINE transpose_yz( f_in, work, f_out )
323
324	!------------------------------------------------------------------------------!
325	! Description:
326	! ------------
327	! Transposition of input array (f_in) from y to z. For the input array, all
328	! elements along y reside on the same PE, while after transposition, all
329	! elements along z reside on the same PE.
330	!------------------------------------------------------------------------------!
331
332	USE cpulog
333	USE indices
334	USE interfaces
335	USE pegrid
336	USE transpose_indices
337
338	IMPLICIT NONE
339
340	INTEGER :: i, j, k, l, m, zs
341
342	REAL :: f_in(0:nya,nxl_y:nxr_ya,nzb_y:nzt_ya), &
343	f_inv(nxl_y:nxr_ya,nzb_y:nzt_ya,0:nya), &
344	f_out(nxl_z:nxr_za,nys_z:nyn_za,1:nza), &
345	work(nnxnnynnz)
346
347	#if defined( __parallel )
348
349	!
350	!-- Rearrange indices of input array in order to make data to be send
351	!-- by MPI contiguous
352	DO j = 0, nya
353	DO k = nzb_y, nzt_ya
354	DO i = nxl_y, nxr_ya
355	f_inv(i,k,j) = f_in(j,i,k)
356	ENDDO
357	ENDDO
358	ENDDO
359
360	!
361	!-- Move data to different array, because memory location of work1 is
362	!-- needed further below (work1 = work2).
363	!-- If the PE grid is one-dimensional along y, only local reordering
364	!-- of the data is necessary and no transposition has to be done.
365	IF ( pdims(1) == 1 ) THEN
366	DO j = 0, nya
367	DO k = nzb_y, nzt_ya
368	DO i = nxl_y, nxr_ya
369	f_out(i,j,k) = f_inv(i,k,j)
370	ENDDO
371	ENDDO
372	ENDDO
373	RETURN
374	ENDIF
375
376	!
377	!-- Transpose array
378	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
379	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
380	CALL MPI_ALLTOALL( f_inv(nxl_y,nzb_y,0), sendrecvcount_yz, MPI_REAL, &
381	work(1), sendrecvcount_yz, MPI_REAL, &
382	comm1dx, ierr )
383	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
384
385	!
386	!-- Reorder transposed array
387	m = 0
388	DO l = 0, pdims(1) - 1
389	zs = 1 + l * ( nzt_ya - nzb_y + 1 )
390	DO j = nys_z, nyn_za
391	DO k = zs, zs + nzt_ya - nzb_y
392	DO i = nxl_z, nxr_za
393	m = m + 1
394	f_out(i,j,k) = work(m)
395	ENDDO
396	ENDDO
397	ENDDO
398	ENDDO
399
400	#endif
401
402	END SUBROUTINE transpose_yz
403
404
405	SUBROUTINE transpose_zx( f_in, work, f_out )
406
407	!------------------------------------------------------------------------------!
408	! Description:
409	! ------------
410	! Transposition of input array (f_in) from z to x. For the input array, all
411	! elements along z reside on the same PE, while after transposition, all
412	! elements along x reside on the same PE.
413	!------------------------------------------------------------------------------!
414
415	USE cpulog
416	USE indices
417	USE interfaces
418	USE pegrid
419	USE transpose_indices
420
421	IMPLICIT NONE
422
423	INTEGER :: i, j, k, l, m, xs
424
425	REAL :: f_in(1:nza,nys:nyna,nxl:nxra), f_inv(nys:nyna,nxl:nxra,1:nza), &
426	f_out(0:nxa,nys_x:nyn_xa,nzb_x:nzt_xa), &
427	work(nnxnnynnz)
428
429	#if defined( __parallel )
430
431	!
432	!-- Rearrange indices of input array in order to make data to be send
433	!-- by MPI contiguous
434	DO k = 1,nza
435	DO i = nxl, nxra
436	DO j = nys, nyna
437	f_inv(j,i,k) = f_in(k,j,i)
438	ENDDO
439	ENDDO
440	ENDDO
441
442	!
443	!-- Move data to different array, because memory location of work1 is
444	!-- needed further below (work1 = work2).
445	!-- If the PE grid is one-dimensional along y, only local reordering
446	!-- of the data is necessary and no transposition has to be done.
447	IF ( pdims(1) == 1 ) THEN
448	DO k = 1, nza
449	DO i = nxl, nxra
450	DO j = nys, nyna
451	f_out(i,j,k) = f_inv(j,i,k)
452	ENDDO
453	ENDDO
454	ENDDO
455	RETURN
456	ENDIF
457
458	!
459	!-- Transpose array
460	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
461	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
462	CALL MPI_ALLTOALL( f_inv(nys,nxl,1), sendrecvcount_zx, MPI_REAL, &
463	work(1), sendrecvcount_zx, MPI_REAL, &
464	comm1dx, ierr )
465	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
466
467	!
468	!-- Reorder transposed array
469	m = 0
470	DO l = 0, pdims(1) - 1
471	xs = 0 + l * nnx
472	DO k = nzb_x, nzt_xa
473	DO i = xs, xs + nnx - 1
474	DO j = nys_x, nyn_xa
475	m = m + 1
476	f_out(i,j,k) = work(m)
477	ENDDO
478	ENDDO
479	ENDDO
480	ENDDO
481
482	#endif
483
484	END SUBROUTINE transpose_zx
485
486
487	SUBROUTINE transpose_zy( f_in, work, f_out )
488
489	!------------------------------------------------------------------------------!
490	! Description:
491	! ------------
492	! Transposition of input array (f_in) from z to y. For the input array, all
493	! elements along z reside on the same PE, while after transposition, all
494	! elements along y reside on the same PE.
495	!------------------------------------------------------------------------------!
496
497	USE cpulog
498	USE indices
499	USE interfaces
500	USE pegrid
501	USE transpose_indices
502
503	IMPLICIT NONE
504
505	INTEGER :: i, j, k, l, m, zs
506
507	REAL :: f_in(nxl_z:nxr_za,nys_z:nyn_za,1:nza), &
508	f_inv(nxl_y:nxr_ya,nzb_y:nzt_ya,0:nya), &
509	f_out(0:nya,nxl_y:nxr_ya,nzb_y:nzt_ya), &
510	work(nnxnnynnz)
511
512	#if defined( __parallel )
513
514	!
515	!-- If the PE grid is one-dimensional along y, the array has only to be
516	!-- reordered locally and therefore no transposition has to be done.
517	IF ( pdims(1) /= 1 ) THEN
518	!
519	!-- Reorder input array for transposition
520	m = 0
521	DO l = 0, pdims(1) - 1
522	zs = 1 + l * ( nzt_ya - nzb_y + 1 )
523	DO j = nys_z, nyn_za
524	DO k = zs, zs + nzt_ya - nzb_y
525	DO i = nxl_z, nxr_za
526	m = m + 1
527	work(m) = f_in(i,j,k)
528	ENDDO
529	ENDDO
530	ENDDO
531	ENDDO
532
533	!
534	!-- Transpose array
535	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
536	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
537	CALL MPI_ALLTOALL( work(1), sendrecvcount_yz, MPI_REAL, &
538	f_inv(nxl_y,nzb_y,0), sendrecvcount_yz, MPI_REAL, &
539	comm1dx, ierr )
540	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
541
542	!
543	!-- Reorder transposed array in a way that the y index is in first position
544	DO j = 0, nya
545	DO k = nzb_y, nzt_ya
546	DO i = nxl_y, nxr_ya
547	f_out(j,i,k) = f_inv(i,k,j)
548	ENDDO
549	ENDDO
550	ENDDO
551	ELSE
552	!
553	!-- Reorder the array in a way that the y index is in first position
554	DO k = nzb_y, nzt_ya
555	DO j = 0, nya
556	DO i = nxl_y, nxr_ya
557	f_inv(i,k,j) = f_in(i,j,k)
558	ENDDO
559	ENDDO
560	ENDDO
561	!
562	!-- Move data to output array
563	DO k = nzb_y, nzt_ya
564	DO i = nxl_y, nxr_ya
565	DO j = 0, nya
566	f_out(j,i,k) = f_inv(i,k,j)
567	ENDDO
568	ENDDO
569	ENDDO
570
571	ENDIF
572
573	#endif
574
575	END SUBROUTINE transpose_zy
576
577
578	SUBROUTINE transpose_zyd( f_in, work, f_out )
579
580	!------------------------------------------------------------------------------!
581	! Description:
582	! ------------
583	! Transposition of input array (f_in) from z to y. For the input array, all
584	! elements along z reside on the same PE, while after transposition, all
585	! elements along y reside on the same PE.
586	! This is a direct transposition for arrays with indices in regular order
587	! (k,j,i) (cf. transpose_zy).
588	!------------------------------------------------------------------------------!
589
590	USE cpulog
591	USE indices
592	USE interfaces
593	USE pegrid
594	USE transpose_indices
595
596	IMPLICIT NONE
597
598	INTEGER :: i, j, k, l, m, ys
599
600	REAL :: f_in(1:nza,nys:nyna,nxl:nxra), f_inv(nys:nyna,nxl:nxra,1:nza), &
601	f_out(0:nya,nxl_yd:nxr_yda,nzb_yd:nzt_yda), &
602	work(nnxnnynnz)
603
604	#if defined( __parallel )
605
606	!
607	!-- Rearrange indices of input array in order to make data to be send
608	!-- by MPI contiguous
609	DO i = nxl, nxra
610	DO j = nys, nyna
611	DO k = 1, nza
612	f_inv(j,i,k) = f_in(k,j,i)
613	ENDDO
614	ENDDO
615	ENDDO
616
617	!
618	!-- Move data to different array, because memory location of work1 is
619	!-- needed further below (work1 = work2).
620	!-- If the PE grid is one-dimensional along x, only local reordering
621	!-- of the data is necessary and no transposition has to be done.
622	IF ( pdims(2) == 1 ) THEN
623	DO k = 1, nza
624	DO i = nxl, nxra
625	DO j = nys, nyna
626	f_out(j,i,k) = f_inv(j,i,k)
627	ENDDO
628	ENDDO
629	ENDDO
630	RETURN
631	ENDIF
632
633	!
634	!-- Transpose array
635	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
636	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
637	CALL MPI_ALLTOALL( f_inv(nys,nxl,1), sendrecvcount_zyd, MPI_REAL, &
638	work(1), sendrecvcount_zyd, MPI_REAL, &
639	comm1dy, ierr )
640	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
641
642	!
643	!-- Reorder transposed array
644	m = 0
645	DO l = 0, pdims(2) - 1
646	ys = 0 + l * nny
647	DO k = nzb_yd, nzt_yda
648	DO i = nxl_yd, nxr_yda
649	DO j = ys, ys + nny - 1
650	m = m + 1
651	f_out(j,i,k) = work(m)
652	ENDDO
653	ENDDO
654	ENDDO
655	ENDDO
656
657	#endif
658
659	END SUBROUTINE transpose_zyd

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: palm/trunk/SOURCE/transpose.f90 @ 622

Download in other formats: