Home

Context Navigation

poisfft.f90 @ 622

Last change on this file since 622 was 622, checked in by raasch, 14 years ago

New:
---

Optional barriers included in order to speed up collective operations
MPI_ALLTOALL and MPI_ALLREDUCE. This feature is controlled with new initial
parameter collective_wait. Default is .FALSE, but .TRUE. on SGI-type
systems. (advec_particles, advec_s_bc, buoyancy, check_for_restart,
cpu_statistics, data_output_2d, data_output_ptseries, flow_statistics,
global_min_max, inflow_turbulence, init_3d_model, init_particles, init_pegrid,
init_slope, parin, pres, poismg, set_particle_attributes, timestep,
read_var_list, user_statistics, write_compressed, write_var_list)

Adjustments for Kyushu Univ. (lcrte, ibmku). Concerning hybrid
(MPI/openMP) runs, the number of openMP threads per MPI tasks can now
be given as an argument to mrun-option -O. (mbuild, mrun, subjob)

Changed:

Initialization of the module command changed for SGI-ICE/lcsgi (mbuild, subjob)

Errors:

Property svn:keywords set to Id

File size: 44.9 KB

Line
1	MODULE poisfft_mod
2
3	!------------------------------------------------------------------------------!
4	! Current revisions:
5	! -----------------
6	! optional barriers included in order to speed up collective operations
7	!
8	! Former revisions:
9	! -----------------
10	! $Id: poisfft.f90 622 2010-12-10 08:08:13Z raasch $
11	!
12	! 377 2009-09-04 11:09:00Z raasch
13	! __lcmuk changed to __lc to avoid problems with Intel compiler on sgi-ice
14	!
15	! 164 2008-05-15 08:46:15Z raasch
16	! Arguments removed from transpose routines
17	!
18	! 128 2007-10-26 13:11:14Z raasch
19	! Bugfix: wavenumber calculation for even nx in routines maketri
20	!
21	! 85 2007-05-11 09:35:14Z raasch
22	! Bugfix: work_fft*_vec removed from some PRIVATE-declarations
23	!
24	! 76 2007-03-29 00:58:32Z raasch
25	! Tridiagonal coefficients adjusted for Neumann boundary conditions both at
26	! the bottom and the top.
27	!
28	! RCS Log replace by Id keyword, revision history cleaned up
29	!
30	! Revision 1.24 2006/08/04 15:00:24 raasch
31	! Default setting of the thread number tn in case of not using OpenMP
32	!
33	! Revision 1.23 2006/02/23 12:48:38 raasch
34	! Additional compiler directive in routine tridia_1dd for preventing loop
35	! exchange on NEC-SX6
36	!
37	! Revision 1.20 2004/04/30 12:38:09 raasch
38	! Parts of former poisfft_hybrid moved to this subroutine,
39	! former subroutine changed to a module, renaming of FFT-subroutines and
40	! -module, FFTs completely substituted by calls of fft_x and fft_y,
41	! NAG fft used in the non-parallel case completely removed, l in maketri
42	! is now a 1d-array, variables passed by modules instead of using parameter
43	! lists, enlarged transposition arrays introduced
44	!
45	! Revision 1.1 1997/07/24 11:24:14 raasch
46	! Initial revision
47	!
48	!
49	! Description:
50	! ------------
51	! See below.
52	!------------------------------------------------------------------------------!
53
54	!--------------------------------------------------------------------------!
55	! poisfft !
56	! !
57	! Original version: Stephan Siano (pois3d) !
58	! !
59	! Institute of Meteorology and Climatology, University of Hannover !
60	! Germany !
61	! !
62	! Version as of July 23,1996 !
63	! !
64	! !
65	! Version for parallel computers: Siegfried Raasch !
66	! !
67	! Version as of July 03,1997 !
68	! !
69	! Solves the Poisson equation with a 2D spectral method !
70	! d^2 p / dx^2 + d^2 p / dy^2 + d^2 p / dz^2 = s !
71	! !
72	! Input: !
73	! real ar contains in the (nnx,nny,nnz) elements, !
74	! starting from the element (1,nys,nxl), the !
75	! values for s !
76	! real work Temporary array !
77	! !
78	! Output: !
79	! real ar contains the solution for p !
80	!--------------------------------------------------------------------------!
81
82	USE fft_xy
83	USE indices
84	USE transpose_indices
85
86	IMPLICIT NONE
87
88	PRIVATE
89	PUBLIC poisfft, poisfft_init
90
91	INTERFACE poisfft
92	MODULE PROCEDURE poisfft
93	END INTERFACE poisfft
94
95	INTERFACE poisfft_init
96	MODULE PROCEDURE poisfft_init
97	END INTERFACE poisfft_init
98
99	CONTAINS
100
101	SUBROUTINE poisfft_init
102
103	CALL fft_init
104
105	END SUBROUTINE poisfft_init
106
107
108	SUBROUTINE poisfft( ar, work )
109
110	USE cpulog
111	USE interfaces
112	USE pegrid
113
114	IMPLICIT NONE
115
116	REAL, DIMENSION(1:nza,nys:nyna,nxl:nxra) :: ar, work
117
118
119	CALL cpu_log( log_point_s(3), 'poisfft', 'start' )
120
121	!
122	!-- Two-dimensional Fourier Transformation in x- and y-direction.
123	#if defined( __parallel )
124	IF ( pdims(2) == 1 ) THEN
125
126	!
127	!-- 1d-domain-decomposition along x:
128	!-- FFT along y and transposition y --> x
129	CALL ffty_tr_yx( ar, work, ar )
130
131	!
132	!-- FFT along x, solving the tridiagonal system and backward FFT
133	CALL fftx_tri_fftx( ar )
134
135	!
136	!-- Transposition x --> y and backward FFT along y
137	CALL tr_xy_ffty( ar, work, ar )
138
139	ELSEIF ( pdims(1) == 1 ) THEN
140
141	!
142	!-- 1d-domain-decomposition along y:
143	!-- FFT along x and transposition x --> y
144	CALL fftx_tr_xy( ar, work, ar )
145
146	!
147	!-- FFT along y, solving the tridiagonal system and backward FFT
148	CALL ffty_tri_ffty( ar )
149
150	!
151	!-- Transposition y --> x and backward FFT along x
152	CALL tr_yx_fftx( ar, work, ar )
153
154	ELSE
155
156	!
157	!-- 2d-domain-decomposition
158	!-- Transposition z --> x
159	CALL cpu_log( log_point_s(5), 'transpo forward', 'start' )
160	CALL transpose_zx( ar, work, ar )
161	CALL cpu_log( log_point_s(5), 'transpo forward', 'pause' )
162
163	CALL cpu_log( log_point_s(4), 'fft_x', 'start' )
164	CALL fftxp( ar, 'forward' )
165	CALL cpu_log( log_point_s(4), 'fft_x', 'pause' )
166
167	!
168	!-- Transposition x --> y
169	CALL cpu_log( log_point_s(5), 'transpo forward', 'continue' )
170	CALL transpose_xy( ar, work, ar )
171	CALL cpu_log( log_point_s(5), 'transpo forward', 'pause' )
172
173	CALL cpu_log( log_point_s(7), 'fft_y', 'start' )
174	CALL fftyp( ar, 'forward' )
175	CALL cpu_log( log_point_s(7), 'fft_y', 'pause' )
176
177	!
178	!-- Transposition y --> z
179	CALL cpu_log( log_point_s(5), 'transpo forward', 'continue' )
180	CALL transpose_yz( ar, work, ar )
181	CALL cpu_log( log_point_s(5), 'transpo forward', 'stop' )
182
183	!
184	!-- Solve the Poisson equation in z-direction in cartesian space.
185	CALL cpu_log( log_point_s(6), 'tridia', 'start' )
186	CALL tridia( ar )
187	CALL cpu_log( log_point_s(6), 'tridia', 'stop' )
188
189	!
190	!-- Inverse Fourier Transformation
191	!-- Transposition z --> y
192	CALL cpu_log( log_point_s(8), 'transpo invers', 'start' )
193	CALL transpose_zy( ar, work, ar )
194	CALL cpu_log( log_point_s(8), 'transpo invers', 'pause' )
195
196	CALL cpu_log( log_point_s(7), 'fft_y', 'continue' )
197	CALL fftyp( ar, 'backward' )
198	CALL cpu_log( log_point_s(7), 'fft_y', 'stop' )
199
200	!
201	!-- Transposition y --> x
202	CALL cpu_log( log_point_s(8), 'transpo invers', 'continue' )
203	CALL transpose_yx( ar, work, ar )
204	CALL cpu_log( log_point_s(8), 'transpo invers', 'pause' )
205
206	CALL cpu_log( log_point_s(4), 'fft_x', 'continue' )
207	CALL fftxp( ar, 'backward' )
208	CALL cpu_log( log_point_s(4), 'fft_x', 'stop' )
209
210	!
211	!-- Transposition x --> z
212	CALL cpu_log( log_point_s(8), 'transpo invers', 'continue' )
213	CALL transpose_xz( ar, work, ar )
214	CALL cpu_log( log_point_s(8), 'transpo invers', 'stop' )
215
216	ENDIF
217
218	#else
219
220	!
221	!-- Two-dimensional Fourier Transformation along x- and y-direction.
222	CALL cpu_log( log_point_s(4), 'fft_x', 'start' )
223	CALL fftx( ar, 'forward' )
224	CALL cpu_log( log_point_s(4), 'fft_x', 'pause' )
225	CALL cpu_log( log_point_s(7), 'fft_y', 'start' )
226	CALL ffty( ar, 'forward' )
227	CALL cpu_log( log_point_s(7), 'fft_y', 'pause' )
228
229	!
230	!-- Solve the Poisson equation in z-direction in cartesian space.
231	CALL cpu_log( log_point_s(6), 'tridia', 'start' )
232	CALL tridia( ar )
233	CALL cpu_log( log_point_s(6), 'tridia', 'stop' )
234
235	!
236	!-- Inverse Fourier Transformation.
237	CALL cpu_log( log_point_s(7), 'fft_y', 'continue' )
238	CALL ffty( ar, 'backward' )
239	CALL cpu_log( log_point_s(7), 'fft_y', 'stop' )
240	CALL cpu_log( log_point_s(4), 'fft_x', 'continue' )
241	CALL fftx( ar, 'backward' )
242	CALL cpu_log( log_point_s(4), 'fft_x', 'stop' )
243
244	#endif
245
246	CALL cpu_log( log_point_s(3), 'poisfft', 'stop' )
247
248	END SUBROUTINE poisfft
249
250
251
252	SUBROUTINE tridia( ar )
253
254	!------------------------------------------------------------------------------!
255	! solves the linear system of equations:
256	!
257	! -(4 pi^2(i^2/(dx^2nnx^2)+j^2/(dy^2nny^2))+
258	! 1/(dzu(k)dzw(k))+1/(dzu(k-1)dzw(k)))*p(i,j,k)+
259	! 1/(dzu(k)dzw(k))p(i,j,k+1)+1/(dzu(k-1)dzw(k))p(i,j,k-1)=d(i,j,k)
260	!
261	! by using the Thomas algorithm
262	!------------------------------------------------------------------------------!
263
264	USE arrays_3d
265
266	IMPLICIT NONE
267
268	INTEGER :: i, j, k, nnyh
269
270	REAL, DIMENSION(nxl_z:nxr_z,0:nz-1) :: ar1
271	REAL, DIMENSION(5,nxl_z:nxr_z,0:nz-1) :: tri
272
273	#if defined( __parallel )
274	REAL :: ar(nxl_z:nxr_za,nys_z:nyn_za,1:nza)
275	#else
276	REAL :: ar(1:nz,nys_z:nyn_z,nxl_z:nxr_z)
277	#endif
278
279
280	nnyh = (ny+1) / 2
281
282	!
283	!-- Define constant elements of the tridiagonal matrix.
284	DO k = 0, nz-1
285	DO i = nxl_z, nxr_z
286	tri(2,i,k) = ddzu(k+1) * ddzw(k+1)
287	tri(3,i,k) = ddzu(k+2) * ddzw(k+1)
288	ENDDO
289	ENDDO
290
291	#if defined( __parallel )
292	!
293	!-- Repeat for all y-levels.
294	DO j = nys_z, nyn_z
295	IF ( j <= nnyh ) THEN
296	CALL maketri( tri, j )
297	ELSE
298	CALL maketri( tri, ny+1-j )
299	ENDIF
300	CALL split( tri )
301	CALL substi( ar, ar1, tri, j )
302	ENDDO
303	#else
304	!
305	!-- First y-level.
306	CALL maketri( tri, nys_z )
307	CALL split( tri )
308	CALL substi( ar, ar1, tri, 0 )
309
310	!
311	!-- Further y-levels.
312	DO j = 1, nnyh - 1
313	CALL maketri( tri, j )
314	CALL split( tri )
315	CALL substi( ar, ar1, tri, j )
316	CALL substi( ar, ar1, tri, ny+1-j )
317	ENDDO
318	CALL maketri( tri, nnyh )
319	CALL split( tri )
320	CALL substi( ar, ar1, tri, nnyh+nys )
321	#endif
322
323	CONTAINS
324
325	SUBROUTINE maketri( tri, j )
326
327	!------------------------------------------------------------------------------!
328	! Computes the i- and j-dependent component of the matrix
329	!------------------------------------------------------------------------------!
330
331	USE arrays_3d
332	USE constants
333	USE control_parameters
334	USE grid_variables
335
336	IMPLICIT NONE
337
338	INTEGER :: i, j, k, nnxh
339	REAL :: a, c
340	REAL :: ll(nxl_z:nxr_z)
341	REAL :: tri(5,nxl_z:nxr_z,0:nz-1)
342
343
344	nnxh = ( nx + 1 ) / 2
345
346	!
347	!-- Provide the tridiagonal matrix for solution of the Poisson equation in
348	!-- Fourier space. The coefficients are computed following the method of
349	!-- Schmidt et al. (DFVLR-Mitteilung 84-15), which departs from Stephan
350	!-- Siano's original version by discretizing the Poisson equation,
351	!-- before it is Fourier-transformed
352	#if defined( __parallel )
353	DO i = nxl_z, nxr_z
354	IF ( i >= 0 .AND. i <= nnxh ) THEN
355	ll(i) = 2.0 * ( 1.0 - COS( ( 2.0 * pi * i ) / &
356	FLOAT( nx+1 ) ) ) / ( dx * dx ) + &
357	2.0 * ( 1.0 - COS( ( 2.0 * pi * j ) / &
358	FLOAT( ny+1 ) ) ) / ( dy * dy )
359	ELSE
360	ll(i) = 2.0 * ( 1.0 - COS( ( 2.0 * pi * ( nx+1-i ) ) / &
361	FLOAT( nx+1 ) ) ) / ( dx * dx ) + &
362	2.0 * ( 1.0 - COS( ( 2.0 * pi * j ) / &
363	FLOAT( ny+1 ) ) ) / ( dy * dy )
364	ENDIF
365	DO k = 0,nz-1
366	a = -1.0 * ddzu(k+2) * ddzw(k+1)
367	c = -1.0 * ddzu(k+1) * ddzw(k+1)
368	tri(1,i,k) = a + c - ll(i)
369	ENDDO
370	ENDDO
371	#else
372	DO i = 0, nnxh
373	ll(i) = 2.0 * ( 1.0 - COS( ( 2.0 * pi * i ) / FLOAT( nx+1 ) ) ) / &
374	( dx * dx ) + &
375	2.0 * ( 1.0 - COS( ( 2.0 * pi * j ) / FLOAT( ny+1 ) ) ) / &
376	( dy * dy )
377	DO k = 0, nz-1
378	a = -1.0 * ddzu(k+2) * ddzw(k+1)
379	c = -1.0 * ddzu(k+1) * ddzw(k+1)
380	tri(1,i,k) = a + c - ll(i)
381	IF ( i >= 1 .and. i < nnxh ) THEN
382	tri(1,nx+1-i,k) = tri(1,i,k)
383	ENDIF
384	ENDDO
385	ENDDO
386	#endif
387	IF ( ibc_p_b == 1 .OR. ibc_p_b == 2 ) THEN
388	DO i = nxl_z, nxr_z
389	tri(1,i,0) = tri(1,i,0) + tri(2,i,0)
390	ENDDO
391	ENDIF
392	IF ( ibc_p_t == 1 ) THEN
393	DO i = nxl_z, nxr_z
394	tri(1,i,nz-1) = tri(1,i,nz-1) + tri(3,i,nz-1)
395	ENDDO
396	ENDIF
397
398	END SUBROUTINE maketri
399
400
401	SUBROUTINE substi( ar, ar1, tri, j )
402
403	!------------------------------------------------------------------------------!
404	! Substitution (Forward and Backward) (Thomas algorithm)
405	!------------------------------------------------------------------------------!
406
407	USE control_parameters
408
409	IMPLICIT NONE
410
411	INTEGER :: i, j, k
412	REAL :: ar1(nxl_z:nxr_z,0:nz-1)
413	REAL :: tri(5,nxl_z:nxr_z,0:nz-1)
414	#if defined( __parallel )
415	REAL :: ar(nxl_z:nxr_za,nys_z:nyn_za,1:nza)
416	#else
417	REAL :: ar(1:nz,nys_z:nyn_z,nxl_z:nxr_z)
418	#endif
419
420	!
421	!-- Forward substitution.
422	DO i = nxl_z, nxr_z
423	#if defined( __parallel )
424	ar1(i,0) = ar(i,j,1)
425	#else
426	ar1(i,0) = ar(1,j,i)
427	#endif
428	ENDDO
429	DO k = 1, nz - 1
430	DO i = nxl_z, nxr_z
431	#if defined( __parallel )
432	ar1(i,k) = ar(i,j,k+1) - tri(5,i,k) * ar1(i,k-1)
433	#else
434	ar1(i,k) = ar(k+1,j,i) - tri(5,i,k) * ar1(i,k-1)
435	#endif
436	ENDDO
437	ENDDO
438
439	!
440	!-- Backward substitution.
441	DO i = nxl_z, nxr_z
442	#if defined( __parallel )
443	ar(i,j,nz) = ar1(i,nz-1) / tri(4,i,nz-1)
444	#else
445	ar(nz,j,i) = ar1(i,nz-1) / tri(4,i,nz-1)
446	#endif
447	ENDDO
448	DO k = nz-2, 0, -1
449	DO i = nxl_z, nxr_z
450	#if defined( __parallel )
451	ar(i,j,k+1) = ( ar1(i,k) - tri(3,i,k) * ar(i,j,k+2) ) &
452	/ tri(4,i,k)
453	#else
454	ar(k+1,j,i) = ( ar1(i,k) - tri(3,i,k) * ar(k+2,j,i) ) &
455	/ tri(4,i,k)
456	#endif
457	ENDDO
458	ENDDO
459
460	!
461	!-- Indices i=0, j=0 correspond to horizontally averaged pressure.
462	!-- The respective values of ar should be zero at all k-levels if
463	!-- acceleration of horizontally averaged vertical velocity is zero.
464	IF ( ibc_p_b == 1 .AND. ibc_p_t == 1 ) THEN
465	IF ( j == 0 .AND. nxl_z == 0 ) THEN
466	#if defined( __parallel )
467	DO k = 1, nz
468	ar(nxl_z,j,k) = 0.0
469	ENDDO
470	#else
471	DO k = 1, nz
472	ar(k,j,nxl_z) = 0.0
473	ENDDO
474	#endif
475	ENDIF
476	ENDIF
477
478	END SUBROUTINE substi
479
480
481	SUBROUTINE split( tri )
482
483	!------------------------------------------------------------------------------!
484	! Splitting of the tridiagonal matrix (Thomas algorithm)
485	!------------------------------------------------------------------------------!
486
487	IMPLICIT NONE
488
489	INTEGER :: i, k
490	REAL :: tri(5,nxl_z:nxr_z,0:nz-1)
491
492	!
493	!-- Splitting.
494	DO i = nxl_z, nxr_z
495	tri(4,i,0) = tri(1,i,0)
496	ENDDO
497	DO k = 1, nz-1
498	DO i = nxl_z, nxr_z
499	tri(5,i,k) = tri(2,i,k) / tri(4,i,k-1)
500	tri(4,i,k) = tri(1,i,k) - tri(3,i,k-1) * tri(5,i,k)
501	ENDDO
502	ENDDO
503
504	END SUBROUTINE split
505
506	END SUBROUTINE tridia
507
508
509	#if defined( __parallel )
510	SUBROUTINE fftxp( ar, direction )
511
512	!------------------------------------------------------------------------------!
513	! Fourier-transformation along x-direction Parallelized version
514	!------------------------------------------------------------------------------!
515
516	IMPLICIT NONE
517
518	CHARACTER (LEN=*) :: direction
519	INTEGER :: j, k
520	REAL :: ar(0:nxa,nys_x:nyn_xa,nzb_x:nzt_xa)
521
522	!
523	!-- Performing the fft with one of the methods implemented
524	DO k = nzb_x, nzt_x
525	DO j = nys_x, nyn_x
526	CALL fft_x( ar(0:nx,j,k), direction )
527	ENDDO
528	ENDDO
529
530	END SUBROUTINE fftxp
531
532	#else
533	SUBROUTINE fftx( ar, direction )
534
535	!------------------------------------------------------------------------------!
536	! Fourier-transformation along x-direction Non parallel version
537	!------------------------------------------------------------------------------!
538
539	IMPLICIT NONE
540
541	CHARACTER (LEN=*) :: direction
542	INTEGER :: i, j, k
543	REAL :: ar(1:nz,0:ny,0:nx)
544
545	!
546	!-- Performing the fft with one of the methods implemented
547	DO k = 1, nz
548	DO j = 0, ny
549	CALL fft_x( ar(k,j,0:nx), direction )
550	ENDDO
551	ENDDO
552
553	END SUBROUTINE fftx
554	#endif
555
556
557	#if defined( __parallel )
558	SUBROUTINE fftyp( ar, direction )
559
560	!------------------------------------------------------------------------------!
561	! Fourier-transformation along y-direction Parallelized version
562	!------------------------------------------------------------------------------!
563
564	IMPLICIT NONE
565
566	CHARACTER (LEN=*) :: direction
567	INTEGER :: i, k
568	REAL :: ar(0:nya,nxl_y:nxr_ya,nzb_y:nzt_ya)
569
570	!
571	!-- Performing the fft with one of the methods implemented
572	DO k = nzb_y, nzt_y
573	DO i = nxl_y, nxr_y
574	CALL fft_y( ar(0:ny,i,k), direction )
575	ENDDO
576	ENDDO
577
578	END SUBROUTINE fftyp
579
580	#else
581	SUBROUTINE ffty( ar, direction )
582
583	!------------------------------------------------------------------------------!
584	! Fourier-transformation along y-direction Non parallel version
585	!------------------------------------------------------------------------------!
586
587	IMPLICIT NONE
588
589	CHARACTER (LEN=*) :: direction
590	INTEGER :: i, k
591	REAL :: ar(1:nz,0:ny,0:nx)
592
593	!
594	!-- Performing the fft with one of the methods implemented
595	DO k = 1, nz
596	DO i = 0, nx
597	CALL fft_y( ar(k,0:ny,i), direction )
598	ENDDO
599	ENDDO
600
601	END SUBROUTINE ffty
602	#endif
603
604	#if defined( __parallel )
605	SUBROUTINE ffty_tr_yx( f_in, work, f_out )
606
607	!------------------------------------------------------------------------------!
608	! Fourier-transformation along y with subsequent transposition y --> x for
609	! a 1d-decomposition along x
610	!
611	! ATTENTION: The performance of this routine is much faster on the NEC-SX6,
612	! if the first index of work_ffty_vec is odd. Otherwise
613	! memory bank conflicts may occur (especially if the index is a
614	! multiple of 128). That's why work_ffty_vec is dimensioned as
615	! 0:ny+1.
616	! Of course, this will not work if users are using an odd number
617	! of gridpoints along y.
618	!------------------------------------------------------------------------------!
619
620	USE control_parameters
621	USE cpulog
622	USE indices
623	USE interfaces
624	USE pegrid
625	USE transpose_indices
626
627	IMPLICIT NONE
628
629	INTEGER :: i, iend, iouter, ir, j, k
630	INTEGER, PARAMETER :: stridex = 4
631
632	REAL, DIMENSION(0:ny,stridex) :: work_ffty
633	#if defined( __nec )
634	REAL, DIMENSION(0:ny+1,1:nz,nxl:nxr) :: work_ffty_vec
635	#endif
636	REAL, DIMENSION(1:nza,0:nya,nxl:nxra) :: f_in
637	REAL, DIMENSION(nnx,1:nza,nys_x:nyn_xa,pdims(1)) :: f_out
638	REAL, DIMENSION(nxl:nxra,1:nza,0:nya) :: work
639
640	!
641	!-- Carry out the FFT along y, where all data are present due to the
642	!-- 1d-decomposition along x. Resort the data in a way that x becomes
643	!-- the first index.
644	CALL cpu_log( log_point_s(7), 'fft_y', 'start' )
645
646	IF ( host(1:3) == 'nec' ) THEN
647	#if defined( __nec )
648	!
649	!-- Code optimized for vector processors
650	!$OMP PARALLEL PRIVATE ( i, j, k )
651	!$OMP DO
652	DO i = nxl, nxr
653
654	DO j = 0, ny
655	DO k = 1, nz
656	work_ffty_vec(j,k,i) = f_in(k,j,i)
657	ENDDO
658	ENDDO
659
660	CALL fft_y_m( work_ffty_vec(:,:,i), ny+1, 'forward' )
661
662	ENDDO
663
664	!$OMP DO
665	DO k = 1, nz
666	DO j = 0, ny
667	DO i = nxl, nxr
668	work(i,k,j) = work_ffty_vec(j,k,i)
669	ENDDO
670	ENDDO
671	ENDDO
672	!$OMP END PARALLEL
673	#endif
674
675	ELSE
676
677	!
678	!-- Cache optimized code.
679	!-- The i-(x-)direction is split into a strided outer loop and an inner
680	!-- loop for better cache performance
681	!$OMP PARALLEL PRIVATE (i,iend,iouter,ir,j,k,work_ffty)
682	!$OMP DO
683	DO iouter = nxl, nxr, stridex
684
685	iend = MIN( iouter+stridex-1, nxr ) ! Upper bound for inner i loop
686
687	DO k = 1, nz
688
689	DO i = iouter, iend
690
691	ir = i-iouter+1 ! counter within a stride
692	DO j = 0, ny
693	work_ffty(j,ir) = f_in(k,j,i)
694	ENDDO
695	!
696	!-- FFT along y
697	CALL fft_y( work_ffty(:,ir), 'forward' )
698
699	ENDDO
700
701	!
702	!-- Resort
703	DO j = 0, ny
704	DO i = iouter, iend
705	work(i,k,j) = work_ffty(j,i-iouter+1)
706	ENDDO
707	ENDDO
708
709	ENDDO
710
711	ENDDO
712	!$OMP END PARALLEL
713
714	ENDIF
715	CALL cpu_log( log_point_s(7), 'fft_y', 'pause' )
716
717	!
718	!-- Transpose array
719	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
720	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
721	CALL MPI_ALLTOALL( work(nxl,1,0), sendrecvcount_xy, MPI_REAL, &
722	f_out(1,1,nys_x,1), sendrecvcount_xy, MPI_REAL, &
723	comm1dx, ierr )
724	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
725
726	END SUBROUTINE ffty_tr_yx
727
728
729	SUBROUTINE tr_xy_ffty( f_in, work, f_out )
730
731	!------------------------------------------------------------------------------!
732	! Transposition x --> y with a subsequent backward Fourier transformation for
733	! a 1d-decomposition along x
734	!------------------------------------------------------------------------------!
735
736	USE control_parameters
737	USE cpulog
738	USE indices
739	USE interfaces
740	USE pegrid
741	USE transpose_indices
742
743	IMPLICIT NONE
744
745	INTEGER :: i, iend, iouter, ir, j, k
746	INTEGER, PARAMETER :: stridex = 4
747
748	REAL, DIMENSION(0:ny,stridex) :: work_ffty
749	#if defined( __nec )
750	REAL, DIMENSION(0:ny+1,1:nz,nxl:nxr) :: work_ffty_vec
751	#endif
752	REAL, DIMENSION(nnx,1:nza,nys_x:nyn_xa,pdims(1)) :: f_in
753	REAL, DIMENSION(1:nza,0:nya,nxl:nxra) :: f_out
754	REAL, DIMENSION(nxl:nxra,1:nza,0:nya) :: work
755
756	!
757	!-- Transpose array
758	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
759	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
760	CALL MPI_ALLTOALL( f_in(1,1,nys_x,1), sendrecvcount_xy, MPI_REAL, &
761	work(nxl,1,0), sendrecvcount_xy, MPI_REAL, &
762	comm1dx, ierr )
763	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
764
765	!
766	!-- Resort the data in a way that y becomes the first index and carry out the
767	!-- backward fft along y.
768	CALL cpu_log( log_point_s(7), 'fft_y', 'continue' )
769
770	IF ( host(1:3) == 'nec' ) THEN
771	#if defined( __nec )
772	!
773	!-- Code optimized for vector processors
774	!$OMP PARALLEL PRIVATE ( i, j, k )
775	!$OMP DO
776	DO k = 1, nz
777	DO j = 0, ny
778	DO i = nxl, nxr
779	work_ffty_vec(j,k,i) = work(i,k,j)
780	ENDDO
781	ENDDO
782	ENDDO
783
784	!$OMP DO
785	DO i = nxl, nxr
786
787	CALL fft_y_m( work_ffty_vec(:,:,i), ny+1, 'backward' )
788
789	DO j = 0, ny
790	DO k = 1, nz
791	f_out(k,j,i) = work_ffty_vec(j,k,i)
792	ENDDO
793	ENDDO
794
795	ENDDO
796	!$OMP END PARALLEL
797	#endif
798
799	ELSE
800
801	!
802	!-- Cache optimized code.
803	!-- The i-(x-)direction is split into a strided outer loop and an inner
804	!-- loop for better cache performance
805	!$OMP PARALLEL PRIVATE ( i, iend, iouter, ir, j, k, work_ffty )
806	!$OMP DO
807	DO iouter = nxl, nxr, stridex
808
809	iend = MIN( iouter+stridex-1, nxr ) ! Upper bound for inner i loop
810
811	DO k = 1, nz
812	!
813	!-- Resort
814	DO j = 0, ny
815	DO i = iouter, iend
816	work_ffty(j,i-iouter+1) = work(i,k,j)
817	ENDDO
818	ENDDO
819
820	DO i = iouter, iend
821
822	!
823	!-- FFT along y
824	ir = i-iouter+1 ! counter within a stride
825	CALL fft_y( work_ffty(:,ir), 'backward' )
826
827	DO j = 0, ny
828	f_out(k,j,i) = work_ffty(j,ir)
829	ENDDO
830	ENDDO
831
832	ENDDO
833
834	ENDDO
835	!$OMP END PARALLEL
836
837	ENDIF
838
839	CALL cpu_log( log_point_s(7), 'fft_y', 'stop' )
840
841	END SUBROUTINE tr_xy_ffty
842
843
844	SUBROUTINE fftx_tri_fftx( ar )
845
846	!------------------------------------------------------------------------------!
847	! FFT along x, solution of the tridiagonal system and backward FFT for
848	! a 1d-decomposition along x
849	!
850	! WARNING: this subroutine may still not work for hybrid parallelization
851	! with OpenMP (for possible necessary changes see the original
852	! routine poisfft_hybrid, developed by Klaus Ketelsen, May 2002)
853	!------------------------------------------------------------------------------!
854
855	USE control_parameters
856	USE cpulog
857	USE grid_variables
858	USE indices
859	USE interfaces
860	USE pegrid
861	USE transpose_indices
862
863	IMPLICIT NONE
864
865	character(len=3) :: myth_char
866
867	INTEGER :: i, j, k, m, n, omp_get_thread_num, tn
868
869	REAL, DIMENSION(0:nx) :: work_fftx
870	REAL, DIMENSION(0:nx,1:nz) :: work_trix
871	REAL, DIMENSION(nnx,1:nza,nys_x:nyn_xa,pdims(1)) :: ar
872	REAL, DIMENSION(:,:,:,:), ALLOCATABLE :: tri
873
874
875	CALL cpu_log( log_point_s(33), 'fft_x + tridia', 'start' )
876
877	ALLOCATE( tri(5,0:nx,0:nz-1,0:threads_per_task-1) )
878
879	tn = 0 ! Default thread number in case of one thread
880	!$OMP PARALLEL DO PRIVATE ( i, j, k, m, n, tn, work_fftx, work_trix )
881	DO j = nys_x, nyn_x
882
883	!$ tn = omp_get_thread_num()
884
885	IF ( host(1:3) == 'nec' ) THEN
886	!
887	!-- Code optimized for vector processors
888	DO k = 1, nz
889
890	m = 0
891	DO n = 1, pdims(1)
892	DO i = 1, nnx_pe( n-1 ) ! WARN: pcoord(i) should be used!!
893	work_trix(m,k) = ar(i,k,j,n)
894	m = m + 1
895	ENDDO
896	ENDDO
897
898	ENDDO
899
900	CALL fft_x_m( work_trix, 'forward' )
901
902	ELSE
903	!
904	!-- Cache optimized code
905	DO k = 1, nz
906
907	m = 0
908	DO n = 1, pdims(1)
909	DO i = 1, nnx_pe( n-1 ) ! WARN: pcoord(i) should be used!!
910	work_fftx(m) = ar(i,k,j,n)
911	m = m + 1
912	ENDDO
913	ENDDO
914
915	CALL fft_x( work_fftx, 'forward' )
916
917	DO i = 0, nx
918	work_trix(i,k) = work_fftx(i)
919	ENDDO
920
921	ENDDO
922
923	ENDIF
924
925	!
926	!-- Solve the linear equation system
927	CALL tridia_1dd( ddx2, ddy2, nx, ny, j, work_trix, tri(:,:,:,tn) )
928
929	IF ( host(1:3) == 'nec' ) THEN
930	!
931	!-- Code optimized for vector processors
932	CALL fft_x_m( work_trix, 'backward' )
933
934	DO k = 1, nz
935
936	m = 0
937	DO n = 1, pdims(1)
938	DO i = 1, nnx_pe( n-1 ) ! WARN: pcoord(i) should be used!!
939	ar(i,k,j,n) = work_trix(m,k)
940	m = m + 1
941	ENDDO
942	ENDDO
943
944	ENDDO
945
946	ELSE
947	!
948	!-- Cache optimized code
949	DO k = 1, nz
950
951	DO i = 0, nx
952	work_fftx(i) = work_trix(i,k)
953	ENDDO
954
955	CALL fft_x( work_fftx, 'backward' )
956
957	m = 0
958	DO n = 1, pdims(1)
959	DO i = 1, nnx_pe( n-1 ) ! WARN: pcoord(i) should be used!!
960	ar(i,k,j,n) = work_fftx(m)
961	m = m + 1
962	ENDDO
963	ENDDO
964
965	ENDDO
966
967	ENDIF
968
969	ENDDO
970
971	DEALLOCATE( tri )
972
973	CALL cpu_log( log_point_s(33), 'fft_x + tridia', 'stop' )
974
975	END SUBROUTINE fftx_tri_fftx
976
977
978	SUBROUTINE fftx_tr_xy( f_in, work, f_out )
979
980	!------------------------------------------------------------------------------!
981	! Fourier-transformation along x with subsequent transposition x --> y for
982	! a 1d-decomposition along y
983	!
984	! ATTENTION: The NEC-branch of this routine may significantly profit from
985	! further optimizations. So far, performance is much worse than
986	! for routine ffty_tr_yx (more than three times slower).
987	!------------------------------------------------------------------------------!
988
989	USE control_parameters
990	USE cpulog
991	USE indices
992	USE interfaces
993	USE pegrid
994	USE transpose_indices
995
996	IMPLICIT NONE
997
998	INTEGER :: i, j, k
999
1000	REAL, DIMENSION(0:nx,1:nz,nys:nyn) :: work_fftx
1001	REAL, DIMENSION(1:nza,nys:nyna,0:nxa) :: f_in
1002	REAL, DIMENSION(nny,1:nza,nxl_y:nxr_ya,pdims(2)) :: f_out
1003	REAL, DIMENSION(nys:nyna,1:nza,0:nxa) :: work
1004
1005	!
1006	!-- Carry out the FFT along x, where all data are present due to the
1007	!-- 1d-decomposition along y. Resort the data in a way that y becomes
1008	!-- the first index.
1009	CALL cpu_log( log_point_s(4), 'fft_x', 'start' )
1010
1011	IF ( host(1:3) == 'nec' ) THEN
1012	!
1013	!-- Code for vector processors
1014	!$OMP PARALLEL PRIVATE ( i, j, k )
1015	!$OMP DO
1016	DO i = 0, nx
1017
1018	DO j = nys, nyn
1019	DO k = 1, nz
1020	work_fftx(i,k,j) = f_in(k,j,i)
1021	ENDDO
1022	ENDDO
1023
1024	ENDDO
1025
1026	!$OMP DO
1027	DO j = nys, nyn
1028
1029	CALL fft_x_m( work_fftx(:,:,j), 'forward' )
1030
1031	DO k = 1, nz
1032	DO i = 0, nx
1033	work(j,k,i) = work_fftx(i,k,j)
1034	ENDDO
1035	ENDDO
1036
1037	ENDDO
1038	!$OMP END PARALLEL
1039
1040	ELSE
1041
1042	!
1043	!-- Cache optimized code (there might be still a potential for better
1044	!-- optimization).
1045	!$OMP PARALLEL PRIVATE (i,j,k,work_fftx)
1046	!$OMP DO
1047	DO i = 0, nx
1048
1049	DO j = nys, nyn
1050	DO k = 1, nz
1051	work_fftx(i,k,j) = f_in(k,j,i)
1052	ENDDO
1053	ENDDO
1054
1055	ENDDO
1056
1057	!$OMP DO
1058	DO j = nys, nyn
1059	DO k = 1, nz
1060
1061	CALL fft_x( work_fftx(0:nx,k,j), 'forward' )
1062
1063	DO i = 0, nx
1064	work(j,k,i) = work_fftx(i,k,j)
1065	ENDDO
1066	ENDDO
1067
1068	ENDDO
1069	!$OMP END PARALLEL
1070
1071	ENDIF
1072	CALL cpu_log( log_point_s(4), 'fft_x', 'pause' )
1073
1074	!
1075	!-- Transpose array
1076	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
1077	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
1078	CALL MPI_ALLTOALL( work(nys,1,0), sendrecvcount_xy, MPI_REAL, &
1079	f_out(1,1,nxl_y,1), sendrecvcount_xy, MPI_REAL, &
1080	comm1dy, ierr )
1081	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
1082
1083	END SUBROUTINE fftx_tr_xy
1084
1085
1086	SUBROUTINE tr_yx_fftx( f_in, work, f_out )
1087
1088	!------------------------------------------------------------------------------!
1089	! Transposition y --> x with a subsequent backward Fourier transformation for
1090	! a 1d-decomposition along x
1091	!------------------------------------------------------------------------------!
1092
1093	USE control_parameters
1094	USE cpulog
1095	USE indices
1096	USE interfaces
1097	USE pegrid
1098	USE transpose_indices
1099
1100	IMPLICIT NONE
1101
1102	INTEGER :: i, j, k
1103
1104	REAL, DIMENSION(0:nx,1:nz,nys:nyn) :: work_fftx
1105	REAL, DIMENSION(nny,1:nza,nxl_y:nxr_ya,pdims(2)) :: f_in
1106	REAL, DIMENSION(1:nza,nys:nyna,0:nxa) :: f_out
1107	REAL, DIMENSION(nys:nyna,1:nza,0:nxa) :: work
1108
1109	!
1110	!-- Transpose array
1111	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
1112	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
1113	CALL MPI_ALLTOALL( f_in(1,1,nxl_y,1), sendrecvcount_xy, MPI_REAL, &
1114	work(nys,1,0), sendrecvcount_xy, MPI_REAL, &
1115	comm1dy, ierr )
1116	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
1117
1118	!
1119	!-- Carry out the FFT along x, where all data are present due to the
1120	!-- 1d-decomposition along y. Resort the data in a way that y becomes
1121	!-- the first index.
1122	CALL cpu_log( log_point_s(4), 'fft_x', 'continue' )
1123
1124	IF ( host(1:3) == 'nec' ) THEN
1125	!
1126	!-- Code optimized for vector processors
1127	!$OMP PARALLEL PRIVATE ( i, j, k )
1128	!$OMP DO
1129	DO j = nys, nyn
1130
1131	DO k = 1, nz
1132	DO i = 0, nx
1133	work_fftx(i,k,j) = work(j,k,i)
1134	ENDDO
1135	ENDDO
1136
1137	CALL fft_x_m( work_fftx(:,:,j), 'backward' )
1138
1139	ENDDO
1140
1141	!$OMP DO
1142	DO i = 0, nx
1143	DO j = nys, nyn
1144	DO k = 1, nz
1145	f_out(k,j,i) = work_fftx(i,k,j)
1146	ENDDO
1147	ENDDO
1148	ENDDO
1149	!$OMP END PARALLEL
1150
1151	ELSE
1152
1153	!
1154	!-- Cache optimized code (there might be still a potential for better
1155	!-- optimization).
1156	!$OMP PARALLEL PRIVATE (i,j,k,work_fftx)
1157	!$OMP DO
1158	DO j = nys, nyn
1159	DO k = 1, nz
1160
1161	DO i = 0, nx
1162	work_fftx(i,k,j) = work(j,k,i)
1163	ENDDO
1164
1165	CALL fft_x( work_fftx(0:nx,k,j), 'backward' )
1166
1167	ENDDO
1168	ENDDO
1169
1170	!$OMP DO
1171	DO i = 0, nx
1172	DO j = nys, nyn
1173	DO k = 1, nz
1174	f_out(k,j,i) = work_fftx(i,k,j)
1175	ENDDO
1176	ENDDO
1177	ENDDO
1178	!$OMP END PARALLEL
1179
1180	ENDIF
1181	CALL cpu_log( log_point_s(4), 'fft_x', 'stop' )
1182
1183	END SUBROUTINE tr_yx_fftx
1184
1185
1186	SUBROUTINE ffty_tri_ffty( ar )
1187
1188	!------------------------------------------------------------------------------!
1189	! FFT along y, solution of the tridiagonal system and backward FFT for
1190	! a 1d-decomposition along y
1191	!
1192	! WARNING: this subroutine may still not work for hybrid parallelization
1193	! with OpenMP (for possible necessary changes see the original
1194	! routine poisfft_hybrid, developed by Klaus Ketelsen, May 2002)
1195	!------------------------------------------------------------------------------!
1196
1197	USE control_parameters
1198	USE cpulog
1199	USE grid_variables
1200	USE indices
1201	USE interfaces
1202	USE pegrid
1203	USE transpose_indices
1204
1205	IMPLICIT NONE
1206
1207	INTEGER :: i, j, k, m, n, omp_get_thread_num, tn
1208
1209	REAL, DIMENSION(0:ny) :: work_ffty
1210	REAL, DIMENSION(0:ny,1:nz) :: work_triy
1211	REAL, DIMENSION(nny,1:nza,nxl_y:nxr_ya,pdims(2)) :: ar
1212	REAL, DIMENSION(:,:,:,:), ALLOCATABLE :: tri
1213
1214
1215	CALL cpu_log( log_point_s(39), 'fft_y + tridia', 'start' )
1216
1217	ALLOCATE( tri(5,0:ny,0:nz-1,0:threads_per_task-1) )
1218
1219	tn = 0 ! Default thread number in case of one thread
1220	!$OMP PARALLEL PRIVATE ( i, j, k, m, n, tn, work_ffty, work_triy )
1221	!$OMP DO
1222	DO i = nxl_y, nxr_y
1223
1224	!$ tn = omp_get_thread_num()
1225
1226	IF ( host(1:3) == 'nec' ) THEN
1227	!
1228	!-- Code optimized for vector processors
1229	DO k = 1, nz
1230
1231	m = 0
1232	DO n = 1, pdims(2)
1233	DO j = 1, nny_pe( n-1 ) ! WARN: pcoord(j) should be used!!
1234	work_triy(m,k) = ar(j,k,i,n)
1235	m = m + 1
1236	ENDDO
1237	ENDDO
1238
1239	ENDDO
1240
1241	CALL fft_y_m( work_triy, ny, 'forward' )
1242
1243	ELSE
1244	!
1245	!-- Cache optimized code
1246	DO k = 1, nz
1247
1248	m = 0
1249	DO n = 1, pdims(2)
1250	DO j = 1, nny_pe( n-1 ) ! WARN: pcoord(j) should be used!!
1251	work_ffty(m) = ar(j,k,i,n)
1252	m = m + 1
1253	ENDDO
1254	ENDDO
1255
1256	CALL fft_y( work_ffty, 'forward' )
1257
1258	DO j = 0, ny
1259	work_triy(j,k) = work_ffty(j)
1260	ENDDO
1261
1262	ENDDO
1263
1264	ENDIF
1265
1266	!
1267	!-- Solve the linear equation system
1268	CALL tridia_1dd( ddy2, ddx2, ny, nx, i, work_triy, tri(:,:,:,tn) )
1269
1270	IF ( host(1:3) == 'nec' ) THEN
1271	!
1272	!-- Code optimized for vector processors
1273	CALL fft_y_m( work_triy, ny, 'backward' )
1274
1275	DO k = 1, nz
1276
1277	m = 0
1278	DO n = 1, pdims(2)
1279	DO j = 1, nny_pe( n-1 ) ! WARN: pcoord(j) should be used!!
1280	ar(j,k,i,n) = work_triy(m,k)
1281	m = m + 1
1282	ENDDO
1283	ENDDO
1284
1285	ENDDO
1286
1287	ELSE
1288	!
1289	!-- Cache optimized code
1290	DO k = 1, nz
1291
1292	DO j = 0, ny
1293	work_ffty(j) = work_triy(j,k)
1294	ENDDO
1295
1296	CALL fft_y( work_ffty, 'backward' )
1297
1298	m = 0
1299	DO n = 1, pdims(2)
1300	DO j = 1, nny_pe( n-1 ) ! WARN: pcoord(j) should be used!!
1301	ar(j,k,i,n) = work_ffty(m)
1302	m = m + 1
1303	ENDDO
1304	ENDDO
1305
1306	ENDDO
1307
1308	ENDIF
1309
1310	ENDDO
1311	!$OMP END PARALLEL
1312
1313	DEALLOCATE( tri )
1314
1315	CALL cpu_log( log_point_s(39), 'fft_y + tridia', 'stop' )
1316
1317	END SUBROUTINE ffty_tri_ffty
1318
1319
1320	SUBROUTINE tridia_1dd( ddx2, ddy2, nx, ny, j, ar, tri )
1321
1322	!------------------------------------------------------------------------------!
1323	! Solves the linear system of equations for a 1d-decomposition along x (see
1324	! tridia)
1325	!
1326	! Attention: when using the intel compiler, array tri must be passed as an
1327	! argument to the contained subroutines. Otherwise addres faults
1328	! will occur.
1329	! On NEC, tri should not be passed (except for routine substi_1dd)
1330	! because this causes very bad performance.
1331	!------------------------------------------------------------------------------!
1332
1333	USE arrays_3d
1334	USE control_parameters
1335
1336	USE pegrid
1337
1338	IMPLICIT NONE
1339
1340	INTEGER :: i, j, k, nnyh, nx, ny, omp_get_thread_num, tn
1341
1342	REAL :: ddx2, ddy2
1343
1344	REAL, DIMENSION(0:nx,1:nz) :: ar
1345	REAL, DIMENSION(0:nx,0:nz-1) :: ar1
1346	REAL, DIMENSION(5,0:nx,0:nz-1) :: tri
1347
1348
1349	nnyh = ( ny + 1 ) / 2
1350
1351	!
1352	!-- Define constant elements of the tridiagonal matrix.
1353	!-- The compiler on SX6 does loop exchange. If 0:nx is a high power of 2,
1354	!-- the exchanged loops create bank conflicts. The following directive
1355	!-- prohibits loop exchange and the loops perform much better.
1356	! tn = omp_get_thread_num()
1357	! WRITE( 120+tn, * ) '+++ id=',myid,' nx=',nx,' thread=', omp_get_thread_num()
1358	! CALL local_flush( 120+tn )
1359	!CDIR NOLOOPCHG
1360	DO k = 0, nz-1
1361	DO i = 0,nx
1362	tri(2,i,k) = ddzu(k+1) * ddzw(k+1)
1363	tri(3,i,k) = ddzu(k+2) * ddzw(k+1)
1364	ENDDO
1365	ENDDO
1366	! WRITE( 120+tn, * ) '+++ id=',myid,' end of first tridia loop thread=', omp_get_thread_num()
1367	! CALL local_flush( 120+tn )
1368
1369	IF ( j <= nnyh ) THEN
1370	#if defined( __lc )
1371	CALL maketri_1dd( j, tri )
1372	#else
1373	CALL maketri_1dd( j )
1374	#endif
1375	ELSE
1376	#if defined( __lc )
1377	CALL maketri_1dd( ny+1-j, tri )
1378	#else
1379	CALL maketri_1dd( ny+1-j )
1380	#endif
1381	ENDIF
1382	#if defined( __lc )
1383	CALL split_1dd( tri )
1384	#else
1385	CALL split_1dd
1386	#endif
1387	CALL substi_1dd( ar, tri )
1388
1389	CONTAINS
1390
1391	#if defined( __lc )
1392	SUBROUTINE maketri_1dd( j, tri )
1393	#else
1394	SUBROUTINE maketri_1dd( j )
1395	#endif
1396
1397	!------------------------------------------------------------------------------!
1398	! computes the i- and j-dependent component of the matrix
1399	!------------------------------------------------------------------------------!
1400
1401	USE constants
1402
1403	IMPLICIT NONE
1404
1405	INTEGER :: i, j, k, nnxh
1406	REAL :: a, c
1407
1408	REAL, DIMENSION(0:nx) :: l
1409
1410	#if defined( __lc )
1411	REAL, DIMENSION(5,0:nx,0:nz-1) :: tri
1412	#endif
1413
1414
1415	nnxh = ( nx + 1 ) / 2
1416	!
1417	!-- Provide the tridiagonal matrix for solution of the Poisson equation in
1418	!-- Fourier space. The coefficients are computed following the method of
1419	!-- Schmidt et al. (DFVLR-Mitteilung 84-15), which departs from Stephan
1420	!-- Siano's original version by discretizing the Poisson equation,
1421	!-- before it is Fourier-transformed
1422	DO i = 0, nx
1423	IF ( i >= 0 .AND. i <= nnxh ) THEN
1424	l(i) = 2.0 * ( 1.0 - COS( ( 2.0 * pi * i ) / &
1425	FLOAT( nx+1 ) ) ) * ddx2 + &
1426	2.0 * ( 1.0 - COS( ( 2.0 * pi * j ) / &
1427	FLOAT( ny+1 ) ) ) * ddy2
1428	ELSE
1429	l(i) = 2.0 * ( 1.0 - COS( ( 2.0 * pi * ( nx+1-i ) ) / &
1430	FLOAT( nx+1 ) ) ) * ddx2 + &
1431	2.0 * ( 1.0 - COS( ( 2.0 * pi * j ) / &
1432	FLOAT( ny+1 ) ) ) * ddy2
1433	ENDIF
1434	ENDDO
1435
1436	DO k = 0, nz-1
1437	DO i = 0, nx
1438	a = -1.0 * ddzu(k+2) * ddzw(k+1)
1439	c = -1.0 * ddzu(k+1) * ddzw(k+1)
1440	tri(1,i,k) = a + c - l(i)
1441	ENDDO
1442	ENDDO
1443	IF ( ibc_p_b == 1 .OR. ibc_p_b == 2 ) THEN
1444	DO i = 0, nx
1445	tri(1,i,0) = tri(1,i,0) + tri(2,i,0)
1446	ENDDO
1447	ENDIF
1448	IF ( ibc_p_t == 1 ) THEN
1449	DO i = 0, nx
1450	tri(1,i,nz-1) = tri(1,i,nz-1) + tri(3,i,nz-1)
1451	ENDDO
1452	ENDIF
1453
1454	END SUBROUTINE maketri_1dd
1455
1456
1457	#if defined( __lc )
1458	SUBROUTINE split_1dd( tri )
1459	#else
1460	SUBROUTINE split_1dd
1461	#endif
1462
1463	!------------------------------------------------------------------------------!
1464	! Splitting of the tridiagonal matrix (Thomas algorithm)
1465	!------------------------------------------------------------------------------!
1466
1467	IMPLICIT NONE
1468
1469	INTEGER :: i, k
1470
1471	#if defined( __lc )
1472	REAL, DIMENSION(5,0:nx,0:nz-1) :: tri
1473	#endif
1474
1475
1476	!
1477	!-- Splitting
1478	DO i = 0, nx
1479	tri(4,i,0) = tri(1,i,0)
1480	ENDDO
1481	DO k = 1, nz-1
1482	DO i = 0, nx
1483	tri(5,i,k) = tri(2,i,k) / tri(4,i,k-1)
1484	tri(4,i,k) = tri(1,i,k) - tri(3,i,k-1) * tri(5,i,k)
1485	ENDDO
1486	ENDDO
1487
1488	END SUBROUTINE split_1dd
1489
1490
1491	SUBROUTINE substi_1dd( ar, tri )
1492
1493	!------------------------------------------------------------------------------!
1494	! Substitution (Forward and Backward) (Thomas algorithm)
1495	!------------------------------------------------------------------------------!
1496
1497	IMPLICIT NONE
1498
1499	INTEGER :: i, k
1500
1501	REAL, DIMENSION(0:nx,nz) :: ar
1502	REAL, DIMENSION(0:nx,0:nz-1) :: ar1
1503	REAL, DIMENSION(5,0:nx,0:nz-1) :: tri
1504
1505	!
1506	!-- Forward substitution
1507	DO i = 0, nx
1508	ar1(i,0) = ar(i,1)
1509	ENDDO
1510	DO k = 1, nz-1
1511	DO i = 0, nx
1512	ar1(i,k) = ar(i,k+1) - tri(5,i,k) * ar1(i,k-1)
1513	ENDDO
1514	ENDDO
1515
1516	!
1517	!-- Backward substitution
1518	DO i = 0, nx
1519	ar(i,nz) = ar1(i,nz-1) / tri(4,i,nz-1)
1520	ENDDO
1521	DO k = nz-2, 0, -1
1522	DO i = 0, nx
1523	ar(i,k+1) = ( ar1(i,k) - tri(3,i,k) * ar(i,k+2) ) &
1524	/ tri(4,i,k)
1525	ENDDO
1526	ENDDO
1527
1528	!
1529	!-- Indices i=0, j=0 correspond to horizontally averaged pressure.
1530	!-- The respective values of ar should be zero at all k-levels if
1531	!-- acceleration of horizontally averaged vertical velocity is zero.
1532	IF ( ibc_p_b == 1 .AND. ibc_p_t == 1 ) THEN
1533	IF ( j == 0 ) THEN
1534	DO k = 1, nz
1535	ar(0,k) = 0.0
1536	ENDDO
1537	ENDIF
1538	ENDIF
1539
1540	END SUBROUTINE substi_1dd
1541
1542	END SUBROUTINE tridia_1dd
1543
1544	#endif
1545
1546	END MODULE poisfft_mod

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: palm/trunk/SOURCE/poisfft.f90 @ 622

Download in other formats: