Home

Context Navigation

source: palm/trunk/SOURCE/poisfft.f90 @ 668

Last change on this file since 668 was 668, checked in by suehring, 13 years ago
last commit documented
Property svn:keywords set to `Id`
File size: 45.1 KB

Line
1	MODULE poisfft_mod
2
3	!------------------------------------------------------------------------------!
4	! Current revisions:
5	! -----------------
6	!
7	! Former revisions:
8	! -----------------
9	! $Id: poisfft.f90 668 2010-12-23 13:22:58Z suehring $
10	!
11	! 667 2010-12-23 12:06:00Z suehring/gryschka
12	! ddzu replaced by ddzu_pres due to changes in zu(0)
13	!
14	! 622 2010-12-10 08:08:13Z raasch
15	! optional barriers included in order to speed up collective operations
16	!
17	! 377 2009-09-04 11:09:00Z raasch
18	! __lcmuk changed to __lc to avoid problems with Intel compiler on sgi-ice
19	!
20	! 164 2008-05-15 08:46:15Z raasch
21	! Arguments removed from transpose routines
22	!
23	! 128 2007-10-26 13:11:14Z raasch
24	! Bugfix: wavenumber calculation for even nx in routines maketri
25	!
26	! 85 2007-05-11 09:35:14Z raasch
27	! Bugfix: work_fft*_vec removed from some PRIVATE-declarations
28	!
29	! 76 2007-03-29 00:58:32Z raasch
30	! Tridiagonal coefficients adjusted for Neumann boundary conditions both at
31	! the bottom and the top.
32	!
33	! RCS Log replace by Id keyword, revision history cleaned up
34	!
35	! Revision 1.24 2006/08/04 15:00:24 raasch
36	! Default setting of the thread number tn in case of not using OpenMP
37	!
38	! Revision 1.23 2006/02/23 12:48:38 raasch
39	! Additional compiler directive in routine tridia_1dd for preventing loop
40	! exchange on NEC-SX6
41	!
42	! Revision 1.20 2004/04/30 12:38:09 raasch
43	! Parts of former poisfft_hybrid moved to this subroutine,
44	! former subroutine changed to a module, renaming of FFT-subroutines and
45	! -module, FFTs completely substituted by calls of fft_x and fft_y,
46	! NAG fft used in the non-parallel case completely removed, l in maketri
47	! is now a 1d-array, variables passed by modules instead of using parameter
48	! lists, enlarged transposition arrays introduced
49	!
50	! Revision 1.1 1997/07/24 11:24:14 raasch
51	! Initial revision
52	!
53	!
54	! Description:
55	! ------------
56	! See below.
57	!------------------------------------------------------------------------------!
58
59	!--------------------------------------------------------------------------!
60	! poisfft !
61	! !
62	! Original version: Stephan Siano (pois3d) !
63	! !
64	! Institute of Meteorology and Climatology, University of Hannover !
65	! Germany !
66	! !
67	! Version as of July 23,1996 !
68	! !
69	! !
70	! Version for parallel computers: Siegfried Raasch !
71	! !
72	! Version as of July 03,1997 !
73	! !
74	! Solves the Poisson equation with a 2D spectral method !
75	! d^2 p / dx^2 + d^2 p / dy^2 + d^2 p / dz^2 = s !
76	! !
77	! Input: !
78	! real ar contains in the (nnx,nny,nnz) elements, !
79	! starting from the element (1,nys,nxl), the !
80	! values for s !
81	! real work Temporary array !
82	! !
83	! Output: !
84	! real ar contains the solution for p !
85	!--------------------------------------------------------------------------!
86
87	USE fft_xy
88	USE indices
89	USE transpose_indices
90
91	IMPLICIT NONE
92
93	PRIVATE
94	PUBLIC poisfft, poisfft_init
95
96	INTERFACE poisfft
97	MODULE PROCEDURE poisfft
98	END INTERFACE poisfft
99
100	INTERFACE poisfft_init
101	MODULE PROCEDURE poisfft_init
102	END INTERFACE poisfft_init
103
104	CONTAINS
105
106	SUBROUTINE poisfft_init
107
108	CALL fft_init
109
110	END SUBROUTINE poisfft_init
111
112
113	SUBROUTINE poisfft( ar, work )
114
115	USE cpulog
116	USE interfaces
117	USE pegrid
118
119	IMPLICIT NONE
120
121	REAL, DIMENSION(1:nza,nys:nyna,nxl:nxra) :: ar, work
122
123
124	CALL cpu_log( log_point_s(3), 'poisfft', 'start' )
125
126	!
127	!-- Two-dimensional Fourier Transformation in x- and y-direction.
128	#if defined( __parallel )
129	IF ( pdims(2) == 1 ) THEN
130
131	!
132	!-- 1d-domain-decomposition along x:
133	!-- FFT along y and transposition y --> x
134	CALL ffty_tr_yx( ar, work, ar )
135
136	!
137	!-- FFT along x, solving the tridiagonal system and backward FFT
138	CALL fftx_tri_fftx( ar )
139
140	!
141	!-- Transposition x --> y and backward FFT along y
142	CALL tr_xy_ffty( ar, work, ar )
143
144	ELSEIF ( pdims(1) == 1 ) THEN
145
146	!
147	!-- 1d-domain-decomposition along y:
148	!-- FFT along x and transposition x --> y
149	CALL fftx_tr_xy( ar, work, ar )
150
151	!
152	!-- FFT along y, solving the tridiagonal system and backward FFT
153	CALL ffty_tri_ffty( ar )
154
155	!
156	!-- Transposition y --> x and backward FFT along x
157	CALL tr_yx_fftx( ar, work, ar )
158
159	ELSE
160
161	!
162	!-- 2d-domain-decomposition
163	!-- Transposition z --> x
164	CALL cpu_log( log_point_s(5), 'transpo forward', 'start' )
165	CALL transpose_zx( ar, work, ar )
166	CALL cpu_log( log_point_s(5), 'transpo forward', 'pause' )
167
168	CALL cpu_log( log_point_s(4), 'fft_x', 'start' )
169	CALL fftxp( ar, 'forward' )
170	CALL cpu_log( log_point_s(4), 'fft_x', 'pause' )
171
172	!
173	!-- Transposition x --> y
174	CALL cpu_log( log_point_s(5), 'transpo forward', 'continue' )
175	CALL transpose_xy( ar, work, ar )
176	CALL cpu_log( log_point_s(5), 'transpo forward', 'pause' )
177
178	CALL cpu_log( log_point_s(7), 'fft_y', 'start' )
179	CALL fftyp( ar, 'forward' )
180	CALL cpu_log( log_point_s(7), 'fft_y', 'pause' )
181
182	!
183	!-- Transposition y --> z
184	CALL cpu_log( log_point_s(5), 'transpo forward', 'continue' )
185	CALL transpose_yz( ar, work, ar )
186	CALL cpu_log( log_point_s(5), 'transpo forward', 'stop' )
187
188	!
189	!-- Solve the Poisson equation in z-direction in cartesian space.
190	CALL cpu_log( log_point_s(6), 'tridia', 'start' )
191	CALL tridia( ar )
192	CALL cpu_log( log_point_s(6), 'tridia', 'stop' )
193
194	!
195	!-- Inverse Fourier Transformation
196	!-- Transposition z --> y
197	CALL cpu_log( log_point_s(8), 'transpo invers', 'start' )
198	CALL transpose_zy( ar, work, ar )
199	CALL cpu_log( log_point_s(8), 'transpo invers', 'pause' )
200
201	CALL cpu_log( log_point_s(7), 'fft_y', 'continue' )
202	CALL fftyp( ar, 'backward' )
203	CALL cpu_log( log_point_s(7), 'fft_y', 'stop' )
204
205	!
206	!-- Transposition y --> x
207	CALL cpu_log( log_point_s(8), 'transpo invers', 'continue' )
208	CALL transpose_yx( ar, work, ar )
209	CALL cpu_log( log_point_s(8), 'transpo invers', 'pause' )
210
211	CALL cpu_log( log_point_s(4), 'fft_x', 'continue' )
212	CALL fftxp( ar, 'backward' )
213	CALL cpu_log( log_point_s(4), 'fft_x', 'stop' )
214
215	!
216	!-- Transposition x --> z
217	CALL cpu_log( log_point_s(8), 'transpo invers', 'continue' )
218	CALL transpose_xz( ar, work, ar )
219	CALL cpu_log( log_point_s(8), 'transpo invers', 'stop' )
220
221	ENDIF
222
223	#else
224
225	!
226	!-- Two-dimensional Fourier Transformation along x- and y-direction.
227	CALL cpu_log( log_point_s(4), 'fft_x', 'start' )
228	CALL fftx( ar, 'forward' )
229	CALL cpu_log( log_point_s(4), 'fft_x', 'pause' )
230	CALL cpu_log( log_point_s(7), 'fft_y', 'start' )
231	CALL ffty( ar, 'forward' )
232	CALL cpu_log( log_point_s(7), 'fft_y', 'pause' )
233
234	!
235	!-- Solve the Poisson equation in z-direction in cartesian space.
236	CALL cpu_log( log_point_s(6), 'tridia', 'start' )
237	CALL tridia( ar )
238	CALL cpu_log( log_point_s(6), 'tridia', 'stop' )
239
240	!
241	!-- Inverse Fourier Transformation.
242	CALL cpu_log( log_point_s(7), 'fft_y', 'continue' )
243	CALL ffty( ar, 'backward' )
244	CALL cpu_log( log_point_s(7), 'fft_y', 'stop' )
245	CALL cpu_log( log_point_s(4), 'fft_x', 'continue' )
246	CALL fftx( ar, 'backward' )
247	CALL cpu_log( log_point_s(4), 'fft_x', 'stop' )
248
249	#endif
250
251	CALL cpu_log( log_point_s(3), 'poisfft', 'stop' )
252
253	END SUBROUTINE poisfft
254
255
256
257	SUBROUTINE tridia( ar )
258
259	!------------------------------------------------------------------------------!
260	! solves the linear system of equations:
261	!
262	! -(4 pi^2(i^2/(dx^2nnx^2)+j^2/(dy^2nny^2))+
263	! 1/(dzu(k)dzw(k))+1/(dzu(k-1)dzw(k)))*p(i,j,k)+
264	! 1/(dzu(k)dzw(k))p(i,j,k+1)+1/(dzu(k-1)dzw(k))p(i,j,k-1)=d(i,j,k)
265	!
266	! by using the Thomas algorithm
267	!------------------------------------------------------------------------------!
268
269	USE arrays_3d
270
271	IMPLICIT NONE
272
273	INTEGER :: i, j, k, nnyh
274
275	REAL, DIMENSION(nxl_z:nxr_z,0:nz-1) :: ar1
276	REAL, DIMENSION(5,nxl_z:nxr_z,0:nz-1) :: tri
277
278	#if defined( __parallel )
279	REAL :: ar(nxl_z:nxr_za,nys_z:nyn_za,1:nza)
280	#else
281	REAL :: ar(1:nz,nys_z:nyn_z,nxl_z:nxr_z)
282	#endif
283
284
285	nnyh = (ny+1) / 2
286
287	!
288	!-- Define constant elements of the tridiagonal matrix.
289	DO k = 0, nz-1
290	DO i = nxl_z, nxr_z
291	tri(2,i,k) = ddzu_pres(k+1) * ddzw(k+1)
292	tri(3,i,k) = ddzu_pres(k+2) * ddzw(k+1)
293	ENDDO
294	ENDDO
295
296	#if defined( __parallel )
297	!
298	!-- Repeat for all y-levels.
299	DO j = nys_z, nyn_z
300	IF ( j <= nnyh ) THEN
301	CALL maketri( tri, j )
302	ELSE
303	CALL maketri( tri, ny+1-j )
304	ENDIF
305	CALL split( tri )
306	CALL substi( ar, ar1, tri, j )
307	ENDDO
308	#else
309	!
310	!-- First y-level.
311	CALL maketri( tri, nys_z )
312	CALL split( tri )
313	CALL substi( ar, ar1, tri, 0 )
314
315	!
316	!-- Further y-levels.
317	DO j = 1, nnyh - 1
318	CALL maketri( tri, j )
319	CALL split( tri )
320	CALL substi( ar, ar1, tri, j )
321	CALL substi( ar, ar1, tri, ny+1-j )
322	ENDDO
323	CALL maketri( tri, nnyh )
324	CALL split( tri )
325	CALL substi( ar, ar1, tri, nnyh+nys )
326	#endif
327
328	CONTAINS
329
330	SUBROUTINE maketri( tri, j )
331
332	!------------------------------------------------------------------------------!
333	! Computes the i- and j-dependent component of the matrix
334	!------------------------------------------------------------------------------!
335
336	USE arrays_3d
337	USE constants
338	USE control_parameters
339	USE grid_variables
340
341	IMPLICIT NONE
342
343	INTEGER :: i, j, k, nnxh
344	REAL :: a, c
345	REAL :: ll(nxl_z:nxr_z)
346	REAL :: tri(5,nxl_z:nxr_z,0:nz-1)
347
348
349	nnxh = ( nx + 1 ) / 2
350
351	!
352	!-- Provide the tridiagonal matrix for solution of the Poisson equation in
353	!-- Fourier space. The coefficients are computed following the method of
354	!-- Schmidt et al. (DFVLR-Mitteilung 84-15), which departs from Stephan
355	!-- Siano's original version by discretizing the Poisson equation,
356	!-- before it is Fourier-transformed
357	#if defined( __parallel )
358	DO i = nxl_z, nxr_z
359	IF ( i >= 0 .AND. i <= nnxh ) THEN
360	ll(i) = 2.0 * ( 1.0 - COS( ( 2.0 * pi * i ) / &
361	FLOAT( nx+1 ) ) ) / ( dx * dx ) + &
362	2.0 * ( 1.0 - COS( ( 2.0 * pi * j ) / &
363	FLOAT( ny+1 ) ) ) / ( dy * dy )
364	ELSE
365	ll(i) = 2.0 * ( 1.0 - COS( ( 2.0 * pi * ( nx+1-i ) ) / &
366	FLOAT( nx+1 ) ) ) / ( dx * dx ) + &
367	2.0 * ( 1.0 - COS( ( 2.0 * pi * j ) / &
368	FLOAT( ny+1 ) ) ) / ( dy * dy )
369	ENDIF
370	DO k = 0,nz-1
371	a = -1.0 * ddzu_pres(k+2) * ddzw(k+1)
372	c = -1.0 * ddzu_pres(k+1) * ddzw(k+1)
373	tri(1,i,k) = a + c - ll(i)
374	ENDDO
375	ENDDO
376	#else
377	DO i = 0, nnxh
378	ll(i) = 2.0 * ( 1.0 - COS( ( 2.0 * pi * i ) / FLOAT( nx+1 ) ) ) / &
379	( dx * dx ) + &
380	2.0 * ( 1.0 - COS( ( 2.0 * pi * j ) / FLOAT( ny+1 ) ) ) / &
381	( dy * dy )
382	DO k = 0, nz-1
383	a = -1.0 * ddzu_pres(k+2) * ddzw(k+1)
384	c = -1.0 * ddzu_pres(k+1) * ddzw(k+1)
385	tri(1,i,k) = a + c - ll(i)
386	IF ( i >= 1 .and. i < nnxh ) THEN
387	tri(1,nx+1-i,k) = tri(1,i,k)
388	ENDIF
389	ENDDO
390	ENDDO
391	#endif
392	IF ( ibc_p_b == 1 .OR. ibc_p_b == 2 ) THEN
393	DO i = nxl_z, nxr_z
394	tri(1,i,0) = tri(1,i,0) + tri(2,i,0)
395	ENDDO
396	ENDIF
397	IF ( ibc_p_t == 1 ) THEN
398	DO i = nxl_z, nxr_z
399	tri(1,i,nz-1) = tri(1,i,nz-1) + tri(3,i,nz-1)
400	ENDDO
401	ENDIF
402
403	END SUBROUTINE maketri
404
405
406	SUBROUTINE substi( ar, ar1, tri, j )
407
408	!------------------------------------------------------------------------------!
409	! Substitution (Forward and Backward) (Thomas algorithm)
410	!------------------------------------------------------------------------------!
411
412	USE control_parameters
413
414	IMPLICIT NONE
415
416	INTEGER :: i, j, k
417	REAL :: ar1(nxl_z:nxr_z,0:nz-1)
418	REAL :: tri(5,nxl_z:nxr_z,0:nz-1)
419	#if defined( __parallel )
420	REAL :: ar(nxl_z:nxr_za,nys_z:nyn_za,1:nza)
421	#else
422	REAL :: ar(1:nz,nys_z:nyn_z,nxl_z:nxr_z)
423	#endif
424
425	!
426	!-- Forward substitution.
427	DO i = nxl_z, nxr_z
428	#if defined( __parallel )
429	ar1(i,0) = ar(i,j,1)
430	#else
431	ar1(i,0) = ar(1,j,i)
432	#endif
433	ENDDO
434	DO k = 1, nz - 1
435	DO i = nxl_z, nxr_z
436	#if defined( __parallel )
437	ar1(i,k) = ar(i,j,k+1) - tri(5,i,k) * ar1(i,k-1)
438	#else
439	ar1(i,k) = ar(k+1,j,i) - tri(5,i,k) * ar1(i,k-1)
440	#endif
441	ENDDO
442	ENDDO
443
444	!
445	!-- Backward substitution.
446	DO i = nxl_z, nxr_z
447	#if defined( __parallel )
448	ar(i,j,nz) = ar1(i,nz-1) / tri(4,i,nz-1)
449	#else
450	ar(nz,j,i) = ar1(i,nz-1) / tri(4,i,nz-1)
451	#endif
452	ENDDO
453	DO k = nz-2, 0, -1
454	DO i = nxl_z, nxr_z
455	#if defined( __parallel )
456	ar(i,j,k+1) = ( ar1(i,k) - tri(3,i,k) * ar(i,j,k+2) ) &
457	/ tri(4,i,k)
458	#else
459	ar(k+1,j,i) = ( ar1(i,k) - tri(3,i,k) * ar(k+2,j,i) ) &
460	/ tri(4,i,k)
461	#endif
462	ENDDO
463	ENDDO
464
465	!
466	!-- Indices i=0, j=0 correspond to horizontally averaged pressure.
467	!-- The respective values of ar should be zero at all k-levels if
468	!-- acceleration of horizontally averaged vertical velocity is zero.
469	IF ( ibc_p_b == 1 .AND. ibc_p_t == 1 ) THEN
470	IF ( j == 0 .AND. nxl_z == 0 ) THEN
471	#if defined( __parallel )
472	DO k = 1, nz
473	ar(nxl_z,j,k) = 0.0
474	ENDDO
475	#else
476	DO k = 1, nz
477	ar(k,j,nxl_z) = 0.0
478	ENDDO
479	#endif
480	ENDIF
481	ENDIF
482
483	END SUBROUTINE substi
484
485
486	SUBROUTINE split( tri )
487
488	!------------------------------------------------------------------------------!
489	! Splitting of the tridiagonal matrix (Thomas algorithm)
490	!------------------------------------------------------------------------------!
491
492	IMPLICIT NONE
493
494	INTEGER :: i, k
495	REAL :: tri(5,nxl_z:nxr_z,0:nz-1)
496
497	!
498	!-- Splitting.
499	DO i = nxl_z, nxr_z
500	tri(4,i,0) = tri(1,i,0)
501	ENDDO
502	DO k = 1, nz-1
503	DO i = nxl_z, nxr_z
504	tri(5,i,k) = tri(2,i,k) / tri(4,i,k-1)
505	tri(4,i,k) = tri(1,i,k) - tri(3,i,k-1) * tri(5,i,k)
506	ENDDO
507	ENDDO
508
509	END SUBROUTINE split
510
511	END SUBROUTINE tridia
512
513
514	#if defined( __parallel )
515	SUBROUTINE fftxp( ar, direction )
516
517	!------------------------------------------------------------------------------!
518	! Fourier-transformation along x-direction Parallelized version
519	!------------------------------------------------------------------------------!
520
521	IMPLICIT NONE
522
523	CHARACTER (LEN=*) :: direction
524	INTEGER :: j, k
525	REAL :: ar(0:nxa,nys_x:nyn_xa,nzb_x:nzt_xa)
526
527	!
528	!-- Performing the fft with one of the methods implemented
529	DO k = nzb_x, nzt_x
530	DO j = nys_x, nyn_x
531	CALL fft_x( ar(0:nx,j,k), direction )
532	ENDDO
533	ENDDO
534
535	END SUBROUTINE fftxp
536
537	#else
538	SUBROUTINE fftx( ar, direction )
539
540	!------------------------------------------------------------------------------!
541	! Fourier-transformation along x-direction Non parallel version
542	!------------------------------------------------------------------------------!
543
544	IMPLICIT NONE
545
546	CHARACTER (LEN=*) :: direction
547	INTEGER :: i, j, k
548	REAL :: ar(1:nz,0:ny,0:nx)
549
550	!
551	!-- Performing the fft with one of the methods implemented
552	DO k = 1, nz
553	DO j = 0, ny
554	CALL fft_x( ar(k,j,0:nx), direction )
555	ENDDO
556	ENDDO
557
558	END SUBROUTINE fftx
559	#endif
560
561
562	#if defined( __parallel )
563	SUBROUTINE fftyp( ar, direction )
564
565	!------------------------------------------------------------------------------!
566	! Fourier-transformation along y-direction Parallelized version
567	!------------------------------------------------------------------------------!
568
569	IMPLICIT NONE
570
571	CHARACTER (LEN=*) :: direction
572	INTEGER :: i, k
573	REAL :: ar(0:nya,nxl_y:nxr_ya,nzb_y:nzt_ya)
574
575	!
576	!-- Performing the fft with one of the methods implemented
577	DO k = nzb_y, nzt_y
578	DO i = nxl_y, nxr_y
579	CALL fft_y( ar(0:ny,i,k), direction )
580	ENDDO
581	ENDDO
582
583	END SUBROUTINE fftyp
584
585	#else
586	SUBROUTINE ffty( ar, direction )
587
588	!------------------------------------------------------------------------------!
589	! Fourier-transformation along y-direction Non parallel version
590	!------------------------------------------------------------------------------!
591
592	IMPLICIT NONE
593
594	CHARACTER (LEN=*) :: direction
595	INTEGER :: i, k
596	REAL :: ar(1:nz,0:ny,0:nx)
597
598	!
599	!-- Performing the fft with one of the methods implemented
600	DO k = 1, nz
601	DO i = 0, nx
602	CALL fft_y( ar(k,0:ny,i), direction )
603	ENDDO
604	ENDDO
605
606	END SUBROUTINE ffty
607	#endif
608
609	#if defined( __parallel )
610	SUBROUTINE ffty_tr_yx( f_in, work, f_out )
611
612	!------------------------------------------------------------------------------!
613	! Fourier-transformation along y with subsequent transposition y --> x for
614	! a 1d-decomposition along x
615	!
616	! ATTENTION: The performance of this routine is much faster on the NEC-SX6,
617	! if the first index of work_ffty_vec is odd. Otherwise
618	! memory bank conflicts may occur (especially if the index is a
619	! multiple of 128). That's why work_ffty_vec is dimensioned as
620	! 0:ny+1.
621	! Of course, this will not work if users are using an odd number
622	! of gridpoints along y.
623	!------------------------------------------------------------------------------!
624
625	USE control_parameters
626	USE cpulog
627	USE indices
628	USE interfaces
629	USE pegrid
630	USE transpose_indices
631
632	IMPLICIT NONE
633
634	INTEGER :: i, iend, iouter, ir, j, k
635	INTEGER, PARAMETER :: stridex = 4
636
637	REAL, DIMENSION(0:ny,stridex) :: work_ffty
638	#if defined( __nec )
639	REAL, DIMENSION(0:ny+1,1:nz,nxl:nxr) :: work_ffty_vec
640	#endif
641	REAL, DIMENSION(1:nza,0:nya,nxl:nxra) :: f_in
642	REAL, DIMENSION(nnx,1:nza,nys_x:nyn_xa,pdims(1)) :: f_out
643	REAL, DIMENSION(nxl:nxra,1:nza,0:nya) :: work
644
645	!
646	!-- Carry out the FFT along y, where all data are present due to the
647	!-- 1d-decomposition along x. Resort the data in a way that x becomes
648	!-- the first index.
649	CALL cpu_log( log_point_s(7), 'fft_y', 'start' )
650
651	IF ( host(1:3) == 'nec' ) THEN
652	#if defined( __nec )
653	!
654	!-- Code optimized for vector processors
655	!$OMP PARALLEL PRIVATE ( i, j, k )
656	!$OMP DO
657	DO i = nxl, nxr
658
659	DO j = 0, ny
660	DO k = 1, nz
661	work_ffty_vec(j,k,i) = f_in(k,j,i)
662	ENDDO
663	ENDDO
664
665	CALL fft_y_m( work_ffty_vec(:,:,i), ny+1, 'forward' )
666
667	ENDDO
668
669	!$OMP DO
670	DO k = 1, nz
671	DO j = 0, ny
672	DO i = nxl, nxr
673	work(i,k,j) = work_ffty_vec(j,k,i)
674	ENDDO
675	ENDDO
676	ENDDO
677	!$OMP END PARALLEL
678	#endif
679
680	ELSE
681
682	!
683	!-- Cache optimized code.
684	!-- The i-(x-)direction is split into a strided outer loop and an inner
685	!-- loop for better cache performance
686	!$OMP PARALLEL PRIVATE (i,iend,iouter,ir,j,k,work_ffty)
687	!$OMP DO
688	DO iouter = nxl, nxr, stridex
689
690	iend = MIN( iouter+stridex-1, nxr ) ! Upper bound for inner i loop
691
692	DO k = 1, nz
693
694	DO i = iouter, iend
695
696	ir = i-iouter+1 ! counter within a stride
697	DO j = 0, ny
698	work_ffty(j,ir) = f_in(k,j,i)
699	ENDDO
700	!
701	!-- FFT along y
702	CALL fft_y( work_ffty(:,ir), 'forward' )
703
704	ENDDO
705
706	!
707	!-- Resort
708	DO j = 0, ny
709	DO i = iouter, iend
710	work(i,k,j) = work_ffty(j,i-iouter+1)
711	ENDDO
712	ENDDO
713
714	ENDDO
715
716	ENDDO
717	!$OMP END PARALLEL
718
719	ENDIF
720	CALL cpu_log( log_point_s(7), 'fft_y', 'pause' )
721
722	!
723	!-- Transpose array
724	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
725	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
726	CALL MPI_ALLTOALL( work(nxl,1,0), sendrecvcount_xy, MPI_REAL, &
727	f_out(1,1,nys_x,1), sendrecvcount_xy, MPI_REAL, &
728	comm1dx, ierr )
729	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
730
731	END SUBROUTINE ffty_tr_yx
732
733
734	SUBROUTINE tr_xy_ffty( f_in, work, f_out )
735
736	!------------------------------------------------------------------------------!
737	! Transposition x --> y with a subsequent backward Fourier transformation for
738	! a 1d-decomposition along x
739	!------------------------------------------------------------------------------!
740
741	USE control_parameters
742	USE cpulog
743	USE indices
744	USE interfaces
745	USE pegrid
746	USE transpose_indices
747
748	IMPLICIT NONE
749
750	INTEGER :: i, iend, iouter, ir, j, k
751	INTEGER, PARAMETER :: stridex = 4
752
753	REAL, DIMENSION(0:ny,stridex) :: work_ffty
754	#if defined( __nec )
755	REAL, DIMENSION(0:ny+1,1:nz,nxl:nxr) :: work_ffty_vec
756	#endif
757	REAL, DIMENSION(nnx,1:nza,nys_x:nyn_xa,pdims(1)) :: f_in
758	REAL, DIMENSION(1:nza,0:nya,nxl:nxra) :: f_out
759	REAL, DIMENSION(nxl:nxra,1:nza,0:nya) :: work
760
761	!
762	!-- Transpose array
763	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
764	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
765	CALL MPI_ALLTOALL( f_in(1,1,nys_x,1), sendrecvcount_xy, MPI_REAL, &
766	work(nxl,1,0), sendrecvcount_xy, MPI_REAL, &
767	comm1dx, ierr )
768	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
769
770	!
771	!-- Resort the data in a way that y becomes the first index and carry out the
772	!-- backward fft along y.
773	CALL cpu_log( log_point_s(7), 'fft_y', 'continue' )
774
775	IF ( host(1:3) == 'nec' ) THEN
776	#if defined( __nec )
777	!
778	!-- Code optimized for vector processors
779	!$OMP PARALLEL PRIVATE ( i, j, k )
780	!$OMP DO
781	DO k = 1, nz
782	DO j = 0, ny
783	DO i = nxl, nxr
784	work_ffty_vec(j,k,i) = work(i,k,j)
785	ENDDO
786	ENDDO
787	ENDDO
788
789	!$OMP DO
790	DO i = nxl, nxr
791
792	CALL fft_y_m( work_ffty_vec(:,:,i), ny+1, 'backward' )
793
794	DO j = 0, ny
795	DO k = 1, nz
796	f_out(k,j,i) = work_ffty_vec(j,k,i)
797	ENDDO
798	ENDDO
799
800	ENDDO
801	!$OMP END PARALLEL
802	#endif
803
804	ELSE
805
806	!
807	!-- Cache optimized code.
808	!-- The i-(x-)direction is split into a strided outer loop and an inner
809	!-- loop for better cache performance
810	!$OMP PARALLEL PRIVATE ( i, iend, iouter, ir, j, k, work_ffty )
811	!$OMP DO
812	DO iouter = nxl, nxr, stridex
813
814	iend = MIN( iouter+stridex-1, nxr ) ! Upper bound for inner i loop
815
816	DO k = 1, nz
817	!
818	!-- Resort
819	DO j = 0, ny
820	DO i = iouter, iend
821	work_ffty(j,i-iouter+1) = work(i,k,j)
822	ENDDO
823	ENDDO
824
825	DO i = iouter, iend
826
827	!
828	!-- FFT along y
829	ir = i-iouter+1 ! counter within a stride
830	CALL fft_y( work_ffty(:,ir), 'backward' )
831
832	DO j = 0, ny
833	f_out(k,j,i) = work_ffty(j,ir)
834	ENDDO
835	ENDDO
836
837	ENDDO
838
839	ENDDO
840	!$OMP END PARALLEL
841
842	ENDIF
843
844	CALL cpu_log( log_point_s(7), 'fft_y', 'stop' )
845
846	END SUBROUTINE tr_xy_ffty
847
848
849	SUBROUTINE fftx_tri_fftx( ar )
850
851	!------------------------------------------------------------------------------!
852	! FFT along x, solution of the tridiagonal system and backward FFT for
853	! a 1d-decomposition along x
854	!
855	! WARNING: this subroutine may still not work for hybrid parallelization
856	! with OpenMP (for possible necessary changes see the original
857	! routine poisfft_hybrid, developed by Klaus Ketelsen, May 2002)
858	!------------------------------------------------------------------------------!
859
860	USE control_parameters
861	USE cpulog
862	USE grid_variables
863	USE indices
864	USE interfaces
865	USE pegrid
866	USE transpose_indices
867
868	IMPLICIT NONE
869
870	character(len=3) :: myth_char
871
872	INTEGER :: i, j, k, m, n, omp_get_thread_num, tn
873
874	REAL, DIMENSION(0:nx) :: work_fftx
875	REAL, DIMENSION(0:nx,1:nz) :: work_trix
876	REAL, DIMENSION(nnx,1:nza,nys_x:nyn_xa,pdims(1)) :: ar
877	REAL, DIMENSION(:,:,:,:), ALLOCATABLE :: tri
878
879
880	CALL cpu_log( log_point_s(33), 'fft_x + tridia', 'start' )
881
882	ALLOCATE( tri(5,0:nx,0:nz-1,0:threads_per_task-1) )
883
884	tn = 0 ! Default thread number in case of one thread
885	!$OMP PARALLEL DO PRIVATE ( i, j, k, m, n, tn, work_fftx, work_trix )
886	DO j = nys_x, nyn_x
887
888	!$ tn = omp_get_thread_num()
889
890	IF ( host(1:3) == 'nec' ) THEN
891	!
892	!-- Code optimized for vector processors
893	DO k = 1, nz
894
895	m = 0
896	DO n = 1, pdims(1)
897	DO i = 1, nnx_pe( n-1 ) ! WARN: pcoord(i) should be used!!
898	work_trix(m,k) = ar(i,k,j,n)
899	m = m + 1
900	ENDDO
901	ENDDO
902
903	ENDDO
904
905	CALL fft_x_m( work_trix, 'forward' )
906
907	ELSE
908	!
909	!-- Cache optimized code
910	DO k = 1, nz
911
912	m = 0
913	DO n = 1, pdims(1)
914	DO i = 1, nnx_pe( n-1 ) ! WARN: pcoord(i) should be used!!
915	work_fftx(m) = ar(i,k,j,n)
916	m = m + 1
917	ENDDO
918	ENDDO
919
920	CALL fft_x( work_fftx, 'forward' )
921
922	DO i = 0, nx
923	work_trix(i,k) = work_fftx(i)
924	ENDDO
925
926	ENDDO
927
928	ENDIF
929
930	!
931	!-- Solve the linear equation system
932	CALL tridia_1dd( ddx2, ddy2, nx, ny, j, work_trix, tri(:,:,:,tn) )
933
934	IF ( host(1:3) == 'nec' ) THEN
935	!
936	!-- Code optimized for vector processors
937	CALL fft_x_m( work_trix, 'backward' )
938
939	DO k = 1, nz
940
941	m = 0
942	DO n = 1, pdims(1)
943	DO i = 1, nnx_pe( n-1 ) ! WARN: pcoord(i) should be used!!
944	ar(i,k,j,n) = work_trix(m,k)
945	m = m + 1
946	ENDDO
947	ENDDO
948
949	ENDDO
950
951	ELSE
952	!
953	!-- Cache optimized code
954	DO k = 1, nz
955
956	DO i = 0, nx
957	work_fftx(i) = work_trix(i,k)
958	ENDDO
959
960	CALL fft_x( work_fftx, 'backward' )
961
962	m = 0
963	DO n = 1, pdims(1)
964	DO i = 1, nnx_pe( n-1 ) ! WARN: pcoord(i) should be used!!
965	ar(i,k,j,n) = work_fftx(m)
966	m = m + 1
967	ENDDO
968	ENDDO
969
970	ENDDO
971
972	ENDIF
973
974	ENDDO
975
976	DEALLOCATE( tri )
977
978	CALL cpu_log( log_point_s(33), 'fft_x + tridia', 'stop' )
979
980	END SUBROUTINE fftx_tri_fftx
981
982
983	SUBROUTINE fftx_tr_xy( f_in, work, f_out )
984
985	!------------------------------------------------------------------------------!
986	! Fourier-transformation along x with subsequent transposition x --> y for
987	! a 1d-decomposition along y
988	!
989	! ATTENTION: The NEC-branch of this routine may significantly profit from
990	! further optimizations. So far, performance is much worse than
991	! for routine ffty_tr_yx (more than three times slower).
992	!------------------------------------------------------------------------------!
993
994	USE control_parameters
995	USE cpulog
996	USE indices
997	USE interfaces
998	USE pegrid
999	USE transpose_indices
1000
1001	IMPLICIT NONE
1002
1003	INTEGER :: i, j, k
1004
1005	REAL, DIMENSION(0:nx,1:nz,nys:nyn) :: work_fftx
1006	REAL, DIMENSION(1:nza,nys:nyna,0:nxa) :: f_in
1007	REAL, DIMENSION(nny,1:nza,nxl_y:nxr_ya,pdims(2)) :: f_out
1008	REAL, DIMENSION(nys:nyna,1:nza,0:nxa) :: work
1009
1010	!
1011	!-- Carry out the FFT along x, where all data are present due to the
1012	!-- 1d-decomposition along y. Resort the data in a way that y becomes
1013	!-- the first index.
1014	CALL cpu_log( log_point_s(4), 'fft_x', 'start' )
1015
1016	IF ( host(1:3) == 'nec' ) THEN
1017	!
1018	!-- Code for vector processors
1019	!$OMP PARALLEL PRIVATE ( i, j, k )
1020	!$OMP DO
1021	DO i = 0, nx
1022
1023	DO j = nys, nyn
1024	DO k = 1, nz
1025	work_fftx(i,k,j) = f_in(k,j,i)
1026	ENDDO
1027	ENDDO
1028
1029	ENDDO
1030
1031	!$OMP DO
1032	DO j = nys, nyn
1033
1034	CALL fft_x_m( work_fftx(:,:,j), 'forward' )
1035
1036	DO k = 1, nz
1037	DO i = 0, nx
1038	work(j,k,i) = work_fftx(i,k,j)
1039	ENDDO
1040	ENDDO
1041
1042	ENDDO
1043	!$OMP END PARALLEL
1044
1045	ELSE
1046
1047	!
1048	!-- Cache optimized code (there might be still a potential for better
1049	!-- optimization).
1050	!$OMP PARALLEL PRIVATE (i,j,k,work_fftx)
1051	!$OMP DO
1052	DO i = 0, nx
1053
1054	DO j = nys, nyn
1055	DO k = 1, nz
1056	work_fftx(i,k,j) = f_in(k,j,i)
1057	ENDDO
1058	ENDDO
1059
1060	ENDDO
1061
1062	!$OMP DO
1063	DO j = nys, nyn
1064	DO k = 1, nz
1065
1066	CALL fft_x( work_fftx(0:nx,k,j), 'forward' )
1067
1068	DO i = 0, nx
1069	work(j,k,i) = work_fftx(i,k,j)
1070	ENDDO
1071	ENDDO
1072
1073	ENDDO
1074	!$OMP END PARALLEL
1075
1076	ENDIF
1077	CALL cpu_log( log_point_s(4), 'fft_x', 'pause' )
1078
1079	!
1080	!-- Transpose array
1081	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
1082	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
1083	CALL MPI_ALLTOALL( work(nys,1,0), sendrecvcount_xy, MPI_REAL, &
1084	f_out(1,1,nxl_y,1), sendrecvcount_xy, MPI_REAL, &
1085	comm1dy, ierr )
1086	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
1087
1088	END SUBROUTINE fftx_tr_xy
1089
1090
1091	SUBROUTINE tr_yx_fftx( f_in, work, f_out )
1092
1093	!------------------------------------------------------------------------------!
1094	! Transposition y --> x with a subsequent backward Fourier transformation for
1095	! a 1d-decomposition along x
1096	!------------------------------------------------------------------------------!
1097
1098	USE control_parameters
1099	USE cpulog
1100	USE indices
1101	USE interfaces
1102	USE pegrid
1103	USE transpose_indices
1104
1105	IMPLICIT NONE
1106
1107	INTEGER :: i, j, k
1108
1109	REAL, DIMENSION(0:nx,1:nz,nys:nyn) :: work_fftx
1110	REAL, DIMENSION(nny,1:nza,nxl_y:nxr_ya,pdims(2)) :: f_in
1111	REAL, DIMENSION(1:nza,nys:nyna,0:nxa) :: f_out
1112	REAL, DIMENSION(nys:nyna,1:nza,0:nxa) :: work
1113
1114	!
1115	!-- Transpose array
1116	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
1117	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
1118	CALL MPI_ALLTOALL( f_in(1,1,nxl_y,1), sendrecvcount_xy, MPI_REAL, &
1119	work(nys,1,0), sendrecvcount_xy, MPI_REAL, &
1120	comm1dy, ierr )
1121	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
1122
1123	!
1124	!-- Carry out the FFT along x, where all data are present due to the
1125	!-- 1d-decomposition along y. Resort the data in a way that y becomes
1126	!-- the first index.
1127	CALL cpu_log( log_point_s(4), 'fft_x', 'continue' )
1128
1129	IF ( host(1:3) == 'nec' ) THEN
1130	!
1131	!-- Code optimized for vector processors
1132	!$OMP PARALLEL PRIVATE ( i, j, k )
1133	!$OMP DO
1134	DO j = nys, nyn
1135
1136	DO k = 1, nz
1137	DO i = 0, nx
1138	work_fftx(i,k,j) = work(j,k,i)
1139	ENDDO
1140	ENDDO
1141
1142	CALL fft_x_m( work_fftx(:,:,j), 'backward' )
1143
1144	ENDDO
1145
1146	!$OMP DO
1147	DO i = 0, nx
1148	DO j = nys, nyn
1149	DO k = 1, nz
1150	f_out(k,j,i) = work_fftx(i,k,j)
1151	ENDDO
1152	ENDDO
1153	ENDDO
1154	!$OMP END PARALLEL
1155
1156	ELSE
1157
1158	!
1159	!-- Cache optimized code (there might be still a potential for better
1160	!-- optimization).
1161	!$OMP PARALLEL PRIVATE (i,j,k,work_fftx)
1162	!$OMP DO
1163	DO j = nys, nyn
1164	DO k = 1, nz
1165
1166	DO i = 0, nx
1167	work_fftx(i,k,j) = work(j,k,i)
1168	ENDDO
1169
1170	CALL fft_x( work_fftx(0:nx,k,j), 'backward' )
1171
1172	ENDDO
1173	ENDDO
1174
1175	!$OMP DO
1176	DO i = 0, nx
1177	DO j = nys, nyn
1178	DO k = 1, nz
1179	f_out(k,j,i) = work_fftx(i,k,j)
1180	ENDDO
1181	ENDDO
1182	ENDDO
1183	!$OMP END PARALLEL
1184
1185	ENDIF
1186	CALL cpu_log( log_point_s(4), 'fft_x', 'stop' )
1187
1188	END SUBROUTINE tr_yx_fftx
1189
1190
1191	SUBROUTINE ffty_tri_ffty( ar )
1192
1193	!------------------------------------------------------------------------------!
1194	! FFT along y, solution of the tridiagonal system and backward FFT for
1195	! a 1d-decomposition along y
1196	!
1197	! WARNING: this subroutine may still not work for hybrid parallelization
1198	! with OpenMP (for possible necessary changes see the original
1199	! routine poisfft_hybrid, developed by Klaus Ketelsen, May 2002)
1200	!------------------------------------------------------------------------------!
1201
1202	USE control_parameters
1203	USE cpulog
1204	USE grid_variables
1205	USE indices
1206	USE interfaces
1207	USE pegrid
1208	USE transpose_indices
1209
1210	IMPLICIT NONE
1211
1212	INTEGER :: i, j, k, m, n, omp_get_thread_num, tn
1213
1214	REAL, DIMENSION(0:ny) :: work_ffty
1215	REAL, DIMENSION(0:ny,1:nz) :: work_triy
1216	REAL, DIMENSION(nny,1:nza,nxl_y:nxr_ya,pdims(2)) :: ar
1217	REAL, DIMENSION(:,:,:,:), ALLOCATABLE :: tri
1218
1219
1220	CALL cpu_log( log_point_s(39), 'fft_y + tridia', 'start' )
1221
1222	ALLOCATE( tri(5,0:ny,0:nz-1,0:threads_per_task-1) )
1223
1224	tn = 0 ! Default thread number in case of one thread
1225	!$OMP PARALLEL PRIVATE ( i, j, k, m, n, tn, work_ffty, work_triy )
1226	!$OMP DO
1227	DO i = nxl_y, nxr_y
1228
1229	!$ tn = omp_get_thread_num()
1230
1231	IF ( host(1:3) == 'nec' ) THEN
1232	!
1233	!-- Code optimized for vector processors
1234	DO k = 1, nz
1235
1236	m = 0
1237	DO n = 1, pdims(2)
1238	DO j = 1, nny_pe( n-1 ) ! WARN: pcoord(j) should be used!!
1239	work_triy(m,k) = ar(j,k,i,n)
1240	m = m + 1
1241	ENDDO
1242	ENDDO
1243
1244	ENDDO
1245
1246	CALL fft_y_m( work_triy, ny, 'forward' )
1247
1248	ELSE
1249	!
1250	!-- Cache optimized code
1251	DO k = 1, nz
1252
1253	m = 0
1254	DO n = 1, pdims(2)
1255	DO j = 1, nny_pe( n-1 ) ! WARN: pcoord(j) should be used!!
1256	work_ffty(m) = ar(j,k,i,n)
1257	m = m + 1
1258	ENDDO
1259	ENDDO
1260
1261	CALL fft_y( work_ffty, 'forward' )
1262
1263	DO j = 0, ny
1264	work_triy(j,k) = work_ffty(j)
1265	ENDDO
1266
1267	ENDDO
1268
1269	ENDIF
1270
1271	!
1272	!-- Solve the linear equation system
1273	CALL tridia_1dd( ddy2, ddx2, ny, nx, i, work_triy, tri(:,:,:,tn) )
1274
1275	IF ( host(1:3) == 'nec' ) THEN
1276	!
1277	!-- Code optimized for vector processors
1278	CALL fft_y_m( work_triy, ny, 'backward' )
1279
1280	DO k = 1, nz
1281
1282	m = 0
1283	DO n = 1, pdims(2)
1284	DO j = 1, nny_pe( n-1 ) ! WARN: pcoord(j) should be used!!
1285	ar(j,k,i,n) = work_triy(m,k)
1286	m = m + 1
1287	ENDDO
1288	ENDDO
1289
1290	ENDDO
1291
1292	ELSE
1293	!
1294	!-- Cache optimized code
1295	DO k = 1, nz
1296
1297	DO j = 0, ny
1298	work_ffty(j) = work_triy(j,k)
1299	ENDDO
1300
1301	CALL fft_y( work_ffty, 'backward' )
1302
1303	m = 0
1304	DO n = 1, pdims(2)
1305	DO j = 1, nny_pe( n-1 ) ! WARN: pcoord(j) should be used!!
1306	ar(j,k,i,n) = work_ffty(m)
1307	m = m + 1
1308	ENDDO
1309	ENDDO
1310
1311	ENDDO
1312
1313	ENDIF
1314
1315	ENDDO
1316	!$OMP END PARALLEL
1317
1318	DEALLOCATE( tri )
1319
1320	CALL cpu_log( log_point_s(39), 'fft_y + tridia', 'stop' )
1321
1322	END SUBROUTINE ffty_tri_ffty
1323
1324
1325	SUBROUTINE tridia_1dd( ddx2, ddy2, nx, ny, j, ar, tri )
1326
1327	!------------------------------------------------------------------------------!
1328	! Solves the linear system of equations for a 1d-decomposition along x (see
1329	! tridia)
1330	!
1331	! Attention: when using the intel compiler, array tri must be passed as an
1332	! argument to the contained subroutines. Otherwise addres faults
1333	! will occur.
1334	! On NEC, tri should not be passed (except for routine substi_1dd)
1335	! because this causes very bad performance.
1336	!------------------------------------------------------------------------------!
1337
1338	USE arrays_3d
1339	USE control_parameters
1340
1341	USE pegrid
1342
1343	IMPLICIT NONE
1344
1345	INTEGER :: i, j, k, nnyh, nx, ny, omp_get_thread_num, tn
1346
1347	REAL :: ddx2, ddy2
1348
1349	REAL, DIMENSION(0:nx,1:nz) :: ar
1350	REAL, DIMENSION(0:nx,0:nz-1) :: ar1
1351	REAL, DIMENSION(5,0:nx,0:nz-1) :: tri
1352
1353
1354	nnyh = ( ny + 1 ) / 2
1355
1356	!
1357	!-- Define constant elements of the tridiagonal matrix.
1358	!-- The compiler on SX6 does loop exchange. If 0:nx is a high power of 2,
1359	!-- the exchanged loops create bank conflicts. The following directive
1360	!-- prohibits loop exchange and the loops perform much better.
1361	! tn = omp_get_thread_num()
1362	! WRITE( 120+tn, * ) '+++ id=',myid,' nx=',nx,' thread=', omp_get_thread_num()
1363	! CALL local_flush( 120+tn )
1364	!CDIR NOLOOPCHG
1365	DO k = 0, nz-1
1366	DO i = 0,nx
1367	tri(2,i,k) = ddzu_pres(k+1) * ddzw(k+1)
1368	tri(3,i,k) = ddzu_pres(k+2) * ddzw(k+1)
1369	ENDDO
1370	ENDDO
1371	! WRITE( 120+tn, * ) '+++ id=',myid,' end of first tridia loop thread=', omp_get_thread_num()
1372	! CALL local_flush( 120+tn )
1373
1374	IF ( j <= nnyh ) THEN
1375	#if defined( __lc )
1376	CALL maketri_1dd( j, tri )
1377	#else
1378	CALL maketri_1dd( j )
1379	#endif
1380	ELSE
1381	#if defined( __lc )
1382	CALL maketri_1dd( ny+1-j, tri )
1383	#else
1384	CALL maketri_1dd( ny+1-j )
1385	#endif
1386	ENDIF
1387	#if defined( __lc )
1388	CALL split_1dd( tri )
1389	#else
1390	CALL split_1dd
1391	#endif
1392	CALL substi_1dd( ar, tri )
1393
1394	CONTAINS
1395
1396	#if defined( __lc )
1397	SUBROUTINE maketri_1dd( j, tri )
1398	#else
1399	SUBROUTINE maketri_1dd( j )
1400	#endif
1401
1402	!------------------------------------------------------------------------------!
1403	! computes the i- and j-dependent component of the matrix
1404	!------------------------------------------------------------------------------!
1405
1406	USE constants
1407
1408	IMPLICIT NONE
1409
1410	INTEGER :: i, j, k, nnxh
1411	REAL :: a, c
1412
1413	REAL, DIMENSION(0:nx) :: l
1414
1415	#if defined( __lc )
1416	REAL, DIMENSION(5,0:nx,0:nz-1) :: tri
1417	#endif
1418
1419
1420	nnxh = ( nx + 1 ) / 2
1421	!
1422	!-- Provide the tridiagonal matrix for solution of the Poisson equation in
1423	!-- Fourier space. The coefficients are computed following the method of
1424	!-- Schmidt et al. (DFVLR-Mitteilung 84-15), which departs from Stephan
1425	!-- Siano's original version by discretizing the Poisson equation,
1426	!-- before it is Fourier-transformed
1427	DO i = 0, nx
1428	IF ( i >= 0 .AND. i <= nnxh ) THEN
1429	l(i) = 2.0 * ( 1.0 - COS( ( 2.0 * pi * i ) / &
1430	FLOAT( nx+1 ) ) ) * ddx2 + &
1431	2.0 * ( 1.0 - COS( ( 2.0 * pi * j ) / &
1432	FLOAT( ny+1 ) ) ) * ddy2
1433	ELSE
1434	l(i) = 2.0 * ( 1.0 - COS( ( 2.0 * pi * ( nx+1-i ) ) / &
1435	FLOAT( nx+1 ) ) ) * ddx2 + &
1436	2.0 * ( 1.0 - COS( ( 2.0 * pi * j ) / &
1437	FLOAT( ny+1 ) ) ) * ddy2
1438	ENDIF
1439	ENDDO
1440
1441	DO k = 0, nz-1
1442	DO i = 0, nx
1443	a = -1.0 * ddzu_pres(k+2) * ddzw(k+1)
1444	c = -1.0 * ddzu_pres(k+1) * ddzw(k+1)
1445	tri(1,i,k) = a + c - l(i)
1446	ENDDO
1447	ENDDO
1448	IF ( ibc_p_b == 1 .OR. ibc_p_b == 2 ) THEN
1449	DO i = 0, nx
1450	tri(1,i,0) = tri(1,i,0) + tri(2,i,0)
1451	ENDDO
1452	ENDIF
1453	IF ( ibc_p_t == 1 ) THEN
1454	DO i = 0, nx
1455	tri(1,i,nz-1) = tri(1,i,nz-1) + tri(3,i,nz-1)
1456	ENDDO
1457	ENDIF
1458
1459	END SUBROUTINE maketri_1dd
1460
1461
1462	#if defined( __lc )
1463	SUBROUTINE split_1dd( tri )
1464	#else
1465	SUBROUTINE split_1dd
1466	#endif
1467
1468	!------------------------------------------------------------------------------!
1469	! Splitting of the tridiagonal matrix (Thomas algorithm)
1470	!------------------------------------------------------------------------------!
1471
1472	IMPLICIT NONE
1473
1474	INTEGER :: i, k
1475
1476	#if defined( __lc )
1477	REAL, DIMENSION(5,0:nx,0:nz-1) :: tri
1478	#endif
1479
1480
1481	!
1482	!-- Splitting
1483	DO i = 0, nx
1484	tri(4,i,0) = tri(1,i,0)
1485	ENDDO
1486	DO k = 1, nz-1
1487	DO i = 0, nx
1488	tri(5,i,k) = tri(2,i,k) / tri(4,i,k-1)
1489	tri(4,i,k) = tri(1,i,k) - tri(3,i,k-1) * tri(5,i,k)
1490	ENDDO
1491	ENDDO
1492
1493	END SUBROUTINE split_1dd
1494
1495
1496	SUBROUTINE substi_1dd( ar, tri )
1497
1498	!------------------------------------------------------------------------------!
1499	! Substitution (Forward and Backward) (Thomas algorithm)
1500	!------------------------------------------------------------------------------!
1501
1502	IMPLICIT NONE
1503
1504	INTEGER :: i, k
1505
1506	REAL, DIMENSION(0:nx,nz) :: ar
1507	REAL, DIMENSION(0:nx,0:nz-1) :: ar1
1508	REAL, DIMENSION(5,0:nx,0:nz-1) :: tri
1509
1510	!
1511	!-- Forward substitution
1512	DO i = 0, nx
1513	ar1(i,0) = ar(i,1)
1514	ENDDO
1515	DO k = 1, nz-1
1516	DO i = 0, nx
1517	ar1(i,k) = ar(i,k+1) - tri(5,i,k) * ar1(i,k-1)
1518	ENDDO
1519	ENDDO
1520
1521	!
1522	!-- Backward substitution
1523	DO i = 0, nx
1524	ar(i,nz) = ar1(i,nz-1) / tri(4,i,nz-1)
1525	ENDDO
1526	DO k = nz-2, 0, -1
1527	DO i = 0, nx
1528	ar(i,k+1) = ( ar1(i,k) - tri(3,i,k) * ar(i,k+2) ) &
1529	/ tri(4,i,k)
1530	ENDDO
1531	ENDDO
1532
1533	!
1534	!-- Indices i=0, j=0 correspond to horizontally averaged pressure.
1535	!-- The respective values of ar should be zero at all k-levels if
1536	!-- acceleration of horizontally averaged vertical velocity is zero.
1537	IF ( ibc_p_b == 1 .AND. ibc_p_t == 1 ) THEN
1538	IF ( j == 0 ) THEN
1539	DO k = 1, nz
1540	ar(0,k) = 0.0
1541	ENDDO
1542	ENDIF
1543	ENDIF
1544
1545	END SUBROUTINE substi_1dd
1546
1547	END SUBROUTINE tridia_1dd
1548
1549	#endif
1550
1551	END MODULE poisfft_mod

Note: See TracBrowser for help on using the repository browser.

Download in other formats:

| Impressum | ©Leibniz Universität Hannover |