Home

Context Navigation

source: palm/trunk/SOURCE/poisfft.f90 @ 128

Last change on this file since 128 was 128, checked in by raasch, 17 years ago
bugfix in poisfft
Property svn:keywords set to `Id`
File size: 44.4 KB

Line
1	MODULE poisfft_mod
2
3	!------------------------------------------------------------------------------!
4	! Actual revisions:
5	! -----------------
6	! Bugfix: wavenumber calculation for even nx in routines maketri
7	!
8	! Former revisions:
9	! -----------------
10	! $Id: poisfft.f90 128 2007-10-26 13:11:14Z raasch $
11	!
12	! 85 2007-05-11 09:35:14Z raasch
13	! Bugfix: work_fft*_vec removed from some PRIVATE-declarations
14	!
15	! 76 2007-03-29 00:58:32Z raasch
16	! Tridiagonal coefficients adjusted for Neumann boundary conditions both at
17	! the bottom and the top.
18	!
19	! RCS Log replace by Id keyword, revision history cleaned up
20	!
21	! Revision 1.24 2006/08/04 15:00:24 raasch
22	! Default setting of the thread number tn in case of not using OpenMP
23	!
24	! Revision 1.23 2006/02/23 12:48:38 raasch
25	! Additional compiler directive in routine tridia_1dd for preventing loop
26	! exchange on NEC-SX6
27	!
28	! Revision 1.20 2004/04/30 12:38:09 raasch
29	! Parts of former poisfft_hybrid moved to this subroutine,
30	! former subroutine changed to a module, renaming of FFT-subroutines and
31	! -module, FFTs completely substituted by calls of fft_x and fft_y,
32	! NAG fft used in the non-parallel case completely removed, l in maketri
33	! is now a 1d-array, variables passed by modules instead of using parameter
34	! lists, enlarged transposition arrays introduced
35	!
36	! Revision 1.1 1997/07/24 11:24:14 raasch
37	! Initial revision
38	!
39	!
40	! Description:
41	! ------------
42	! See below.
43	!------------------------------------------------------------------------------!
44
45	!--------------------------------------------------------------------------!
46	! poisfft !
47	! !
48	! Original version: Stephan Siano (pois3d) !
49	! !
50	! Institute of Meteorology and Climatology, University of Hannover !
51	! Germany !
52	! !
53	! Version as of July 23,1996 !
54	! !
55	! !
56	! Version for parallel computers: Siegfried Raasch !
57	! !
58	! Version as of July 03,1997 !
59	! !
60	! Solves the Poisson equation with a 2D spectral method !
61	! d^2 p / dx^2 + d^2 p / dy^2 + d^2 p / dz^2 = s !
62	! !
63	! Input: !
64	! real ar contains in the (nnx,nny,nnz) elements, !
65	! starting from the element (1,nys,nxl), the !
66	! values for s !
67	! real work Temporary array !
68	! !
69	! Output: !
70	! real ar contains the solution for p !
71	!--------------------------------------------------------------------------!
72
73	USE fft_xy
74	USE indices
75	USE transpose_indices
76
77	IMPLICIT NONE
78
79	PRIVATE
80	PUBLIC poisfft, poisfft_init
81
82	INTERFACE poisfft
83	MODULE PROCEDURE poisfft
84	END INTERFACE poisfft
85
86	INTERFACE poisfft_init
87	MODULE PROCEDURE poisfft_init
88	END INTERFACE poisfft_init
89
90	CONTAINS
91
92	SUBROUTINE poisfft_init
93
94	CALL fft_init
95
96	END SUBROUTINE poisfft_init
97
98
99	SUBROUTINE poisfft( ar, work )
100
101	USE cpulog
102	USE interfaces
103	USE pegrid
104
105	IMPLICIT NONE
106
107	REAL, DIMENSION(1:nza,nys:nyna,nxl:nxra) :: ar, work
108
109
110	CALL cpu_log( log_point_s(3), 'poisfft', 'start' )
111
112	!
113	!-- Two-dimensional Fourier Transformation in x- and y-direction.
114	#if defined( __parallel )
115	IF ( pdims(2) == 1 ) THEN
116
117	!
118	!-- 1d-domain-decomposition along x:
119	!-- FFT along y and transposition y --> x
120	CALL ffty_tr_yx( ar, work, ar )
121
122	!
123	!-- FFT along x, solving the tridiagonal system and backward FFT
124	CALL fftx_tri_fftx( ar )
125
126	!
127	!-- Transposition x --> y and backward FFT along y
128	CALL tr_xy_ffty( ar, work, ar )
129
130	ELSEIF ( pdims(1) == 1 ) THEN
131
132	!
133	!-- 1d-domain-decomposition along y:
134	!-- FFT along x and transposition x --> y
135	CALL fftx_tr_xy( ar, work, ar )
136
137	!
138	!-- FFT along y, solving the tridiagonal system and backward FFT
139	CALL ffty_tri_ffty( ar )
140
141	!
142	!-- Transposition y --> x and backward FFT along x
143	CALL tr_yx_fftx( ar, work, ar )
144
145	ELSE
146
147	!
148	!-- 2d-domain-decomposition
149	!-- Transposition z --> x
150	CALL cpu_log( log_point_s(5), 'transpo forward', 'start' )
151	CALL transpose_zx( ar, work, ar, work, ar )
152	CALL cpu_log( log_point_s(5), 'transpo forward', 'pause' )
153
154	CALL cpu_log( log_point_s(4), 'fft_x', 'start' )
155	CALL fftxp( ar, 'forward' )
156	CALL cpu_log( log_point_s(4), 'fft_x', 'pause' )
157
158	!
159	!-- Transposition x --> y
160	CALL cpu_log( log_point_s(5), 'transpo forward', 'continue' )
161	CALL transpose_xy( ar, work, ar, work, ar )
162	CALL cpu_log( log_point_s(5), 'transpo forward', 'pause' )
163
164	CALL cpu_log( log_point_s(7), 'fft_y', 'start' )
165	CALL fftyp( ar, 'forward' )
166	CALL cpu_log( log_point_s(7), 'fft_y', 'pause' )
167
168	!
169	!-- Transposition y --> z
170	CALL cpu_log( log_point_s(5), 'transpo forward', 'continue' )
171	CALL transpose_yz( ar, work, ar, work, ar )
172	CALL cpu_log( log_point_s(5), 'transpo forward', 'stop' )
173
174	!
175	!-- Solve the Poisson equation in z-direction in cartesian space.
176	CALL cpu_log( log_point_s(6), 'tridia', 'start' )
177	CALL tridia( ar )
178	CALL cpu_log( log_point_s(6), 'tridia', 'stop' )
179
180	!
181	!-- Inverse Fourier Transformation
182	!-- Transposition z --> y
183	CALL cpu_log( log_point_s(8), 'transpo invers', 'start' )
184	CALL transpose_zy( ar, work, ar, work, ar )
185	CALL cpu_log( log_point_s(8), 'transpo invers', 'pause' )
186
187	CALL cpu_log( log_point_s(7), 'fft_y', 'continue' )
188	CALL fftyp( ar, 'backward' )
189	CALL cpu_log( log_point_s(7), 'fft_y', 'stop' )
190
191	!
192	!-- Transposition y --> x
193	CALL cpu_log( log_point_s(8), 'transpo invers', 'continue' )
194	CALL transpose_yx( ar, work, ar, work, ar )
195	CALL cpu_log( log_point_s(8), 'transpo invers', 'pause' )
196
197	CALL cpu_log( log_point_s(4), 'fft_x', 'continue' )
198	CALL fftxp( ar, 'backward' )
199	CALL cpu_log( log_point_s(4), 'fft_x', 'stop' )
200
201	!
202	!-- Transposition x --> z
203	CALL cpu_log( log_point_s(8), 'transpo invers', 'continue' )
204	CALL transpose_xz( ar, work, ar, work, ar )
205	CALL cpu_log( log_point_s(8), 'transpo invers', 'stop' )
206
207	ENDIF
208
209	#else
210
211	!
212	!-- Two-dimensional Fourier Transformation along x- and y-direction.
213	CALL cpu_log( log_point_s(4), 'fft_x', 'start' )
214	CALL fftx( ar, 'forward' )
215	CALL cpu_log( log_point_s(4), 'fft_x', 'pause' )
216	CALL cpu_log( log_point_s(7), 'fft_y', 'start' )
217	CALL ffty( ar, 'forward' )
218	CALL cpu_log( log_point_s(7), 'fft_y', 'pause' )
219
220	!
221	!-- Solve the Poisson equation in z-direction in cartesian space.
222	CALL cpu_log( log_point_s(6), 'tridia', 'start' )
223	CALL tridia( ar )
224	CALL cpu_log( log_point_s(6), 'tridia', 'stop' )
225
226	!
227	!-- Inverse Fourier Transformation.
228	CALL cpu_log( log_point_s(7), 'fft_y', 'continue' )
229	CALL ffty( ar, 'backward' )
230	CALL cpu_log( log_point_s(7), 'fft_y', 'stop' )
231	CALL cpu_log( log_point_s(4), 'fft_x', 'continue' )
232	CALL fftx( ar, 'backward' )
233	CALL cpu_log( log_point_s(4), 'fft_x', 'stop' )
234
235	#endif
236
237	CALL cpu_log( log_point_s(3), 'poisfft', 'stop' )
238
239	END SUBROUTINE poisfft
240
241
242
243	SUBROUTINE tridia( ar )
244
245	!------------------------------------------------------------------------------!
246	! solves the linear system of equations:
247	!
248	! -(4 pi^2(i^2/(dx^2nnx^2)+j^2/(dy^2nny^2))+
249	! 1/(dzu(k)dzw(k))+1/(dzu(k-1)dzw(k)))*p(i,j,k)+
250	! 1/(dzu(k)dzw(k))p(i,j,k+1)+1/(dzu(k-1)dzw(k))p(i,j,k-1)=d(i,j,k)
251	!
252	! by using the Thomas algorithm
253	!------------------------------------------------------------------------------!
254
255	USE arrays_3d
256
257	IMPLICIT NONE
258
259	INTEGER :: i, j, k, nnyh
260
261	REAL, DIMENSION(nxl_z:nxr_z,0:nz-1) :: ar1
262	REAL, DIMENSION(5,nxl_z:nxr_z,0:nz-1) :: tri
263
264	#if defined( __parallel )
265	REAL :: ar(nxl_z:nxr_za,nys_z:nyn_za,1:nza)
266	#else
267	REAL :: ar(1:nz,nys_z:nyn_z,nxl_z:nxr_z)
268	#endif
269
270
271	nnyh = (ny+1) / 2
272
273	!
274	!-- Define constant elements of the tridiagonal matrix.
275	DO k = 0, nz-1
276	DO i = nxl_z, nxr_z
277	tri(2,i,k) = ddzu(k+1) * ddzw(k+1)
278	tri(3,i,k) = ddzu(k+2) * ddzw(k+1)
279	ENDDO
280	ENDDO
281
282	#if defined( __parallel )
283	!
284	!-- Repeat for all y-levels.
285	DO j = nys_z, nyn_z
286	IF ( j <= nnyh ) THEN
287	CALL maketri( tri, j )
288	ELSE
289	CALL maketri( tri, ny+1-j )
290	ENDIF
291	CALL split( tri )
292	CALL substi( ar, ar1, tri, j )
293	ENDDO
294	#else
295	!
296	!-- First y-level.
297	CALL maketri( tri, nys_z )
298	CALL split( tri )
299	CALL substi( ar, ar1, tri, 0 )
300
301	!
302	!-- Further y-levels.
303	DO j = 1, nnyh - 1
304	CALL maketri( tri, j )
305	CALL split( tri )
306	CALL substi( ar, ar1, tri, j )
307	CALL substi( ar, ar1, tri, ny+1-j )
308	ENDDO
309	CALL maketri( tri, nnyh )
310	CALL split( tri )
311	CALL substi( ar, ar1, tri, nnyh+nys )
312	#endif
313
314	CONTAINS
315
316	SUBROUTINE maketri( tri, j )
317
318	!------------------------------------------------------------------------------!
319	! Computes the i- and j-dependent component of the matrix
320	!------------------------------------------------------------------------------!
321
322	USE arrays_3d
323	USE constants
324	USE control_parameters
325	USE grid_variables
326
327	IMPLICIT NONE
328
329	INTEGER :: i, j, k, nnxh
330	REAL :: a, c
331	REAL :: ll(nxl_z:nxr_z)
332	REAL :: tri(5,nxl_z:nxr_z,0:nz-1)
333
334
335	nnxh = ( nx + 1 ) / 2
336
337	!
338	!-- Provide the tridiagonal matrix for solution of the Poisson equation in
339	!-- Fourier space. The coefficients are computed following the method of
340	!-- Schmidt et al. (DFVLR-Mitteilung 84-15), which departs from Stephan
341	!-- Siano's original version by discretizing the Poisson equation,
342	!-- before it is Fourier-transformed
343	#if defined( __parallel )
344	DO i = nxl_z, nxr_z
345	IF ( i >= 0 .AND. i <= nnxh ) THEN
346	ll(i) = 2.0 * ( 1.0 - COS( ( 2.0 * pi * i ) / &
347	FLOAT( nx+1 ) ) ) / ( dx * dx ) + &
348	2.0 * ( 1.0 - COS( ( 2.0 * pi * j ) / &
349	FLOAT( ny+1 ) ) ) / ( dy * dy )
350	ELSE
351	ll(i) = 2.0 * ( 1.0 - COS( ( 2.0 * pi * ( nx+1-i ) ) / &
352	FLOAT( nx+1 ) ) ) / ( dx * dx ) + &
353	2.0 * ( 1.0 - COS( ( 2.0 * pi * j ) / &
354	FLOAT( ny+1 ) ) ) / ( dy * dy )
355	ENDIF
356	DO k = 0,nz-1
357	a = -1.0 * ddzu(k+2) * ddzw(k+1)
358	c = -1.0 * ddzu(k+1) * ddzw(k+1)
359	tri(1,i,k) = a + c - ll(i)
360	ENDDO
361	ENDDO
362	#else
363	DO i = 0, nnxh
364	ll(i) = 2.0 * ( 1.0 - COS( ( 2.0 * pi * i ) / FLOAT( nx+1 ) ) ) / &
365	( dx * dx ) + &
366	2.0 * ( 1.0 - COS( ( 2.0 * pi * j ) / FLOAT( ny+1 ) ) ) / &
367	( dy * dy )
368	DO k = 0, nz-1
369	a = -1.0 * ddzu(k+2) * ddzw(k+1)
370	c = -1.0 * ddzu(k+1) * ddzw(k+1)
371	tri(1,i,k) = a + c - ll(i)
372	IF ( i >= 1 .and. i < nnxh ) THEN
373	tri(1,nx+1-i,k) = tri(1,i,k)
374	ENDIF
375	ENDDO
376	ENDDO
377	#endif
378	IF ( ibc_p_b == 1 .OR. ibc_p_b == 2 ) THEN
379	DO i = nxl_z, nxr_z
380	tri(1,i,0) = tri(1,i,0) + tri(2,i,0)
381	ENDDO
382	ENDIF
383	IF ( ibc_p_t == 1 ) THEN
384	DO i = nxl_z, nxr_z
385	tri(1,i,nz-1) = tri(1,i,nz-1) + tri(3,i,nz-1)
386	ENDDO
387	ENDIF
388
389	END SUBROUTINE maketri
390
391
392	SUBROUTINE substi( ar, ar1, tri, j )
393
394	!------------------------------------------------------------------------------!
395	! Substitution (Forward and Backward) (Thomas algorithm)
396	!------------------------------------------------------------------------------!
397
398	USE control_parameters
399
400	IMPLICIT NONE
401
402	INTEGER :: i, j, k
403	REAL :: ar1(nxl_z:nxr_z,0:nz-1)
404	REAL :: tri(5,nxl_z:nxr_z,0:nz-1)
405	#if defined( __parallel )
406	REAL :: ar(nxl_z:nxr_za,nys_z:nyn_za,1:nza)
407	#else
408	REAL :: ar(1:nz,nys_z:nyn_z,nxl_z:nxr_z)
409	#endif
410
411	!
412	!-- Forward substitution.
413	DO i = nxl_z, nxr_z
414	#if defined( __parallel )
415	ar1(i,0) = ar(i,j,1)
416	#else
417	ar1(i,0) = ar(1,j,i)
418	#endif
419	ENDDO
420	DO k = 1, nz - 1
421	DO i = nxl_z, nxr_z
422	#if defined( __parallel )
423	ar1(i,k) = ar(i,j,k+1) - tri(5,i,k) * ar1(i,k-1)
424	#else
425	ar1(i,k) = ar(k+1,j,i) - tri(5,i,k) * ar1(i,k-1)
426	#endif
427	ENDDO
428	ENDDO
429
430	!
431	!-- Backward substitution.
432	DO i = nxl_z, nxr_z
433	#if defined( __parallel )
434	ar(i,j,nz) = ar1(i,nz-1) / tri(4,i,nz-1)
435	#else
436	ar(nz,j,i) = ar1(i,nz-1) / tri(4,i,nz-1)
437	#endif
438	ENDDO
439	DO k = nz-2, 0, -1
440	DO i = nxl_z, nxr_z
441	#if defined( __parallel )
442	ar(i,j,k+1) = ( ar1(i,k) - tri(3,i,k) * ar(i,j,k+2) ) &
443	/ tri(4,i,k)
444	#else
445	ar(k+1,j,i) = ( ar1(i,k) - tri(3,i,k) * ar(k+2,j,i) ) &
446	/ tri(4,i,k)
447	#endif
448	ENDDO
449	ENDDO
450
451	!
452	!-- Indices i=0, j=0 correspond to horizontally averaged pressure.
453	!-- The respective values of ar should be zero at all k-levels if
454	!-- acceleration of horizontally averaged vertical velocity is zero.
455	IF ( ibc_p_b == 1 .AND. ibc_p_t == 1 ) THEN
456	IF ( j == 0 .AND. nxl_z == 0 ) THEN
457	#if defined( __parallel )
458	DO k = 1, nz
459	ar(nxl_z,j,k) = 0.0
460	ENDDO
461	#else
462	DO k = 1, nz
463	ar(k,j,nxl_z) = 0.0
464	ENDDO
465	#endif
466	ENDIF
467	ENDIF
468
469	END SUBROUTINE substi
470
471
472	SUBROUTINE split( tri )
473
474	!------------------------------------------------------------------------------!
475	! Splitting of the tridiagonal matrix (Thomas algorithm)
476	!------------------------------------------------------------------------------!
477
478	IMPLICIT NONE
479
480	INTEGER :: i, k
481	REAL :: tri(5,nxl_z:nxr_z,0:nz-1)
482
483	!
484	!-- Splitting.
485	DO i = nxl_z, nxr_z
486	tri(4,i,0) = tri(1,i,0)
487	ENDDO
488	DO k = 1, nz-1
489	DO i = nxl_z, nxr_z
490	tri(5,i,k) = tri(2,i,k) / tri(4,i,k-1)
491	tri(4,i,k) = tri(1,i,k) - tri(3,i,k-1) * tri(5,i,k)
492	ENDDO
493	ENDDO
494
495	END SUBROUTINE split
496
497	END SUBROUTINE tridia
498
499
500	#if defined( __parallel )
501	SUBROUTINE fftxp( ar, direction )
502
503	!------------------------------------------------------------------------------!
504	! Fourier-transformation along x-direction Parallelized version
505	!------------------------------------------------------------------------------!
506
507	IMPLICIT NONE
508
509	CHARACTER (LEN=*) :: direction
510	INTEGER :: j, k
511	REAL :: ar(0:nxa,nys_x:nyn_xa,nzb_x:nzt_xa)
512
513	!
514	!-- Performing the fft with one of the methods implemented
515	DO k = nzb_x, nzt_x
516	DO j = nys_x, nyn_x
517	CALL fft_x( ar(0:nx,j,k), direction )
518	ENDDO
519	ENDDO
520
521	END SUBROUTINE fftxp
522
523	#else
524	SUBROUTINE fftx( ar, direction )
525
526	!------------------------------------------------------------------------------!
527	! Fourier-transformation along x-direction Non parallel version
528	!------------------------------------------------------------------------------!
529
530	IMPLICIT NONE
531
532	CHARACTER (LEN=*) :: direction
533	INTEGER :: i, j, k
534	REAL :: ar(1:nz,0:ny,0:nx)
535
536	!
537	!-- Performing the fft with one of the methods implemented
538	DO k = 1, nz
539	DO j = 0, ny
540	CALL fft_x( ar(k,j,0:nx), direction )
541	ENDDO
542	ENDDO
543
544	END SUBROUTINE fftx
545	#endif
546
547
548	#if defined( __parallel )
549	SUBROUTINE fftyp( ar, direction )
550
551	!------------------------------------------------------------------------------!
552	! Fourier-transformation along y-direction Parallelized version
553	!------------------------------------------------------------------------------!
554
555	IMPLICIT NONE
556
557	CHARACTER (LEN=*) :: direction
558	INTEGER :: i, k
559	REAL :: ar(0:nya,nxl_y:nxr_ya,nzb_y:nzt_ya)
560
561	!
562	!-- Performing the fft with one of the methods implemented
563	DO k = nzb_y, nzt_y
564	DO i = nxl_y, nxr_y
565	CALL fft_y( ar(0:ny,i,k), direction )
566	ENDDO
567	ENDDO
568
569	END SUBROUTINE fftyp
570
571	#else
572	SUBROUTINE ffty( ar, direction )
573
574	!------------------------------------------------------------------------------!
575	! Fourier-transformation along y-direction Non parallel version
576	!------------------------------------------------------------------------------!
577
578	IMPLICIT NONE
579
580	CHARACTER (LEN=*) :: direction
581	INTEGER :: i, k
582	REAL :: ar(1:nz,0:ny,0:nx)
583
584	!
585	!-- Performing the fft with one of the methods implemented
586	DO k = 1, nz
587	DO i = 0, nx
588	CALL fft_y( ar(k,0:ny,i), direction )
589	ENDDO
590	ENDDO
591
592	END SUBROUTINE ffty
593	#endif
594
595	#if defined( __parallel )
596	SUBROUTINE ffty_tr_yx( f_in, work, f_out )
597
598	!------------------------------------------------------------------------------!
599	! Fourier-transformation along y with subsequent transposition y --> x for
600	! a 1d-decomposition along x
601	!
602	! ATTENTION: The performance of this routine is much faster on the NEC-SX6,
603	! if the first index of work_ffty_vec is odd. Otherwise
604	! memory bank conflicts may occur (especially if the index is a
605	! multiple of 128). That's why work_ffty_vec is dimensioned as
606	! 0:ny+1.
607	! Of course, this will not work if users are using an odd number
608	! of gridpoints along y.
609	!------------------------------------------------------------------------------!
610
611	USE control_parameters
612	USE cpulog
613	USE indices
614	USE interfaces
615	USE pegrid
616	USE transpose_indices
617
618	IMPLICIT NONE
619
620	INTEGER :: i, iend, iouter, ir, j, k
621	INTEGER, PARAMETER :: stridex = 4
622
623	REAL, DIMENSION(0:ny,stridex) :: work_ffty
624	#if defined( __nec )
625	REAL, DIMENSION(0:ny+1,1:nz,nxl:nxr) :: work_ffty_vec
626	#endif
627	REAL, DIMENSION(1:nza,0:nya,nxl:nxra) :: f_in
628	REAL, DIMENSION(nnx,1:nza,nys_x:nyn_xa,pdims(1)) :: f_out
629	REAL, DIMENSION(nxl:nxra,1:nza,0:nya) :: work
630
631	!
632	!-- Carry out the FFT along y, where all data are present due to the
633	!-- 1d-decomposition along x. Resort the data in a way that x becomes
634	!-- the first index.
635	CALL cpu_log( log_point_s(7), 'fft_y', 'start' )
636
637	IF ( host(1:3) == 'nec' ) THEN
638	#if defined( __nec )
639	!
640	!-- Code optimized for vector processors
641	!$OMP PARALLEL PRIVATE ( i, j, k )
642	!$OMP DO
643	DO i = nxl, nxr
644
645	DO j = 0, ny
646	DO k = 1, nz
647	work_ffty_vec(j,k,i) = f_in(k,j,i)
648	ENDDO
649	ENDDO
650
651	CALL fft_y_m( work_ffty_vec(:,:,i), ny+1, 'forward' )
652
653	ENDDO
654
655	!$OMP DO
656	DO k = 1, nz
657	DO j = 0, ny
658	DO i = nxl, nxr
659	work(i,k,j) = work_ffty_vec(j,k,i)
660	ENDDO
661	ENDDO
662	ENDDO
663	!$OMP END PARALLEL
664	#endif
665
666	ELSE
667
668	!
669	!-- Cache optimized code.
670	!-- The i-(x-)direction is split into a strided outer loop and an inner
671	!-- loop for better cache performance
672	!$OMP PARALLEL PRIVATE (i,iend,iouter,ir,j,k,work_ffty)
673	!$OMP DO
674	DO iouter = nxl, nxr, stridex
675
676	iend = MIN( iouter+stridex-1, nxr ) ! Upper bound for inner i loop
677
678	DO k = 1, nz
679
680	DO i = iouter, iend
681
682	ir = i-iouter+1 ! counter within a stride
683	DO j = 0, ny
684	work_ffty(j,ir) = f_in(k,j,i)
685	ENDDO
686	!
687	!-- FFT along y
688	CALL fft_y( work_ffty(:,ir), 'forward' )
689
690	ENDDO
691
692	!
693	!-- Resort
694	DO j = 0, ny
695	DO i = iouter, iend
696	work(i,k,j) = work_ffty(j,i-iouter+1)
697	ENDDO
698	ENDDO
699
700	ENDDO
701
702	ENDDO
703	!$OMP END PARALLEL
704
705	ENDIF
706	CALL cpu_log( log_point_s(7), 'fft_y', 'pause' )
707
708	!
709	!-- Transpose array
710	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
711	CALL MPI_ALLTOALL( work(nxl,1,0), sendrecvcount_xy, MPI_REAL, &
712	f_out(1,1,nys_x,1), sendrecvcount_xy, MPI_REAL, &
713	comm1dx, ierr )
714	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
715
716	END SUBROUTINE ffty_tr_yx
717
718
719	SUBROUTINE tr_xy_ffty( f_in, work, f_out )
720
721	!------------------------------------------------------------------------------!
722	! Transposition x --> y with a subsequent backward Fourier transformation for
723	! a 1d-decomposition along x
724	!------------------------------------------------------------------------------!
725
726	USE control_parameters
727	USE cpulog
728	USE indices
729	USE interfaces
730	USE pegrid
731	USE transpose_indices
732
733	IMPLICIT NONE
734
735	INTEGER :: i, iend, iouter, ir, j, k
736	INTEGER, PARAMETER :: stridex = 4
737
738	REAL, DIMENSION(0:ny,stridex) :: work_ffty
739	#if defined( __nec )
740	REAL, DIMENSION(0:ny+1,1:nz,nxl:nxr) :: work_ffty_vec
741	#endif
742	REAL, DIMENSION(nnx,1:nza,nys_x:nyn_xa,pdims(1)) :: f_in
743	REAL, DIMENSION(1:nza,0:nya,nxl:nxra) :: f_out
744	REAL, DIMENSION(nxl:nxra,1:nza,0:nya) :: work
745
746	!
747	!-- Transpose array
748	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
749	CALL MPI_ALLTOALL( f_in(1,1,nys_x,1), sendrecvcount_xy, MPI_REAL, &
750	work(nxl,1,0), sendrecvcount_xy, MPI_REAL, &
751	comm1dx, ierr )
752	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
753
754	!
755	!-- Resort the data in a way that y becomes the first index and carry out the
756	!-- backward fft along y.
757	CALL cpu_log( log_point_s(7), 'fft_y', 'continue' )
758
759	IF ( host(1:3) == 'nec' ) THEN
760	#if defined( __nec )
761	!
762	!-- Code optimized for vector processors
763	!$OMP PARALLEL PRIVATE ( i, j, k )
764	!$OMP DO
765	DO k = 1, nz
766	DO j = 0, ny
767	DO i = nxl, nxr
768	work_ffty_vec(j,k,i) = work(i,k,j)
769	ENDDO
770	ENDDO
771	ENDDO
772
773	!$OMP DO
774	DO i = nxl, nxr
775
776	CALL fft_y_m( work_ffty_vec(:,:,i), ny+1, 'backward' )
777
778	DO j = 0, ny
779	DO k = 1, nz
780	f_out(k,j,i) = work_ffty_vec(j,k,i)
781	ENDDO
782	ENDDO
783
784	ENDDO
785	!$OMP END PARALLEL
786	#endif
787
788	ELSE
789
790	!
791	!-- Cache optimized code.
792	!-- The i-(x-)direction is split into a strided outer loop and an inner
793	!-- loop for better cache performance
794	!$OMP PARALLEL PRIVATE ( i, iend, iouter, ir, j, k, work_ffty )
795	!$OMP DO
796	DO iouter = nxl, nxr, stridex
797
798	iend = MIN( iouter+stridex-1, nxr ) ! Upper bound for inner i loop
799
800	DO k = 1, nz
801	!
802	!-- Resort
803	DO j = 0, ny
804	DO i = iouter, iend
805	work_ffty(j,i-iouter+1) = work(i,k,j)
806	ENDDO
807	ENDDO
808
809	DO i = iouter, iend
810
811	!
812	!-- FFT along y
813	ir = i-iouter+1 ! counter within a stride
814	CALL fft_y( work_ffty(:,ir), 'backward' )
815
816	DO j = 0, ny
817	f_out(k,j,i) = work_ffty(j,ir)
818	ENDDO
819	ENDDO
820
821	ENDDO
822
823	ENDDO
824	!$OMP END PARALLEL
825
826	ENDIF
827
828	CALL cpu_log( log_point_s(7), 'fft_y', 'stop' )
829
830	END SUBROUTINE tr_xy_ffty
831
832
833	SUBROUTINE fftx_tri_fftx( ar )
834
835	!------------------------------------------------------------------------------!
836	! FFT along x, solution of the tridiagonal system and backward FFT for
837	! a 1d-decomposition along x
838	!
839	! WARNING: this subroutine may still not work for hybrid parallelization
840	! with OpenMP (for possible necessary changes see the original
841	! routine poisfft_hybrid, developed by Klaus Ketelsen, May 2002)
842	!------------------------------------------------------------------------------!
843
844	USE control_parameters
845	USE cpulog
846	USE grid_variables
847	USE indices
848	USE interfaces
849	USE pegrid
850	USE transpose_indices
851
852	IMPLICIT NONE
853
854	character(len=3) :: myth_char
855
856	INTEGER :: i, j, k, m, n, omp_get_thread_num, tn
857
858	REAL, DIMENSION(0:nx) :: work_fftx
859	REAL, DIMENSION(0:nx,1:nz) :: work_trix
860	REAL, DIMENSION(nnx,1:nza,nys_x:nyn_xa,pdims(1)) :: ar
861	REAL, DIMENSION(:,:,:,:), ALLOCATABLE :: tri
862
863
864	CALL cpu_log( log_point_s(33), 'fft_x + tridia', 'start' )
865
866	ALLOCATE( tri(5,0:nx,0:nz-1,0:threads_per_task-1) )
867
868	tn = 0 ! Default thread number in case of one thread
869	!$OMP PARALLEL DO PRIVATE ( i, j, k, m, n, tn, work_fftx, work_trix )
870	DO j = nys_x, nyn_x
871
872	!$ tn = omp_get_thread_num()
873
874	IF ( host(1:3) == 'nec' ) THEN
875	!
876	!-- Code optimized for vector processors
877	DO k = 1, nz
878
879	m = 0
880	DO n = 1, pdims(1)
881	DO i = 1, nnx_pe( n-1 ) ! WARN: pcoord(i) should be used!!
882	work_trix(m,k) = ar(i,k,j,n)
883	m = m + 1
884	ENDDO
885	ENDDO
886
887	ENDDO
888
889	CALL fft_x_m( work_trix, 'forward' )
890
891	ELSE
892	!
893	!-- Cache optimized code
894	DO k = 1, nz
895
896	m = 0
897	DO n = 1, pdims(1)
898	DO i = 1, nnx_pe( n-1 ) ! WARN: pcoord(i) should be used!!
899	work_fftx(m) = ar(i,k,j,n)
900	m = m + 1
901	ENDDO
902	ENDDO
903
904	CALL fft_x( work_fftx, 'forward' )
905
906	DO i = 0, nx
907	work_trix(i,k) = work_fftx(i)
908	ENDDO
909
910	ENDDO
911
912	ENDIF
913
914	!
915	!-- Solve the linear equation system
916	CALL tridia_1dd( ddx2, ddy2, nx, ny, j, work_trix, tri(:,:,:,tn) )
917
918	IF ( host(1:3) == 'nec' ) THEN
919	!
920	!-- Code optimized for vector processors
921	CALL fft_x_m( work_trix, 'backward' )
922
923	DO k = 1, nz
924
925	m = 0
926	DO n = 1, pdims(1)
927	DO i = 1, nnx_pe( n-1 ) ! WARN: pcoord(i) should be used!!
928	ar(i,k,j,n) = work_trix(m,k)
929	m = m + 1
930	ENDDO
931	ENDDO
932
933	ENDDO
934
935	ELSE
936	!
937	!-- Cache optimized code
938	DO k = 1, nz
939
940	DO i = 0, nx
941	work_fftx(i) = work_trix(i,k)
942	ENDDO
943
944	CALL fft_x( work_fftx, 'backward' )
945
946	m = 0
947	DO n = 1, pdims(1)
948	DO i = 1, nnx_pe( n-1 ) ! WARN: pcoord(i) should be used!!
949	ar(i,k,j,n) = work_fftx(m)
950	m = m + 1
951	ENDDO
952	ENDDO
953
954	ENDDO
955
956	ENDIF
957
958	ENDDO
959
960	DEALLOCATE( tri )
961
962	CALL cpu_log( log_point_s(33), 'fft_x + tridia', 'stop' )
963
964	END SUBROUTINE fftx_tri_fftx
965
966
967	SUBROUTINE fftx_tr_xy( f_in, work, f_out )
968
969	!------------------------------------------------------------------------------!
970	! Fourier-transformation along x with subsequent transposition x --> y for
971	! a 1d-decomposition along y
972	!
973	! ATTENTION: The NEC-branch of this routine may significantly profit from
974	! further optimizations. So far, performance is much worse than
975	! for routine ffty_tr_yx (more than three times slower).
976	!------------------------------------------------------------------------------!
977
978	USE control_parameters
979	USE cpulog
980	USE indices
981	USE interfaces
982	USE pegrid
983	USE transpose_indices
984
985	IMPLICIT NONE
986
987	INTEGER :: i, j, k
988
989	REAL, DIMENSION(0:nx,1:nz,nys:nyn) :: work_fftx
990	REAL, DIMENSION(1:nza,nys:nyna,0:nxa) :: f_in
991	REAL, DIMENSION(nny,1:nza,nxl_y:nxr_ya,pdims(2)) :: f_out
992	REAL, DIMENSION(nys:nyna,1:nza,0:nxa) :: work
993
994	!
995	!-- Carry out the FFT along x, where all data are present due to the
996	!-- 1d-decomposition along y. Resort the data in a way that y becomes
997	!-- the first index.
998	CALL cpu_log( log_point_s(4), 'fft_x', 'start' )
999
1000	IF ( host(1:3) == 'nec' ) THEN
1001	!
1002	!-- Code for vector processors
1003	!$OMP PARALLEL PRIVATE ( i, j, k )
1004	!$OMP DO
1005	DO i = 0, nx
1006
1007	DO j = nys, nyn
1008	DO k = 1, nz
1009	work_fftx(i,k,j) = f_in(k,j,i)
1010	ENDDO
1011	ENDDO
1012
1013	ENDDO
1014
1015	!$OMP DO
1016	DO j = nys, nyn
1017
1018	CALL fft_x_m( work_fftx(:,:,j), 'forward' )
1019
1020	DO k = 1, nz
1021	DO i = 0, nx
1022	work(j,k,i) = work_fftx(i,k,j)
1023	ENDDO
1024	ENDDO
1025
1026	ENDDO
1027	!$OMP END PARALLEL
1028
1029	ELSE
1030
1031	!
1032	!-- Cache optimized code (there might be still a potential for better
1033	!-- optimization).
1034	!$OMP PARALLEL PRIVATE (i,j,k,work_fftx)
1035	!$OMP DO
1036	DO i = 0, nx
1037
1038	DO j = nys, nyn
1039	DO k = 1, nz
1040	work_fftx(i,k,j) = f_in(k,j,i)
1041	ENDDO
1042	ENDDO
1043
1044	ENDDO
1045
1046	!$OMP DO
1047	DO j = nys, nyn
1048	DO k = 1, nz
1049
1050	CALL fft_x( work_fftx(0:nx,k,j), 'forward' )
1051
1052	DO i = 0, nx
1053	work(j,k,i) = work_fftx(i,k,j)
1054	ENDDO
1055	ENDDO
1056
1057	ENDDO
1058	!$OMP END PARALLEL
1059
1060	ENDIF
1061	CALL cpu_log( log_point_s(4), 'fft_x', 'pause' )
1062
1063	!
1064	!-- Transpose array
1065	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
1066	CALL MPI_ALLTOALL( work(nys,1,0), sendrecvcount_xy, MPI_REAL, &
1067	f_out(1,1,nxl_y,1), sendrecvcount_xy, MPI_REAL, &
1068	comm1dy, ierr )
1069	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
1070
1071	END SUBROUTINE fftx_tr_xy
1072
1073
1074	SUBROUTINE tr_yx_fftx( f_in, work, f_out )
1075
1076	!------------------------------------------------------------------------------!
1077	! Transposition y --> x with a subsequent backward Fourier transformation for
1078	! a 1d-decomposition along x
1079	!------------------------------------------------------------------------------!
1080
1081	USE control_parameters
1082	USE cpulog
1083	USE indices
1084	USE interfaces
1085	USE pegrid
1086	USE transpose_indices
1087
1088	IMPLICIT NONE
1089
1090	INTEGER :: i, j, k
1091
1092	REAL, DIMENSION(0:nx,1:nz,nys:nyn) :: work_fftx
1093	REAL, DIMENSION(nny,1:nza,nxl_y:nxr_ya,pdims(2)) :: f_in
1094	REAL, DIMENSION(1:nza,nys:nyna,0:nxa) :: f_out
1095	REAL, DIMENSION(nys:nyna,1:nza,0:nxa) :: work
1096
1097	!
1098	!-- Transpose array
1099	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
1100	CALL MPI_ALLTOALL( f_in(1,1,nxl_y,1), sendrecvcount_xy, MPI_REAL, &
1101	work(nys,1,0), sendrecvcount_xy, MPI_REAL, &
1102	comm1dy, ierr )
1103	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
1104
1105	!
1106	!-- Carry out the FFT along x, where all data are present due to the
1107	!-- 1d-decomposition along y. Resort the data in a way that y becomes
1108	!-- the first index.
1109	CALL cpu_log( log_point_s(4), 'fft_x', 'continue' )
1110
1111	IF ( host(1:3) == 'nec' ) THEN
1112	!
1113	!-- Code optimized for vector processors
1114	!$OMP PARALLEL PRIVATE ( i, j, k )
1115	!$OMP DO
1116	DO j = nys, nyn
1117
1118	DO k = 1, nz
1119	DO i = 0, nx
1120	work_fftx(i,k,j) = work(j,k,i)
1121	ENDDO
1122	ENDDO
1123
1124	CALL fft_x_m( work_fftx(:,:,j), 'backward' )
1125
1126	ENDDO
1127
1128	!$OMP DO
1129	DO i = 0, nx
1130	DO j = nys, nyn
1131	DO k = 1, nz
1132	f_out(k,j,i) = work_fftx(i,k,j)
1133	ENDDO
1134	ENDDO
1135	ENDDO
1136	!$OMP END PARALLEL
1137
1138	ELSE
1139
1140	!
1141	!-- Cache optimized code (there might be still a potential for better
1142	!-- optimization).
1143	!$OMP PARALLEL PRIVATE (i,j,k,work_fftx)
1144	!$OMP DO
1145	DO j = nys, nyn
1146	DO k = 1, nz
1147
1148	DO i = 0, nx
1149	work_fftx(i,k,j) = work(j,k,i)
1150	ENDDO
1151
1152	CALL fft_x( work_fftx(0:nx,k,j), 'backward' )
1153
1154	ENDDO
1155	ENDDO
1156
1157	!$OMP DO
1158	DO i = 0, nx
1159	DO j = nys, nyn
1160	DO k = 1, nz
1161	f_out(k,j,i) = work_fftx(i,k,j)
1162	ENDDO
1163	ENDDO
1164	ENDDO
1165	!$OMP END PARALLEL
1166
1167	ENDIF
1168	CALL cpu_log( log_point_s(4), 'fft_x', 'stop' )
1169
1170	END SUBROUTINE tr_yx_fftx
1171
1172
1173	SUBROUTINE ffty_tri_ffty( ar )
1174
1175	!------------------------------------------------------------------------------!
1176	! FFT along y, solution of the tridiagonal system and backward FFT for
1177	! a 1d-decomposition along y
1178	!
1179	! WARNING: this subroutine may still not work for hybrid parallelization
1180	! with OpenMP (for possible necessary changes see the original
1181	! routine poisfft_hybrid, developed by Klaus Ketelsen, May 2002)
1182	!------------------------------------------------------------------------------!
1183
1184	USE control_parameters
1185	USE cpulog
1186	USE grid_variables
1187	USE indices
1188	USE interfaces
1189	USE pegrid
1190	USE transpose_indices
1191
1192	IMPLICIT NONE
1193
1194	INTEGER :: i, j, k, m, n, omp_get_thread_num, tn
1195
1196	REAL, DIMENSION(0:ny) :: work_ffty
1197	REAL, DIMENSION(0:ny,1:nz) :: work_triy
1198	REAL, DIMENSION(nny,1:nza,nxl_y:nxr_ya,pdims(2)) :: ar
1199	REAL, DIMENSION(:,:,:,:), ALLOCATABLE :: tri
1200
1201
1202	CALL cpu_log( log_point_s(39), 'fft_y + tridia', 'start' )
1203
1204	ALLOCATE( tri(5,0:ny,0:nz-1,0:threads_per_task-1) )
1205
1206	tn = 0 ! Default thread number in case of one thread
1207	!$OMP PARALLEL PRIVATE ( i, j, k, m, n, tn, work_ffty, work_triy )
1208	!$OMP DO
1209	DO i = nxl_y, nxr_y
1210
1211	!$ tn = omp_get_thread_num()
1212
1213	IF ( host(1:3) == 'nec' ) THEN
1214	!
1215	!-- Code optimized for vector processors
1216	DO k = 1, nz
1217
1218	m = 0
1219	DO n = 1, pdims(2)
1220	DO j = 1, nny_pe( n-1 ) ! WARN: pcoord(j) should be used!!
1221	work_triy(m,k) = ar(j,k,i,n)
1222	m = m + 1
1223	ENDDO
1224	ENDDO
1225
1226	ENDDO
1227
1228	CALL fft_y_m( work_triy, ny, 'forward' )
1229
1230	ELSE
1231	!
1232	!-- Cache optimized code
1233	DO k = 1, nz
1234
1235	m = 0
1236	DO n = 1, pdims(2)
1237	DO j = 1, nny_pe( n-1 ) ! WARN: pcoord(j) should be used!!
1238	work_ffty(m) = ar(j,k,i,n)
1239	m = m + 1
1240	ENDDO
1241	ENDDO
1242
1243	CALL fft_y( work_ffty, 'forward' )
1244
1245	DO j = 0, ny
1246	work_triy(j,k) = work_ffty(j)
1247	ENDDO
1248
1249	ENDDO
1250
1251	ENDIF
1252
1253	!
1254	!-- Solve the linear equation system
1255	CALL tridia_1dd( ddy2, ddx2, ny, nx, i, work_triy, tri(:,:,:,tn) )
1256
1257	IF ( host(1:3) == 'nec' ) THEN
1258	!
1259	!-- Code optimized for vector processors
1260	CALL fft_y_m( work_triy, ny, 'backward' )
1261
1262	DO k = 1, nz
1263
1264	m = 0
1265	DO n = 1, pdims(2)
1266	DO j = 1, nny_pe( n-1 ) ! WARN: pcoord(j) should be used!!
1267	ar(j,k,i,n) = work_triy(m,k)
1268	m = m + 1
1269	ENDDO
1270	ENDDO
1271
1272	ENDDO
1273
1274	ELSE
1275	!
1276	!-- Cache optimized code
1277	DO k = 1, nz
1278
1279	DO j = 0, ny
1280	work_ffty(j) = work_triy(j,k)
1281	ENDDO
1282
1283	CALL fft_y( work_ffty, 'backward' )
1284
1285	m = 0
1286	DO n = 1, pdims(2)
1287	DO j = 1, nny_pe( n-1 ) ! WARN: pcoord(j) should be used!!
1288	ar(j,k,i,n) = work_ffty(m)
1289	m = m + 1
1290	ENDDO
1291	ENDDO
1292
1293	ENDDO
1294
1295	ENDIF
1296
1297	ENDDO
1298	!$OMP END PARALLEL
1299
1300	DEALLOCATE( tri )
1301
1302	CALL cpu_log( log_point_s(39), 'fft_y + tridia', 'stop' )
1303
1304	END SUBROUTINE ffty_tri_ffty
1305
1306
1307	SUBROUTINE tridia_1dd( ddx2, ddy2, nx, ny, j, ar, tri )
1308
1309	!------------------------------------------------------------------------------!
1310	! Solves the linear system of equations for a 1d-decomposition along x (see
1311	! tridia)
1312	!
1313	! Attention: when using the intel compiler, array tri must be passed as an
1314	! argument to the contained subroutines. Otherwise addres faults
1315	! will occur.
1316	! On NEC, tri should not be passed (except for routine substi_1dd)
1317	! because this causes very bad performance.
1318	!------------------------------------------------------------------------------!
1319
1320	USE arrays_3d
1321	USE control_parameters
1322
1323	USE pegrid
1324
1325	IMPLICIT NONE
1326
1327	INTEGER :: i, j, k, nnyh, nx, ny, omp_get_thread_num, tn
1328
1329	REAL :: ddx2, ddy2
1330
1331	REAL, DIMENSION(0:nx,1:nz) :: ar
1332	REAL, DIMENSION(0:nx,0:nz-1) :: ar1
1333	REAL, DIMENSION(5,0:nx,0:nz-1) :: tri
1334
1335
1336	nnyh = ( ny + 1 ) / 2
1337
1338	!
1339	!-- Define constant elements of the tridiagonal matrix.
1340	!-- The compiler on SX6 does loop exchange. If 0:nx is a high power of 2,
1341	!-- the exchanged loops create bank conflicts. The following directive
1342	!-- prohibits loop exchange and the loops perform much better.
1343	! tn = omp_get_thread_num()
1344	! WRITE( 120+tn, * ) '+++ id=',myid,' nx=',nx,' thread=', omp_get_thread_num()
1345	! CALL local_flush( 120+tn )
1346	!CDIR NOLOOPCHG
1347	DO k = 0, nz-1
1348	DO i = 0,nx
1349	tri(2,i,k) = ddzu(k+1) * ddzw(k+1)
1350	tri(3,i,k) = ddzu(k+2) * ddzw(k+1)
1351	ENDDO
1352	ENDDO
1353	! WRITE( 120+tn, * ) '+++ id=',myid,' end of first tridia loop thread=', omp_get_thread_num()
1354	! CALL local_flush( 120+tn )
1355
1356	IF ( j <= nnyh ) THEN
1357	#if defined( __lcmuk )
1358	CALL maketri_1dd( j, tri )
1359	#else
1360	CALL maketri_1dd( j )
1361	#endif
1362	ELSE
1363	#if defined( __lcmuk )
1364	CALL maketri_1dd( ny+1-j, tri )
1365	#else
1366	CALL maketri_1dd( ny+1-j )
1367	#endif
1368	ENDIF
1369	#if defined( __lcmuk )
1370	CALL split_1dd( tri )
1371	#else
1372	CALL split_1dd
1373	#endif
1374	CALL substi_1dd( ar, tri )
1375
1376	CONTAINS
1377
1378	#if defined( __lcmuk )
1379	SUBROUTINE maketri_1dd( j, tri )
1380	#else
1381	SUBROUTINE maketri_1dd( j )
1382	#endif
1383
1384	!------------------------------------------------------------------------------!
1385	! computes the i- and j-dependent component of the matrix
1386	!------------------------------------------------------------------------------!
1387
1388	USE constants
1389
1390	IMPLICIT NONE
1391
1392	INTEGER :: i, j, k, nnxh
1393	REAL :: a, c
1394
1395	REAL, DIMENSION(0:nx) :: l
1396
1397	#if defined( __lcmuk )
1398	REAL, DIMENSION(5,0:nx,0:nz-1) :: tri
1399	#endif
1400
1401
1402	nnxh = ( nx + 1 ) / 2
1403	!
1404	!-- Provide the tridiagonal matrix for solution of the Poisson equation in
1405	!-- Fourier space. The coefficients are computed following the method of
1406	!-- Schmidt et al. (DFVLR-Mitteilung 84-15), which departs from Stephan
1407	!-- Siano's original version by discretizing the Poisson equation,
1408	!-- before it is Fourier-transformed
1409	DO i = 0, nx
1410	IF ( i >= 0 .AND. i <= nnxh ) THEN
1411	l(i) = 2.0 * ( 1.0 - COS( ( 2.0 * pi * i ) / &
1412	FLOAT( nx+1 ) ) ) * ddx2 + &
1413	2.0 * ( 1.0 - COS( ( 2.0 * pi * j ) / &
1414	FLOAT( ny+1 ) ) ) * ddy2
1415	ELSE
1416	l(i) = 2.0 * ( 1.0 - COS( ( 2.0 * pi * ( nx+1-i ) ) / &
1417	FLOAT( nx+1 ) ) ) * ddx2 + &
1418	2.0 * ( 1.0 - COS( ( 2.0 * pi * j ) / &
1419	FLOAT( ny+1 ) ) ) * ddy2
1420	ENDIF
1421	ENDDO
1422
1423	DO k = 0, nz-1
1424	DO i = 0, nx
1425	a = -1.0 * ddzu(k+2) * ddzw(k+1)
1426	c = -1.0 * ddzu(k+1) * ddzw(k+1)
1427	tri(1,i,k) = a + c - l(i)
1428	ENDDO
1429	ENDDO
1430	IF ( ibc_p_b == 1 .OR. ibc_p_b == 2 ) THEN
1431	DO i = 0, nx
1432	tri(1,i,0) = tri(1,i,0) + tri(2,i,0)
1433	ENDDO
1434	ENDIF
1435	IF ( ibc_p_t == 1 ) THEN
1436	DO i = 0, nx
1437	tri(1,i,nz-1) = tri(1,i,nz-1) + tri(3,i,nz-1)
1438	ENDDO
1439	ENDIF
1440
1441	END SUBROUTINE maketri_1dd
1442
1443
1444	#if defined( __lcmuk )
1445	SUBROUTINE split_1dd( tri )
1446	#else
1447	SUBROUTINE split_1dd
1448	#endif
1449
1450	!------------------------------------------------------------------------------!
1451	! Splitting of the tridiagonal matrix (Thomas algorithm)
1452	!------------------------------------------------------------------------------!
1453
1454	IMPLICIT NONE
1455
1456	INTEGER :: i, k
1457
1458	#if defined( __lcmuk )
1459	REAL, DIMENSION(5,0:nx,0:nz-1) :: tri
1460	#endif
1461
1462
1463	!
1464	!-- Splitting
1465	DO i = 0, nx
1466	tri(4,i,0) = tri(1,i,0)
1467	ENDDO
1468	DO k = 1, nz-1
1469	DO i = 0, nx
1470	tri(5,i,k) = tri(2,i,k) / tri(4,i,k-1)
1471	tri(4,i,k) = tri(1,i,k) - tri(3,i,k-1) * tri(5,i,k)
1472	ENDDO
1473	ENDDO
1474
1475	END SUBROUTINE split_1dd
1476
1477
1478	SUBROUTINE substi_1dd( ar, tri )
1479
1480	!------------------------------------------------------------------------------!
1481	! Substitution (Forward and Backward) (Thomas algorithm)
1482	!------------------------------------------------------------------------------!
1483
1484	IMPLICIT NONE
1485
1486	INTEGER :: i, k
1487
1488	REAL, DIMENSION(0:nx,nz) :: ar
1489	REAL, DIMENSION(0:nx,0:nz-1) :: ar1
1490	REAL, DIMENSION(5,0:nx,0:nz-1) :: tri
1491
1492	!
1493	!-- Forward substitution
1494	DO i = 0, nx
1495	ar1(i,0) = ar(i,1)
1496	ENDDO
1497	DO k = 1, nz-1
1498	DO i = 0, nx
1499	ar1(i,k) = ar(i,k+1) - tri(5,i,k) * ar1(i,k-1)
1500	ENDDO
1501	ENDDO
1502
1503	!
1504	!-- Backward substitution
1505	DO i = 0, nx
1506	ar(i,nz) = ar1(i,nz-1) / tri(4,i,nz-1)
1507	ENDDO
1508	DO k = nz-2, 0, -1
1509	DO i = 0, nx
1510	ar(i,k+1) = ( ar1(i,k) - tri(3,i,k) * ar(i,k+2) ) &
1511	/ tri(4,i,k)
1512	ENDDO
1513	ENDDO
1514
1515	!
1516	!-- Indices i=0, j=0 correspond to horizontally averaged pressure.
1517	!-- The respective values of ar should be zero at all k-levels if
1518	!-- acceleration of horizontally averaged vertical velocity is zero.
1519	IF ( ibc_p_b == 1 .AND. ibc_p_t == 1 ) THEN
1520	IF ( j == 0 ) THEN
1521	DO k = 1, nz
1522	ar(0,k) = 0.0
1523	ENDDO
1524	ENDIF
1525	ENDIF
1526
1527	END SUBROUTINE substi_1dd
1528
1529	END SUBROUTINE tridia_1dd
1530
1531	#endif
1532
1533	END MODULE poisfft_mod

Note: See TracBrowser for help on using the repository browser.

Download in other formats:

| Impressum | ©Leibniz Universität Hannover |