Home

Context Navigation

source: palm/trunk/SOURCE/poisfft.f90 @ 85

Last change on this file since 85 was 85, checked in by raasch, 17 years ago
openmp bugfixes found by NEC benchmarker
Property svn:keywords set to `Id`
File size: 44.9 KB

Line
1	MODULE poisfft_mod
2
3	!------------------------------------------------------------------------------!
4	! Actual revisions:
5	! -----------------
6	! Bugfix: work_fft*_vec removed from some PRIVATE-declarations
7	!
8	! Former revisions:
9	! -----------------
10	! $Id: poisfft.f90 85 2007-05-11 09:35:14Z raasch $
11	!
12	! 76 2007-03-29 00:58:32Z raasch
13	! Tridiagonal coefficients adjusted for Neumann boundary conditions both at
14	! the bottom and the top.
15	!
16	! RCS Log replace by Id keyword, revision history cleaned up
17	!
18	! Revision 1.24 2006/08/04 15:00:24 raasch
19	! Default setting of the thread number tn in case of not using OpenMP
20	!
21	! Revision 1.23 2006/02/23 12:48:38 raasch
22	! Additional compiler directive in routine tridia_1dd for preventing loop
23	! exchange on NEC-SX6
24	!
25	! Revision 1.20 2004/04/30 12:38:09 raasch
26	! Parts of former poisfft_hybrid moved to this subroutine,
27	! former subroutine changed to a module, renaming of FFT-subroutines and
28	! -module, FFTs completely substituted by calls of fft_x and fft_y,
29	! NAG fft used in the non-parallel case completely removed, l in maketri
30	! is now a 1d-array, variables passed by modules instead of using parameter
31	! lists, enlarged transposition arrays introduced
32	!
33	! Revision 1.1 1997/07/24 11:24:14 raasch
34	! Initial revision
35	!
36	!
37	! Description:
38	! ------------
39	! See below.
40	!------------------------------------------------------------------------------!
41
42	!--------------------------------------------------------------------------!
43	! poisfft !
44	! !
45	! Original version: Stephan Siano (pois3d) !
46	! !
47	! Institute of Meteorology and Climatology, University of Hannover !
48	! Germany !
49	! !
50	! Version as of July 23,1996 !
51	! !
52	! !
53	! Version for parallel computers: Siegfried Raasch !
54	! !
55	! Version as of July 03,1997 !
56	! !
57	! Solves the Poisson equation with a 2D spectral method !
58	! d^2 p / dx^2 + d^2 p / dy^2 + d^2 p / dz^2 = s !
59	! !
60	! Input: !
61	! real ar contains in the (nnx,nny,nnz) elements, !
62	! starting from the element (1,nys,nxl), the !
63	! values for s !
64	! real work Temporary array !
65	! !
66	! Output: !
67	! real ar contains the solution for p !
68	!--------------------------------------------------------------------------!
69
70	USE fft_xy
71	USE indices
72	USE transpose_indices
73
74	IMPLICIT NONE
75
76	PRIVATE
77	PUBLIC poisfft, poisfft_init
78
79	INTERFACE poisfft
80	MODULE PROCEDURE poisfft
81	END INTERFACE poisfft
82
83	INTERFACE poisfft_init
84	MODULE PROCEDURE poisfft_init
85	END INTERFACE poisfft_init
86
87	CONTAINS
88
89	SUBROUTINE poisfft_init
90
91	CALL fft_init
92
93	END SUBROUTINE poisfft_init
94
95
96	SUBROUTINE poisfft( ar, work )
97
98	USE cpulog
99	USE interfaces
100	USE pegrid
101
102	IMPLICIT NONE
103
104	REAL, DIMENSION(1:nza,nys:nyna,nxl:nxra) :: ar, work
105
106
107	CALL cpu_log( log_point_s(3), 'poisfft', 'start' )
108
109	!
110	!-- Two-dimensional Fourier Transformation in x- and y-direction.
111	#if defined( __parallel )
112	IF ( pdims(2) == 1 ) THEN
113
114	!
115	!-- 1d-domain-decomposition along x:
116	!-- FFT along y and transposition y --> x
117	CALL ffty_tr_yx( ar, work, ar )
118
119	!
120	!-- FFT along x, solving the tridiagonal system and backward FFT
121	CALL fftx_tri_fftx( ar )
122
123	!
124	!-- Transposition x --> y and backward FFT along y
125	CALL tr_xy_ffty( ar, work, ar )
126
127	ELSEIF ( pdims(1) == 1 ) THEN
128
129	!
130	!-- 1d-domain-decomposition along y:
131	!-- FFT along x and transposition x --> y
132	CALL fftx_tr_xy( ar, work, ar )
133
134	!
135	!-- FFT along y, solving the tridiagonal system and backward FFT
136	CALL ffty_tri_ffty( ar )
137
138	!
139	!-- Transposition y --> x and backward FFT along x
140	CALL tr_yx_fftx( ar, work, ar )
141
142	ELSE
143
144	!
145	!-- 2d-domain-decomposition
146	!-- Transposition z --> x
147	CALL cpu_log( log_point_s(5), 'transpo forward', 'start' )
148	CALL transpose_zx( ar, work, ar, work, ar )
149	CALL cpu_log( log_point_s(5), 'transpo forward', 'pause' )
150
151	CALL cpu_log( log_point_s(4), 'fft_x', 'start' )
152	CALL fftxp( ar, 'forward' )
153	CALL cpu_log( log_point_s(4), 'fft_x', 'pause' )
154
155	!
156	!-- Transposition x --> y
157	CALL cpu_log( log_point_s(5), 'transpo forward', 'continue' )
158	CALL transpose_xy( ar, work, ar, work, ar )
159	CALL cpu_log( log_point_s(5), 'transpo forward', 'pause' )
160
161	CALL cpu_log( log_point_s(7), 'fft_y', 'start' )
162	CALL fftyp( ar, 'forward' )
163	CALL cpu_log( log_point_s(7), 'fft_y', 'pause' )
164
165	!
166	!-- Transposition y --> z
167	CALL cpu_log( log_point_s(5), 'transpo forward', 'continue' )
168	CALL transpose_yz( ar, work, ar, work, ar )
169	CALL cpu_log( log_point_s(5), 'transpo forward', 'stop' )
170
171	!
172	!-- Solve the Poisson equation in z-direction in cartesian space.
173	CALL cpu_log( log_point_s(6), 'tridia', 'start' )
174	CALL tridia( ar )
175	CALL cpu_log( log_point_s(6), 'tridia', 'stop' )
176
177	!
178	!-- Inverse Fourier Transformation
179	!-- Transposition z --> y
180	CALL cpu_log( log_point_s(8), 'transpo invers', 'start' )
181	CALL transpose_zy( ar, work, ar, work, ar )
182	CALL cpu_log( log_point_s(8), 'transpo invers', 'pause' )
183
184	CALL cpu_log( log_point_s(7), 'fft_y', 'continue' )
185	CALL fftyp( ar, 'backward' )
186	CALL cpu_log( log_point_s(7), 'fft_y', 'stop' )
187
188	!
189	!-- Transposition y --> x
190	CALL cpu_log( log_point_s(8), 'transpo invers', 'continue' )
191	CALL transpose_yx( ar, work, ar, work, ar )
192	CALL cpu_log( log_point_s(8), 'transpo invers', 'pause' )
193
194	CALL cpu_log( log_point_s(4), 'fft_x', 'continue' )
195	CALL fftxp( ar, 'backward' )
196	CALL cpu_log( log_point_s(4), 'fft_x', 'stop' )
197
198	!
199	!-- Transposition x --> z
200	CALL cpu_log( log_point_s(8), 'transpo invers', 'continue' )
201	CALL transpose_xz( ar, work, ar, work, ar )
202	CALL cpu_log( log_point_s(8), 'transpo invers', 'stop' )
203
204	ENDIF
205
206	#else
207
208	!
209	!-- Two-dimensional Fourier Transformation along x- and y-direction.
210	CALL cpu_log( log_point_s(4), 'fft_x', 'start' )
211	CALL fftx( ar, 'forward' )
212	CALL cpu_log( log_point_s(4), 'fft_x', 'pause' )
213	CALL cpu_log( log_point_s(7), 'fft_y', 'start' )
214	CALL ffty( ar, 'forward' )
215	CALL cpu_log( log_point_s(7), 'fft_y', 'pause' )
216
217	!
218	!-- Solve the Poisson equation in z-direction in cartesian space.
219	CALL cpu_log( log_point_s(6), 'tridia', 'start' )
220	CALL tridia( ar )
221	CALL cpu_log( log_point_s(6), 'tridia', 'stop' )
222
223	!
224	!-- Inverse Fourier Transformation.
225	CALL cpu_log( log_point_s(7), 'fft_y', 'continue' )
226	CALL ffty( ar, 'backward' )
227	CALL cpu_log( log_point_s(7), 'fft_y', 'stop' )
228	CALL cpu_log( log_point_s(4), 'fft_x', 'continue' )
229	CALL fftx( ar, 'backward' )
230	CALL cpu_log( log_point_s(4), 'fft_x', 'stop' )
231
232	#endif
233
234	CALL cpu_log( log_point_s(3), 'poisfft', 'stop' )
235
236	END SUBROUTINE poisfft
237
238
239
240	SUBROUTINE tridia( ar )
241
242	!------------------------------------------------------------------------------!
243	! solves the linear system of equations:
244	!
245	! -(4 pi^2(i^2/(dx^2nnx^2)+j^2/(dy^2nny^2))+
246	! 1/(dzu(k)dzw(k))+1/(dzu(k-1)dzw(k)))*p(i,j,k)+
247	! 1/(dzu(k)dzw(k))p(i,j,k+1)+1/(dzu(k-1)dzw(k))p(i,j,k-1)=d(i,j,k)
248	!
249	! by using the Thomas algorithm
250	!------------------------------------------------------------------------------!
251
252	USE arrays_3d
253
254	IMPLICIT NONE
255
256	INTEGER :: i, j, k, nnyh
257
258	REAL, DIMENSION(nxl_z:nxr_z,0:nz-1) :: ar1
259	REAL, DIMENSION(5,nxl_z:nxr_z,0:nz-1) :: tri
260
261	#if defined( __parallel )
262	REAL :: ar(nxl_z:nxr_za,nys_z:nyn_za,1:nza)
263	#else
264	REAL :: ar(1:nz,nys_z:nyn_z,nxl_z:nxr_z)
265	#endif
266
267
268	nnyh = (ny+1) / 2
269
270	!
271	!-- Define constant elements of the tridiagonal matrix.
272	DO k = 0, nz-1
273	DO i = nxl_z, nxr_z
274	tri(2,i,k) = ddzu(k+1) * ddzw(k+1)
275	tri(3,i,k) = ddzu(k+2) * ddzw(k+1)
276	ENDDO
277	ENDDO
278
279	#if defined( __parallel )
280	!
281	!-- Repeat for all y-levels.
282	DO j = nys_z, nyn_z
283	IF ( j <= nnyh ) THEN
284	CALL maketri( tri, j )
285	ELSE
286	CALL maketri( tri, ny+1-j )
287	ENDIF
288	CALL split( tri )
289	CALL substi( ar, ar1, tri, j )
290	ENDDO
291	#else
292	!
293	!-- First y-level.
294	CALL maketri( tri, nys_z )
295	CALL split( tri )
296	CALL substi( ar, ar1, tri, 0 )
297
298	!
299	!-- Further y-levels.
300	DO j = 1, nnyh - 1
301	CALL maketri( tri, j )
302	CALL split( tri )
303	CALL substi( ar, ar1, tri, j )
304	CALL substi( ar, ar1, tri, ny+1-j )
305	ENDDO
306	CALL maketri( tri, nnyh )
307	CALL split( tri )
308	CALL substi( ar, ar1, tri, nnyh+nys )
309	#endif
310
311	CONTAINS
312
313	SUBROUTINE maketri( tri, j )
314
315	!------------------------------------------------------------------------------!
316	! Computes the i- and j-dependent component of the matrix
317	!------------------------------------------------------------------------------!
318
319	USE arrays_3d
320	USE constants
321	USE control_parameters
322	USE grid_variables
323
324	IMPLICIT NONE
325
326	INTEGER :: i, j, k, nnxh
327	REAL :: a, c
328	REAL :: ll(nxl_z:nxr_z)
329	REAL :: tri(5,nxl_z:nxr_z,0:nz-1)
330
331
332	nnxh = ( nx + 1 ) / 2
333
334	!
335	!-- Provide the tridiagonal matrix for solution of the Poisson equation in
336	!-- Fourier space. The coefficients are computed following the method of
337	!-- Schmidt et al. (DFVLR-Mitteilung 84-15), which departs from Stephan
338	!-- Siano's original version by discretizing the Poisson equation,
339	!-- before it is Fourier-transformed
340	#if defined( __parallel )
341	DO i = nxl_z, nxr_z
342	IF ( i >= 0 .AND. i < nnxh ) THEN
343	ll(i) = 2.0 * ( 1.0 - COS( ( 2.0 * pi * i ) / &
344	FLOAT( nx+1 ) ) ) / ( dx * dx ) + &
345	2.0 * ( 1.0 - COS( ( 2.0 * pi * j ) / &
346	FLOAT( ny+1 ) ) ) / ( dy * dy )
347	ELSEIF ( i == nnxh ) THEN
348	ll(i) = 2.0 * ( 1.0 - COS( ( 2.0 * pi * ( nx+1-i ) ) / &
349	FLOAT( nx+1 ) ) ) / ( dx * dx ) + &
350	2.0 * ( 1.0 - COS( ( 2.0 * pi * j ) / &
351	FLOAT( ny+1 ) ) ) / ( dy * dy )
352	ELSE
353	ll(i) = 2.0 * ( 1.0 - COS( ( 2.0 * pi * ( nx+1-i ) ) / &
354	FLOAT( nx+1 ) ) ) / ( dx * dx ) + &
355	2.0 * ( 1.0 - COS( ( 2.0 * pi * j ) / &
356	FLOAT( ny+1 ) ) ) / ( dy * dy )
357	ENDIF
358	DO k = 0,nz-1
359	a = -1.0 * ddzu(k+2) * ddzw(k+1)
360	c = -1.0 * ddzu(k+1) * ddzw(k+1)
361	tri(1,i,k) = a + c - ll(i)
362	ENDDO
363	ENDDO
364	#else
365	DO i = 0, nnxh
366	ll(i) = 2.0 * ( 1.0 - COS( ( 2.0 * pi * i ) / FLOAT( nx+1 ) ) ) / &
367	( dx * dx ) + &
368	2.0 * ( 1.0 - COS( ( 2.0 * pi * j ) / FLOAT( ny+1 ) ) ) / &
369	( dy * dy )
370	DO k = 0, nz-1
371	a = -1.0 * ddzu(k+2) * ddzw(k+1)
372	c = -1.0 * ddzu(k+1) * ddzw(k+1)
373	tri(1,i,k) = a + c - ll(i)
374	IF ( i >= 1 .and. i < nnxh ) THEN
375	tri(1,nx+1-i,k) = tri(1,i,k)
376	ENDIF
377	ENDDO
378	ENDDO
379	#endif
380	IF ( ibc_p_b == 1 .OR. ibc_p_b == 2 ) THEN
381	DO i = nxl_z, nxr_z
382	tri(1,i,0) = tri(1,i,0) + tri(2,i,0)
383	ENDDO
384	ENDIF
385	IF ( ibc_p_t == 1 ) THEN
386	DO i = nxl_z, nxr_z
387	tri(1,i,nz-1) = tri(1,i,nz-1) + tri(3,i,nz-1)
388	ENDDO
389	ENDIF
390
391	END SUBROUTINE maketri
392
393
394	SUBROUTINE substi( ar, ar1, tri, j )
395
396	!------------------------------------------------------------------------------!
397	! Substitution (Forward and Backward) (Thomas algorithm)
398	!------------------------------------------------------------------------------!
399
400	USE control_parameters
401
402	IMPLICIT NONE
403
404	INTEGER :: i, j, k
405	REAL :: ar1(nxl_z:nxr_z,0:nz-1)
406	REAL :: tri(5,nxl_z:nxr_z,0:nz-1)
407	#if defined( __parallel )
408	REAL :: ar(nxl_z:nxr_za,nys_z:nyn_za,1:nza)
409	#else
410	REAL :: ar(1:nz,nys_z:nyn_z,nxl_z:nxr_z)
411	#endif
412
413	!
414	!-- Forward substitution.
415	DO i = nxl_z, nxr_z
416	#if defined( __parallel )
417	ar1(i,0) = ar(i,j,1)
418	#else
419	ar1(i,0) = ar(1,j,i)
420	#endif
421	ENDDO
422	DO k = 1, nz - 1
423	DO i = nxl_z, nxr_z
424	#if defined( __parallel )
425	ar1(i,k) = ar(i,j,k+1) - tri(5,i,k) * ar1(i,k-1)
426	#else
427	ar1(i,k) = ar(k+1,j,i) - tri(5,i,k) * ar1(i,k-1)
428	#endif
429	ENDDO
430	ENDDO
431
432	!
433	!-- Backward substitution.
434	DO i = nxl_z, nxr_z
435	#if defined( __parallel )
436	ar(i,j,nz) = ar1(i,nz-1) / tri(4,i,nz-1)
437	#else
438	ar(nz,j,i) = ar1(i,nz-1) / tri(4,i,nz-1)
439	#endif
440	ENDDO
441	DO k = nz-2, 0, -1
442	DO i = nxl_z, nxr_z
443	#if defined( __parallel )
444	ar(i,j,k+1) = ( ar1(i,k) - tri(3,i,k) * ar(i,j,k+2) ) &
445	/ tri(4,i,k)
446	#else
447	ar(k+1,j,i) = ( ar1(i,k) - tri(3,i,k) * ar(k+2,j,i) ) &
448	/ tri(4,i,k)
449	#endif
450	ENDDO
451	ENDDO
452
453	!
454	!-- Indices i=0, j=0 correspond to horizontally averaged pressure.
455	!-- The respective values of ar should be zero at all k-levels if
456	!-- acceleration of horizontally averaged vertical velocity is zero.
457	IF ( ibc_p_b == 1 .AND. ibc_p_t == 1 ) THEN
458	IF ( j == 0 .AND. nxl_z == 0 ) THEN
459	#if defined( __parallel )
460	DO k = 1, nz
461	ar(nxl_z,j,k) = 0.0
462	ENDDO
463	#else
464	DO k = 1, nz
465	ar(k,j,nxl_z) = 0.0
466	ENDDO
467	#endif
468	ENDIF
469	ENDIF
470
471	END SUBROUTINE substi
472
473
474	SUBROUTINE split( tri )
475
476	!------------------------------------------------------------------------------!
477	! Splitting of the tridiagonal matrix (Thomas algorithm)
478	!------------------------------------------------------------------------------!
479
480	IMPLICIT NONE
481
482	INTEGER :: i, k
483	REAL :: tri(5,nxl_z:nxr_z,0:nz-1)
484
485	!
486	!-- Splitting.
487	DO i = nxl_z, nxr_z
488	tri(4,i,0) = tri(1,i,0)
489	ENDDO
490	DO k = 1, nz-1
491	DO i = nxl_z, nxr_z
492	tri(5,i,k) = tri(2,i,k) / tri(4,i,k-1)
493	tri(4,i,k) = tri(1,i,k) - tri(3,i,k-1) * tri(5,i,k)
494	ENDDO
495	ENDDO
496
497	END SUBROUTINE split
498
499	END SUBROUTINE tridia
500
501
502	#if defined( __parallel )
503	SUBROUTINE fftxp( ar, direction )
504
505	!------------------------------------------------------------------------------!
506	! Fourier-transformation along x-direction Parallelized version
507	!------------------------------------------------------------------------------!
508
509	IMPLICIT NONE
510
511	CHARACTER (LEN=*) :: direction
512	INTEGER :: j, k
513	REAL :: ar(0:nxa,nys_x:nyn_xa,nzb_x:nzt_xa)
514
515	!
516	!-- Performing the fft with one of the methods implemented
517	DO k = nzb_x, nzt_x
518	DO j = nys_x, nyn_x
519	CALL fft_x( ar(0:nx,j,k), direction )
520	ENDDO
521	ENDDO
522
523	END SUBROUTINE fftxp
524
525	#else
526	SUBROUTINE fftx( ar, direction )
527
528	!------------------------------------------------------------------------------!
529	! Fourier-transformation along x-direction Non parallel version
530	!------------------------------------------------------------------------------!
531
532	IMPLICIT NONE
533
534	CHARACTER (LEN=*) :: direction
535	INTEGER :: i, j, k
536	REAL :: ar(1:nz,0:ny,0:nx)
537
538	!
539	!-- Performing the fft with one of the methods implemented
540	DO k = 1, nz
541	DO j = 0, ny
542	CALL fft_x( ar(k,j,0:nx), direction )
543	ENDDO
544	ENDDO
545
546	END SUBROUTINE fftx
547	#endif
548
549
550	#if defined( __parallel )
551	SUBROUTINE fftyp( ar, direction )
552
553	!------------------------------------------------------------------------------!
554	! Fourier-transformation along y-direction Parallelized version
555	!------------------------------------------------------------------------------!
556
557	IMPLICIT NONE
558
559	CHARACTER (LEN=*) :: direction
560	INTEGER :: i, k
561	REAL :: ar(0:nya,nxl_y:nxr_ya,nzb_y:nzt_ya)
562
563	!
564	!-- Performing the fft with one of the methods implemented
565	DO k = nzb_y, nzt_y
566	DO i = nxl_y, nxr_y
567	CALL fft_y( ar(0:ny,i,k), direction )
568	ENDDO
569	ENDDO
570
571	END SUBROUTINE fftyp
572
573	#else
574	SUBROUTINE ffty( ar, direction )
575
576	!------------------------------------------------------------------------------!
577	! Fourier-transformation along y-direction Non parallel version
578	!------------------------------------------------------------------------------!
579
580	IMPLICIT NONE
581
582	CHARACTER (LEN=*) :: direction
583	INTEGER :: i, k
584	REAL :: ar(1:nz,0:ny,0:nx)
585
586	!
587	!-- Performing the fft with one of the methods implemented
588	DO k = 1, nz
589	DO i = 0, nx
590	CALL fft_y( ar(k,0:ny,i), direction )
591	ENDDO
592	ENDDO
593
594	END SUBROUTINE ffty
595	#endif
596
597	#if defined( __parallel )
598	SUBROUTINE ffty_tr_yx( f_in, work, f_out )
599
600	!------------------------------------------------------------------------------!
601	! Fourier-transformation along y with subsequent transposition y --> x for
602	! a 1d-decomposition along x
603	!
604	! ATTENTION: The performance of this routine is much faster on the NEC-SX6,
605	! if the first index of work_ffty_vec is odd. Otherwise
606	! memory bank conflicts may occur (especially if the index is a
607	! multiple of 128). That's why work_ffty_vec is dimensioned as
608	! 0:ny+1.
609	! Of course, this will not work if users are using an odd number
610	! of gridpoints along y.
611	!------------------------------------------------------------------------------!
612
613	USE control_parameters
614	USE cpulog
615	USE indices
616	USE interfaces
617	USE pegrid
618	USE transpose_indices
619
620	IMPLICIT NONE
621
622	INTEGER :: i, iend, iouter, ir, j, k
623	INTEGER, PARAMETER :: stridex = 4
624
625	REAL, DIMENSION(0:ny,stridex) :: work_ffty
626	#if defined( __nec )
627	REAL, DIMENSION(0:ny+1,1:nz,nxl:nxr) :: work_ffty_vec
628	#endif
629	REAL, DIMENSION(1:nza,0:nya,nxl:nxra) :: f_in
630	REAL, DIMENSION(nnx,1:nza,nys_x:nyn_xa,pdims(1)) :: f_out
631	REAL, DIMENSION(nxl:nxra,1:nza,0:nya) :: work
632
633	!
634	!-- Carry out the FFT along y, where all data are present due to the
635	!-- 1d-decomposition along x. Resort the data in a way that x becomes
636	!-- the first index.
637	CALL cpu_log( log_point_s(7), 'fft_y', 'start' )
638
639	IF ( host(1:3) == 'nec' ) THEN
640	#if defined( __nec )
641	!
642	!-- Code optimized for vector processors
643	!$OMP PARALLEL PRIVATE ( i, j, k )
644	!$OMP DO
645	DO i = nxl, nxr
646
647	DO j = 0, ny
648	DO k = 1, nz
649	work_ffty_vec(j,k,i) = f_in(k,j,i)
650	ENDDO
651	ENDDO
652
653	CALL fft_y_m( work_ffty_vec(:,:,i), ny+1, 'forward' )
654
655	ENDDO
656
657	!$OMP DO
658	DO k = 1, nz
659	DO j = 0, ny
660	DO i = nxl, nxr
661	work(i,k,j) = work_ffty_vec(j,k,i)
662	ENDDO
663	ENDDO
664	ENDDO
665	!$OMP END PARALLEL
666	#endif
667
668	ELSE
669
670	!
671	!-- Cache optimized code.
672	!-- The i-(x-)direction is split into a strided outer loop and an inner
673	!-- loop for better cache performance
674	!$OMP PARALLEL PRIVATE (i,iend,iouter,ir,j,k,work_ffty)
675	!$OMP DO
676	DO iouter = nxl, nxr, stridex
677
678	iend = MIN( iouter+stridex-1, nxr ) ! Upper bound for inner i loop
679
680	DO k = 1, nz
681
682	DO i = iouter, iend
683
684	ir = i-iouter+1 ! counter within a stride
685	DO j = 0, ny
686	work_ffty(j,ir) = f_in(k,j,i)
687	ENDDO
688	!
689	!-- FFT along y
690	CALL fft_y( work_ffty(:,ir), 'forward' )
691
692	ENDDO
693
694	!
695	!-- Resort
696	DO j = 0, ny
697	DO i = iouter, iend
698	work(i,k,j) = work_ffty(j,i-iouter+1)
699	ENDDO
700	ENDDO
701
702	ENDDO
703
704	ENDDO
705	!$OMP END PARALLEL
706
707	ENDIF
708	CALL cpu_log( log_point_s(7), 'fft_y', 'pause' )
709
710	!
711	!-- Transpose array
712	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
713	CALL MPI_ALLTOALL( work(nxl,1,0), sendrecvcount_xy, MPI_REAL, &
714	f_out(1,1,nys_x,1), sendrecvcount_xy, MPI_REAL, &
715	comm1dx, ierr )
716	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
717
718	END SUBROUTINE ffty_tr_yx
719
720
721	SUBROUTINE tr_xy_ffty( f_in, work, f_out )
722
723	!------------------------------------------------------------------------------!
724	! Transposition x --> y with a subsequent backward Fourier transformation for
725	! a 1d-decomposition along x
726	!------------------------------------------------------------------------------!
727
728	USE control_parameters
729	USE cpulog
730	USE indices
731	USE interfaces
732	USE pegrid
733	USE transpose_indices
734
735	IMPLICIT NONE
736
737	INTEGER :: i, iend, iouter, ir, j, k
738	INTEGER, PARAMETER :: stridex = 4
739
740	REAL, DIMENSION(0:ny,stridex) :: work_ffty
741	#if defined( __nec )
742	REAL, DIMENSION(0:ny+1,1:nz,nxl:nxr) :: work_ffty_vec
743	#endif
744	REAL, DIMENSION(nnx,1:nza,nys_x:nyn_xa,pdims(1)) :: f_in
745	REAL, DIMENSION(1:nza,0:nya,nxl:nxra) :: f_out
746	REAL, DIMENSION(nxl:nxra,1:nza,0:nya) :: work
747
748	!
749	!-- Transpose array
750	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
751	CALL MPI_ALLTOALL( f_in(1,1,nys_x,1), sendrecvcount_xy, MPI_REAL, &
752	work(nxl,1,0), sendrecvcount_xy, MPI_REAL, &
753	comm1dx, ierr )
754	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
755
756	!
757	!-- Resort the data in a way that y becomes the first index and carry out the
758	!-- backward fft along y.
759	CALL cpu_log( log_point_s(7), 'fft_y', 'continue' )
760
761	IF ( host(1:3) == 'nec' ) THEN
762	#if defined( __nec )
763	!
764	!-- Code optimized for vector processors
765	!$OMP PARALLEL PRIVATE ( i, j, k )
766	!$OMP DO
767	DO k = 1, nz
768	DO j = 0, ny
769	DO i = nxl, nxr
770	work_ffty_vec(j,k,i) = work(i,k,j)
771	ENDDO
772	ENDDO
773	ENDDO
774
775	!$OMP DO
776	DO i = nxl, nxr
777
778	CALL fft_y_m( work_ffty_vec(:,:,i), ny+1, 'backward' )
779
780	DO j = 0, ny
781	DO k = 1, nz
782	f_out(k,j,i) = work_ffty_vec(j,k,i)
783	ENDDO
784	ENDDO
785
786	ENDDO
787	!$OMP END PARALLEL
788	#endif
789
790	ELSE
791
792	!
793	!-- Cache optimized code.
794	!-- The i-(x-)direction is split into a strided outer loop and an inner
795	!-- loop for better cache performance
796	!$OMP PARALLEL PRIVATE ( i, iend, iouter, ir, j, k, work_ffty )
797	!$OMP DO
798	DO iouter = nxl, nxr, stridex
799
800	iend = MIN( iouter+stridex-1, nxr ) ! Upper bound for inner i loop
801
802	DO k = 1, nz
803	!
804	!-- Resort
805	DO j = 0, ny
806	DO i = iouter, iend
807	work_ffty(j,i-iouter+1) = work(i,k,j)
808	ENDDO
809	ENDDO
810
811	DO i = iouter, iend
812
813	!
814	!-- FFT along y
815	ir = i-iouter+1 ! counter within a stride
816	CALL fft_y( work_ffty(:,ir), 'backward' )
817
818	DO j = 0, ny
819	f_out(k,j,i) = work_ffty(j,ir)
820	ENDDO
821	ENDDO
822
823	ENDDO
824
825	ENDDO
826	!$OMP END PARALLEL
827
828	ENDIF
829
830	CALL cpu_log( log_point_s(7), 'fft_y', 'stop' )
831
832	END SUBROUTINE tr_xy_ffty
833
834
835	SUBROUTINE fftx_tri_fftx( ar )
836
837	!------------------------------------------------------------------------------!
838	! FFT along x, solution of the tridiagonal system and backward FFT for
839	! a 1d-decomposition along x
840	!
841	! WARNING: this subroutine may still not work for hybrid parallelization
842	! with OpenMP (for possible necessary changes see the original
843	! routine poisfft_hybrid, developed by Klaus Ketelsen, May 2002)
844	!------------------------------------------------------------------------------!
845
846	USE control_parameters
847	USE cpulog
848	USE grid_variables
849	USE indices
850	USE interfaces
851	USE pegrid
852	USE transpose_indices
853
854	IMPLICIT NONE
855
856	character(len=3) :: myth_char
857
858	INTEGER :: i, j, k, m, n, omp_get_thread_num, tn
859
860	REAL, DIMENSION(0:nx) :: work_fftx
861	REAL, DIMENSION(0:nx,1:nz) :: work_trix
862	REAL, DIMENSION(nnx,1:nza,nys_x:nyn_xa,pdims(1)) :: ar
863	REAL, DIMENSION(:,:,:,:), ALLOCATABLE :: tri
864
865
866	CALL cpu_log( log_point_s(33), 'fft_x + tridia', 'start' )
867
868	ALLOCATE( tri(5,0:nx,0:nz-1,0:threads_per_task-1) )
869
870	tn = 0 ! Default thread number in case of one thread
871	!$OMP PARALLEL DO PRIVATE ( i, j, k, m, n, tn, work_fftx, work_trix )
872	DO j = nys_x, nyn_x
873
874	!$ tn = omp_get_thread_num()
875
876	IF ( host(1:3) == 'nec' ) THEN
877	!
878	!-- Code optimized for vector processors
879	DO k = 1, nz
880
881	m = 0
882	DO n = 1, pdims(1)
883	DO i = 1, nnx_pe( n-1 ) ! WARN: pcoord(i) should be used!!
884	work_trix(m,k) = ar(i,k,j,n)
885	m = m + 1
886	ENDDO
887	ENDDO
888
889	ENDDO
890
891	CALL fft_x_m( work_trix, 'forward' )
892
893	ELSE
894	!
895	!-- Cache optimized code
896	DO k = 1, nz
897
898	m = 0
899	DO n = 1, pdims(1)
900	DO i = 1, nnx_pe( n-1 ) ! WARN: pcoord(i) should be used!!
901	work_fftx(m) = ar(i,k,j,n)
902	m = m + 1
903	ENDDO
904	ENDDO
905
906	CALL fft_x( work_fftx, 'forward' )
907
908	DO i = 0, nx
909	work_trix(i,k) = work_fftx(i)
910	ENDDO
911
912	ENDDO
913
914	ENDIF
915
916	!
917	!-- Solve the linear equation system
918	CALL tridia_1dd( ddx2, ddy2, nx, ny, j, work_trix, tri(:,:,:,tn) )
919
920	IF ( host(1:3) == 'nec' ) THEN
921	!
922	!-- Code optimized for vector processors
923	CALL fft_x_m( work_trix, 'backward' )
924
925	DO k = 1, nz
926
927	m = 0
928	DO n = 1, pdims(1)
929	DO i = 1, nnx_pe( n-1 ) ! WARN: pcoord(i) should be used!!
930	ar(i,k,j,n) = work_trix(m,k)
931	m = m + 1
932	ENDDO
933	ENDDO
934
935	ENDDO
936
937	ELSE
938	!
939	!-- Cache optimized code
940	DO k = 1, nz
941
942	DO i = 0, nx
943	work_fftx(i) = work_trix(i,k)
944	ENDDO
945
946	CALL fft_x( work_fftx, 'backward' )
947
948	m = 0
949	DO n = 1, pdims(1)
950	DO i = 1, nnx_pe( n-1 ) ! WARN: pcoord(i) should be used!!
951	ar(i,k,j,n) = work_fftx(m)
952	m = m + 1
953	ENDDO
954	ENDDO
955
956	ENDDO
957
958	ENDIF
959
960	ENDDO
961
962	DEALLOCATE( tri )
963
964	CALL cpu_log( log_point_s(33), 'fft_x + tridia', 'stop' )
965
966	END SUBROUTINE fftx_tri_fftx
967
968
969	SUBROUTINE fftx_tr_xy( f_in, work, f_out )
970
971	!------------------------------------------------------------------------------!
972	! Fourier-transformation along x with subsequent transposition x --> y for
973	! a 1d-decomposition along y
974	!
975	! ATTENTION: The NEC-branch of this routine may significantly profit from
976	! further optimizations. So far, performance is much worse than
977	! for routine ffty_tr_yx (more than three times slower).
978	!------------------------------------------------------------------------------!
979
980	USE control_parameters
981	USE cpulog
982	USE indices
983	USE interfaces
984	USE pegrid
985	USE transpose_indices
986
987	IMPLICIT NONE
988
989	INTEGER :: i, j, k
990
991	REAL, DIMENSION(0:nx,1:nz,nys:nyn) :: work_fftx
992	REAL, DIMENSION(1:nza,nys:nyna,0:nxa) :: f_in
993	REAL, DIMENSION(nny,1:nza,nxl_y:nxr_ya,pdims(2)) :: f_out
994	REAL, DIMENSION(nys:nyna,1:nza,0:nxa) :: work
995
996	!
997	!-- Carry out the FFT along x, where all data are present due to the
998	!-- 1d-decomposition along y. Resort the data in a way that y becomes
999	!-- the first index.
1000	CALL cpu_log( log_point_s(4), 'fft_x', 'start' )
1001
1002	IF ( host(1:3) == 'nec' ) THEN
1003	!
1004	!-- Code for vector processors
1005	!$OMP PARALLEL PRIVATE ( i, j, k )
1006	!$OMP DO
1007	DO i = 0, nx
1008
1009	DO j = nys, nyn
1010	DO k = 1, nz
1011	work_fftx(i,k,j) = f_in(k,j,i)
1012	ENDDO
1013	ENDDO
1014
1015	ENDDO
1016
1017	!$OMP DO
1018	DO j = nys, nyn
1019
1020	CALL fft_x_m( work_fftx(:,:,j), 'forward' )
1021
1022	DO k = 1, nz
1023	DO i = 0, nx
1024	work(j,k,i) = work_fftx(i,k,j)
1025	ENDDO
1026	ENDDO
1027
1028	ENDDO
1029	!$OMP END PARALLEL
1030
1031	ELSE
1032
1033	!
1034	!-- Cache optimized code (there might be still a potential for better
1035	!-- optimization).
1036	!$OMP PARALLEL PRIVATE (i,j,k,work_fftx)
1037	!$OMP DO
1038	DO i = 0, nx
1039
1040	DO j = nys, nyn
1041	DO k = 1, nz
1042	work_fftx(i,k,j) = f_in(k,j,i)
1043	ENDDO
1044	ENDDO
1045
1046	ENDDO
1047
1048	!$OMP DO
1049	DO j = nys, nyn
1050	DO k = 1, nz
1051
1052	CALL fft_x( work_fftx(0:nx,k,j), 'forward' )
1053
1054	DO i = 0, nx
1055	work(j,k,i) = work_fftx(i,k,j)
1056	ENDDO
1057	ENDDO
1058
1059	ENDDO
1060	!$OMP END PARALLEL
1061
1062	ENDIF
1063	CALL cpu_log( log_point_s(4), 'fft_x', 'pause' )
1064
1065	!
1066	!-- Transpose array
1067	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
1068	CALL MPI_ALLTOALL( work(nys,1,0), sendrecvcount_xy, MPI_REAL, &
1069	f_out(1,1,nxl_y,1), sendrecvcount_xy, MPI_REAL, &
1070	comm1dy, ierr )
1071	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
1072
1073	END SUBROUTINE fftx_tr_xy
1074
1075
1076	SUBROUTINE tr_yx_fftx( f_in, work, f_out )
1077
1078	!------------------------------------------------------------------------------!
1079	! Transposition y --> x with a subsequent backward Fourier transformation for
1080	! a 1d-decomposition along x
1081	!------------------------------------------------------------------------------!
1082
1083	USE control_parameters
1084	USE cpulog
1085	USE indices
1086	USE interfaces
1087	USE pegrid
1088	USE transpose_indices
1089
1090	IMPLICIT NONE
1091
1092	INTEGER :: i, j, k
1093
1094	REAL, DIMENSION(0:nx,1:nz,nys:nyn) :: work_fftx
1095	REAL, DIMENSION(nny,1:nza,nxl_y:nxr_ya,pdims(2)) :: f_in
1096	REAL, DIMENSION(1:nza,nys:nyna,0:nxa) :: f_out
1097	REAL, DIMENSION(nys:nyna,1:nza,0:nxa) :: work
1098
1099	!
1100	!-- Transpose array
1101	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
1102	CALL MPI_ALLTOALL( f_in(1,1,nxl_y,1), sendrecvcount_xy, MPI_REAL, &
1103	work(nys,1,0), sendrecvcount_xy, MPI_REAL, &
1104	comm1dy, ierr )
1105	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
1106
1107	!
1108	!-- Carry out the FFT along x, where all data are present due to the
1109	!-- 1d-decomposition along y. Resort the data in a way that y becomes
1110	!-- the first index.
1111	CALL cpu_log( log_point_s(4), 'fft_x', 'continue' )
1112
1113	IF ( host(1:3) == 'nec' ) THEN
1114	!
1115	!-- Code optimized for vector processors
1116	!$OMP PARALLEL PRIVATE ( i, j, k )
1117	!$OMP DO
1118	DO j = nys, nyn
1119
1120	DO k = 1, nz
1121	DO i = 0, nx
1122	work_fftx(i,k,j) = work(j,k,i)
1123	ENDDO
1124	ENDDO
1125
1126	CALL fft_x_m( work_fftx(:,:,j), 'backward' )
1127
1128	ENDDO
1129
1130	!$OMP DO
1131	DO i = 0, nx
1132	DO j = nys, nyn
1133	DO k = 1, nz
1134	f_out(k,j,i) = work_fftx(i,k,j)
1135	ENDDO
1136	ENDDO
1137	ENDDO
1138	!$OMP END PARALLEL
1139
1140	ELSE
1141
1142	!
1143	!-- Cache optimized code (there might be still a potential for better
1144	!-- optimization).
1145	!$OMP PARALLEL PRIVATE (i,j,k,work_fftx)
1146	!$OMP DO
1147	DO j = nys, nyn
1148	DO k = 1, nz
1149
1150	DO i = 0, nx
1151	work_fftx(i,k,j) = work(j,k,i)
1152	ENDDO
1153
1154	CALL fft_x( work_fftx(0:nx,k,j), 'backward' )
1155
1156	ENDDO
1157	ENDDO
1158
1159	!$OMP DO
1160	DO i = 0, nx
1161	DO j = nys, nyn
1162	DO k = 1, nz
1163	f_out(k,j,i) = work_fftx(i,k,j)
1164	ENDDO
1165	ENDDO
1166	ENDDO
1167	!$OMP END PARALLEL
1168
1169	ENDIF
1170	CALL cpu_log( log_point_s(4), 'fft_x', 'stop' )
1171
1172	END SUBROUTINE tr_yx_fftx
1173
1174
1175	SUBROUTINE ffty_tri_ffty( ar )
1176
1177	!------------------------------------------------------------------------------!
1178	! FFT along y, solution of the tridiagonal system and backward FFT for
1179	! a 1d-decomposition along y
1180	!
1181	! WARNING: this subroutine may still not work for hybrid parallelization
1182	! with OpenMP (for possible necessary changes see the original
1183	! routine poisfft_hybrid, developed by Klaus Ketelsen, May 2002)
1184	!------------------------------------------------------------------------------!
1185
1186	USE control_parameters
1187	USE cpulog
1188	USE grid_variables
1189	USE indices
1190	USE interfaces
1191	USE pegrid
1192	USE transpose_indices
1193
1194	IMPLICIT NONE
1195
1196	INTEGER :: i, j, k, m, n, omp_get_thread_num, tn
1197
1198	REAL, DIMENSION(0:ny) :: work_ffty
1199	REAL, DIMENSION(0:ny,1:nz) :: work_triy
1200	REAL, DIMENSION(nny,1:nza,nxl_y:nxr_ya,pdims(2)) :: ar
1201	REAL, DIMENSION(:,:,:,:), ALLOCATABLE :: tri
1202
1203
1204	CALL cpu_log( log_point_s(39), 'fft_y + tridia', 'start' )
1205
1206	ALLOCATE( tri(5,0:ny,0:nz-1,0:threads_per_task-1) )
1207
1208	tn = 0 ! Default thread number in case of one thread
1209	!$OMP PARALLEL PRIVATE ( i, j, k, m, n, tn, work_ffty, work_triy )
1210	!$OMP DO
1211	DO i = nxl_y, nxr_y
1212
1213	!$ tn = omp_get_thread_num()
1214
1215	IF ( host(1:3) == 'nec' ) THEN
1216	!
1217	!-- Code optimized for vector processors
1218	DO k = 1, nz
1219
1220	m = 0
1221	DO n = 1, pdims(2)
1222	DO j = 1, nny_pe( n-1 ) ! WARN: pcoord(j) should be used!!
1223	work_triy(m,k) = ar(j,k,i,n)
1224	m = m + 1
1225	ENDDO
1226	ENDDO
1227
1228	ENDDO
1229
1230	CALL fft_y_m( work_triy, ny, 'forward' )
1231
1232	ELSE
1233	!
1234	!-- Cache optimized code
1235	DO k = 1, nz
1236
1237	m = 0
1238	DO n = 1, pdims(2)
1239	DO j = 1, nny_pe( n-1 ) ! WARN: pcoord(j) should be used!!
1240	work_ffty(m) = ar(j,k,i,n)
1241	m = m + 1
1242	ENDDO
1243	ENDDO
1244
1245	CALL fft_y( work_ffty, 'forward' )
1246
1247	DO j = 0, ny
1248	work_triy(j,k) = work_ffty(j)
1249	ENDDO
1250
1251	ENDDO
1252
1253	ENDIF
1254
1255	!
1256	!-- Solve the linear equation system
1257	CALL tridia_1dd( ddy2, ddx2, ny, nx, i, work_triy, tri(:,:,:,tn) )
1258
1259	IF ( host(1:3) == 'nec' ) THEN
1260	!
1261	!-- Code optimized for vector processors
1262	CALL fft_y_m( work_triy, ny, 'backward' )
1263
1264	DO k = 1, nz
1265
1266	m = 0
1267	DO n = 1, pdims(2)
1268	DO j = 1, nny_pe( n-1 ) ! WARN: pcoord(j) should be used!!
1269	ar(j,k,i,n) = work_triy(m,k)
1270	m = m + 1
1271	ENDDO
1272	ENDDO
1273
1274	ENDDO
1275
1276	ELSE
1277	!
1278	!-- Cache optimized code
1279	DO k = 1, nz
1280
1281	DO j = 0, ny
1282	work_ffty(j) = work_triy(j,k)
1283	ENDDO
1284
1285	CALL fft_y( work_ffty, 'backward' )
1286
1287	m = 0
1288	DO n = 1, pdims(2)
1289	DO j = 1, nny_pe( n-1 ) ! WARN: pcoord(j) should be used!!
1290	ar(j,k,i,n) = work_ffty(m)
1291	m = m + 1
1292	ENDDO
1293	ENDDO
1294
1295	ENDDO
1296
1297	ENDIF
1298
1299	ENDDO
1300	!$OMP END PARALLEL
1301
1302	DEALLOCATE( tri )
1303
1304	CALL cpu_log( log_point_s(39), 'fft_y + tridia', 'stop' )
1305
1306	END SUBROUTINE ffty_tri_ffty
1307
1308
1309	SUBROUTINE tridia_1dd( ddx2, ddy2, nx, ny, j, ar, tri )
1310
1311	!------------------------------------------------------------------------------!
1312	! Solves the linear system of equations for a 1d-decomposition along x (see
1313	! tridia)
1314	!
1315	! Attention: when using the intel compiler, array tri must be passed as an
1316	! argument to the contained subroutines. Otherwise addres faults
1317	! will occur.
1318	! On NEC, tri should not be passed (except for routine substi_1dd)
1319	! because this causes very bad performance.
1320	!------------------------------------------------------------------------------!
1321
1322	USE arrays_3d
1323	USE control_parameters
1324
1325	USE pegrid
1326
1327	IMPLICIT NONE
1328
1329	INTEGER :: i, j, k, nnyh, nx, ny, omp_get_thread_num, tn
1330
1331	REAL :: ddx2, ddy2
1332
1333	REAL, DIMENSION(0:nx,1:nz) :: ar
1334	REAL, DIMENSION(0:nx,0:nz-1) :: ar1
1335	REAL, DIMENSION(5,0:nx,0:nz-1) :: tri
1336
1337
1338	nnyh = ( ny + 1 ) / 2
1339
1340	!
1341	!-- Define constant elements of the tridiagonal matrix.
1342	!-- The compiler on SX6 does loop exchange. If 0:nx is a high power of 2,
1343	!-- the exchanged loops create bank conflicts. The following directive
1344	!-- prohibits loop exchange and the loops perform much better.
1345	! tn = omp_get_thread_num()
1346	! WRITE( 120+tn, * ) '+++ id=',myid,' nx=',nx,' thread=', omp_get_thread_num()
1347	! CALL local_flush( 120+tn )
1348	!CDIR NOLOOPCHG
1349	DO k = 0, nz-1
1350	DO i = 0,nx
1351	tri(2,i,k) = ddzu(k+1) * ddzw(k+1)
1352	tri(3,i,k) = ddzu(k+2) * ddzw(k+1)
1353	ENDDO
1354	ENDDO
1355	! WRITE( 120+tn, * ) '+++ id=',myid,' end of first tridia loop thread=', omp_get_thread_num()
1356	! CALL local_flush( 120+tn )
1357
1358	IF ( j <= nnyh ) THEN
1359	#if defined( __lcmuk )
1360	CALL maketri_1dd( j, tri )
1361	#else
1362	CALL maketri_1dd( j )
1363	#endif
1364	ELSE
1365	#if defined( __lcmuk )
1366	CALL maketri_1dd( ny+1-j, tri )
1367	#else
1368	CALL maketri_1dd( ny+1-j )
1369	#endif
1370	ENDIF
1371	#if defined( __lcmuk )
1372	CALL split_1dd( tri )
1373	#else
1374	CALL split_1dd
1375	#endif
1376	CALL substi_1dd( ar, tri )
1377
1378	CONTAINS
1379
1380	#if defined( __lcmuk )
1381	SUBROUTINE maketri_1dd( j, tri )
1382	#else
1383	SUBROUTINE maketri_1dd( j )
1384	#endif
1385
1386	!------------------------------------------------------------------------------!
1387	! computes the i- and j-dependent component of the matrix
1388	!------------------------------------------------------------------------------!
1389
1390	USE constants
1391
1392	IMPLICIT NONE
1393
1394	INTEGER :: i, j, k, nnxh
1395	REAL :: a, c
1396
1397	REAL, DIMENSION(0:nx) :: l
1398
1399	#if defined( __lcmuk )
1400	REAL, DIMENSION(5,0:nx,0:nz-1) :: tri
1401	#endif
1402
1403
1404	nnxh = ( nx + 1 ) / 2
1405	!
1406	!-- Provide the tridiagonal matrix for solution of the Poisson equation in
1407	!-- Fourier space. The coefficients are computed following the method of
1408	!-- Schmidt et al. (DFVLR-Mitteilung 84-15), which departs from Stephan
1409	!-- Siano's original version by discretizing the Poisson equation,
1410	!-- before it is Fourier-transformed
1411	DO i = 0, nx
1412	IF ( i >= 0 .AND. i < nnxh ) THEN
1413	l(i) = 2.0 * ( 1.0 - COS( ( 2.0 * pi * i ) / &
1414	FLOAT( nx+1 ) ) ) * ddx2 + &
1415	2.0 * ( 1.0 - COS( ( 2.0 * pi * j ) / &
1416	FLOAT( ny+1 ) ) ) * ddy2
1417	ELSEIF ( i == nnxh ) THEN
1418	l(i) = 2.0 * ( 1.0 - COS( ( 2.0 * pi * ( nx+1-i ) ) / &
1419	FLOAT( nx+1 ) ) ) * ddx2 + &
1420	2.0 * ( 1.0 - COS( ( 2.0 * pi * j ) / &
1421	FLOAT( ny+1 ) ) ) * ddy2
1422	ELSE
1423	l(i) = 2.0 * ( 1.0 - COS( ( 2.0 * pi * ( nx+1-i ) ) / &
1424	FLOAT( nx+1 ) ) ) * ddx2 + &
1425	2.0 * ( 1.0 - COS( ( 2.0 * pi * j ) / &
1426	FLOAT( ny+1 ) ) ) * ddy2
1427	ENDIF
1428	ENDDO
1429
1430	DO k = 0, nz-1
1431	DO i = 0, nx
1432	a = -1.0 * ddzu(k+2) * ddzw(k+1)
1433	c = -1.0 * ddzu(k+1) * ddzw(k+1)
1434	tri(1,i,k) = a + c - l(i)
1435	ENDDO
1436	ENDDO
1437	IF ( ibc_p_b == 1 .OR. ibc_p_b == 2 ) THEN
1438	DO i = 0, nx
1439	tri(1,i,0) = tri(1,i,0) + tri(2,i,0)
1440	ENDDO
1441	ENDIF
1442	IF ( ibc_p_t == 1 ) THEN
1443	DO i = 0, nx
1444	tri(1,i,nz-1) = tri(1,i,nz-1) + tri(3,i,nz-1)
1445	ENDDO
1446	ENDIF
1447
1448	END SUBROUTINE maketri_1dd
1449
1450
1451	#if defined( __lcmuk )
1452	SUBROUTINE split_1dd( tri )
1453	#else
1454	SUBROUTINE split_1dd
1455	#endif
1456
1457	!------------------------------------------------------------------------------!
1458	! Splitting of the tridiagonal matrix (Thomas algorithm)
1459	!------------------------------------------------------------------------------!
1460
1461	IMPLICIT NONE
1462
1463	INTEGER :: i, k
1464
1465	#if defined( __lcmuk )
1466	REAL, DIMENSION(5,0:nx,0:nz-1) :: tri
1467	#endif
1468
1469
1470	!
1471	!-- Splitting
1472	DO i = 0, nx
1473	tri(4,i,0) = tri(1,i,0)
1474	ENDDO
1475	DO k = 1, nz-1
1476	DO i = 0, nx
1477	tri(5,i,k) = tri(2,i,k) / tri(4,i,k-1)
1478	tri(4,i,k) = tri(1,i,k) - tri(3,i,k-1) * tri(5,i,k)
1479	ENDDO
1480	ENDDO
1481
1482	END SUBROUTINE split_1dd
1483
1484
1485	SUBROUTINE substi_1dd( ar, tri )
1486
1487	!------------------------------------------------------------------------------!
1488	! Substitution (Forward and Backward) (Thomas algorithm)
1489	!------------------------------------------------------------------------------!
1490
1491	IMPLICIT NONE
1492
1493	INTEGER :: i, k
1494
1495	REAL, DIMENSION(0:nx,nz) :: ar
1496	REAL, DIMENSION(0:nx,0:nz-1) :: ar1
1497	REAL, DIMENSION(5,0:nx,0:nz-1) :: tri
1498
1499	!
1500	!-- Forward substitution
1501	DO i = 0, nx
1502	ar1(i,0) = ar(i,1)
1503	ENDDO
1504	DO k = 1, nz-1
1505	DO i = 0, nx
1506	ar1(i,k) = ar(i,k+1) - tri(5,i,k) * ar1(i,k-1)
1507	ENDDO
1508	ENDDO
1509
1510	!
1511	!-- Backward substitution
1512	DO i = 0, nx
1513	ar(i,nz) = ar1(i,nz-1) / tri(4,i,nz-1)
1514	ENDDO
1515	DO k = nz-2, 0, -1
1516	DO i = 0, nx
1517	ar(i,k+1) = ( ar1(i,k) - tri(3,i,k) * ar(i,k+2) ) &
1518	/ tri(4,i,k)
1519	ENDDO
1520	ENDDO
1521
1522	!
1523	!-- Indices i=0, j=0 correspond to horizontally averaged pressure.
1524	!-- The respective values of ar should be zero at all k-levels if
1525	!-- acceleration of horizontally averaged vertical velocity is zero.
1526	IF ( ibc_p_b == 1 .AND. ibc_p_t == 1 ) THEN
1527	IF ( j == 0 ) THEN
1528	DO k = 1, nz
1529	ar(0,k) = 0.0
1530	ENDDO
1531	ENDIF
1532	ENDIF
1533
1534	END SUBROUTINE substi_1dd
1535
1536	END SUBROUTINE tridia_1dd
1537
1538	#endif
1539
1540	END MODULE poisfft_mod

Note: See TracBrowser for help on using the repository browser.

Download in other formats:

| Impressum | ©Leibniz Universität Hannover |