Home

Context Navigation

source: palm/trunk/SOURCE/poisfft.f90 @ 377

Last change on this file since 377 was 377, checked in by raasch, 15 years ago
adjustments for openmp usage on sgi-ice
Property svn:keywords set to `Id`
File size: 44.5 KB

Line
1	MODULE poisfft_mod
2
3	!------------------------------------------------------------------------------!
4	! Actual revisions:
5	! -----------------
6	! __lcmuk changed to __lc to avoid problems with Intel compiler on sgi-ice
7	!
8	! Former revisions:
9	! -----------------
10	! $Id: poisfft.f90 377 2009-09-04 11:09:00Z raasch $
11	!
12	! 164 2008-05-15 08:46:15Z raasch
13	! Arguments removed from transpose routines
14	!
15	! 128 2007-10-26 13:11:14Z raasch
16	! Bugfix: wavenumber calculation for even nx in routines maketri
17	!
18	! 85 2007-05-11 09:35:14Z raasch
19	! Bugfix: work_fft*_vec removed from some PRIVATE-declarations
20	!
21	! 76 2007-03-29 00:58:32Z raasch
22	! Tridiagonal coefficients adjusted for Neumann boundary conditions both at
23	! the bottom and the top.
24	!
25	! RCS Log replace by Id keyword, revision history cleaned up
26	!
27	! Revision 1.24 2006/08/04 15:00:24 raasch
28	! Default setting of the thread number tn in case of not using OpenMP
29	!
30	! Revision 1.23 2006/02/23 12:48:38 raasch
31	! Additional compiler directive in routine tridia_1dd for preventing loop
32	! exchange on NEC-SX6
33	!
34	! Revision 1.20 2004/04/30 12:38:09 raasch
35	! Parts of former poisfft_hybrid moved to this subroutine,
36	! former subroutine changed to a module, renaming of FFT-subroutines and
37	! -module, FFTs completely substituted by calls of fft_x and fft_y,
38	! NAG fft used in the non-parallel case completely removed, l in maketri
39	! is now a 1d-array, variables passed by modules instead of using parameter
40	! lists, enlarged transposition arrays introduced
41	!
42	! Revision 1.1 1997/07/24 11:24:14 raasch
43	! Initial revision
44	!
45	!
46	! Description:
47	! ------------
48	! See below.
49	!------------------------------------------------------------------------------!
50
51	!--------------------------------------------------------------------------!
52	! poisfft !
53	! !
54	! Original version: Stephan Siano (pois3d) !
55	! !
56	! Institute of Meteorology and Climatology, University of Hannover !
57	! Germany !
58	! !
59	! Version as of July 23,1996 !
60	! !
61	! !
62	! Version for parallel computers: Siegfried Raasch !
63	! !
64	! Version as of July 03,1997 !
65	! !
66	! Solves the Poisson equation with a 2D spectral method !
67	! d^2 p / dx^2 + d^2 p / dy^2 + d^2 p / dz^2 = s !
68	! !
69	! Input: !
70	! real ar contains in the (nnx,nny,nnz) elements, !
71	! starting from the element (1,nys,nxl), the !
72	! values for s !
73	! real work Temporary array !
74	! !
75	! Output: !
76	! real ar contains the solution for p !
77	!--------------------------------------------------------------------------!
78
79	USE fft_xy
80	USE indices
81	USE transpose_indices
82
83	IMPLICIT NONE
84
85	PRIVATE
86	PUBLIC poisfft, poisfft_init
87
88	INTERFACE poisfft
89	MODULE PROCEDURE poisfft
90	END INTERFACE poisfft
91
92	INTERFACE poisfft_init
93	MODULE PROCEDURE poisfft_init
94	END INTERFACE poisfft_init
95
96	CONTAINS
97
98	SUBROUTINE poisfft_init
99
100	CALL fft_init
101
102	END SUBROUTINE poisfft_init
103
104
105	SUBROUTINE poisfft( ar, work )
106
107	USE cpulog
108	USE interfaces
109	USE pegrid
110
111	IMPLICIT NONE
112
113	REAL, DIMENSION(1:nza,nys:nyna,nxl:nxra) :: ar, work
114
115
116	CALL cpu_log( log_point_s(3), 'poisfft', 'start' )
117
118	!
119	!-- Two-dimensional Fourier Transformation in x- and y-direction.
120	#if defined( __parallel )
121	IF ( pdims(2) == 1 ) THEN
122
123	!
124	!-- 1d-domain-decomposition along x:
125	!-- FFT along y and transposition y --> x
126	CALL ffty_tr_yx( ar, work, ar )
127
128	!
129	!-- FFT along x, solving the tridiagonal system and backward FFT
130	CALL fftx_tri_fftx( ar )
131
132	!
133	!-- Transposition x --> y and backward FFT along y
134	CALL tr_xy_ffty( ar, work, ar )
135
136	ELSEIF ( pdims(1) == 1 ) THEN
137
138	!
139	!-- 1d-domain-decomposition along y:
140	!-- FFT along x and transposition x --> y
141	CALL fftx_tr_xy( ar, work, ar )
142
143	!
144	!-- FFT along y, solving the tridiagonal system and backward FFT
145	CALL ffty_tri_ffty( ar )
146
147	!
148	!-- Transposition y --> x and backward FFT along x
149	CALL tr_yx_fftx( ar, work, ar )
150
151	ELSE
152
153	!
154	!-- 2d-domain-decomposition
155	!-- Transposition z --> x
156	CALL cpu_log( log_point_s(5), 'transpo forward', 'start' )
157	CALL transpose_zx( ar, work, ar )
158	CALL cpu_log( log_point_s(5), 'transpo forward', 'pause' )
159
160	CALL cpu_log( log_point_s(4), 'fft_x', 'start' )
161	CALL fftxp( ar, 'forward' )
162	CALL cpu_log( log_point_s(4), 'fft_x', 'pause' )
163
164	!
165	!-- Transposition x --> y
166	CALL cpu_log( log_point_s(5), 'transpo forward', 'continue' )
167	CALL transpose_xy( ar, work, ar )
168	CALL cpu_log( log_point_s(5), 'transpo forward', 'pause' )
169
170	CALL cpu_log( log_point_s(7), 'fft_y', 'start' )
171	CALL fftyp( ar, 'forward' )
172	CALL cpu_log( log_point_s(7), 'fft_y', 'pause' )
173
174	!
175	!-- Transposition y --> z
176	CALL cpu_log( log_point_s(5), 'transpo forward', 'continue' )
177	CALL transpose_yz( ar, work, ar )
178	CALL cpu_log( log_point_s(5), 'transpo forward', 'stop' )
179
180	!
181	!-- Solve the Poisson equation in z-direction in cartesian space.
182	CALL cpu_log( log_point_s(6), 'tridia', 'start' )
183	CALL tridia( ar )
184	CALL cpu_log( log_point_s(6), 'tridia', 'stop' )
185
186	!
187	!-- Inverse Fourier Transformation
188	!-- Transposition z --> y
189	CALL cpu_log( log_point_s(8), 'transpo invers', 'start' )
190	CALL transpose_zy( ar, work, ar )
191	CALL cpu_log( log_point_s(8), 'transpo invers', 'pause' )
192
193	CALL cpu_log( log_point_s(7), 'fft_y', 'continue' )
194	CALL fftyp( ar, 'backward' )
195	CALL cpu_log( log_point_s(7), 'fft_y', 'stop' )
196
197	!
198	!-- Transposition y --> x
199	CALL cpu_log( log_point_s(8), 'transpo invers', 'continue' )
200	CALL transpose_yx( ar, work, ar )
201	CALL cpu_log( log_point_s(8), 'transpo invers', 'pause' )
202
203	CALL cpu_log( log_point_s(4), 'fft_x', 'continue' )
204	CALL fftxp( ar, 'backward' )
205	CALL cpu_log( log_point_s(4), 'fft_x', 'stop' )
206
207	!
208	!-- Transposition x --> z
209	CALL cpu_log( log_point_s(8), 'transpo invers', 'continue' )
210	CALL transpose_xz( ar, work, ar )
211	CALL cpu_log( log_point_s(8), 'transpo invers', 'stop' )
212
213	ENDIF
214
215	#else
216
217	!
218	!-- Two-dimensional Fourier Transformation along x- and y-direction.
219	CALL cpu_log( log_point_s(4), 'fft_x', 'start' )
220	CALL fftx( ar, 'forward' )
221	CALL cpu_log( log_point_s(4), 'fft_x', 'pause' )
222	CALL cpu_log( log_point_s(7), 'fft_y', 'start' )
223	CALL ffty( ar, 'forward' )
224	CALL cpu_log( log_point_s(7), 'fft_y', 'pause' )
225
226	!
227	!-- Solve the Poisson equation in z-direction in cartesian space.
228	CALL cpu_log( log_point_s(6), 'tridia', 'start' )
229	CALL tridia( ar )
230	CALL cpu_log( log_point_s(6), 'tridia', 'stop' )
231
232	!
233	!-- Inverse Fourier Transformation.
234	CALL cpu_log( log_point_s(7), 'fft_y', 'continue' )
235	CALL ffty( ar, 'backward' )
236	CALL cpu_log( log_point_s(7), 'fft_y', 'stop' )
237	CALL cpu_log( log_point_s(4), 'fft_x', 'continue' )
238	CALL fftx( ar, 'backward' )
239	CALL cpu_log( log_point_s(4), 'fft_x', 'stop' )
240
241	#endif
242
243	CALL cpu_log( log_point_s(3), 'poisfft', 'stop' )
244
245	END SUBROUTINE poisfft
246
247
248
249	SUBROUTINE tridia( ar )
250
251	!------------------------------------------------------------------------------!
252	! solves the linear system of equations:
253	!
254	! -(4 pi^2(i^2/(dx^2nnx^2)+j^2/(dy^2nny^2))+
255	! 1/(dzu(k)dzw(k))+1/(dzu(k-1)dzw(k)))*p(i,j,k)+
256	! 1/(dzu(k)dzw(k))p(i,j,k+1)+1/(dzu(k-1)dzw(k))p(i,j,k-1)=d(i,j,k)
257	!
258	! by using the Thomas algorithm
259	!------------------------------------------------------------------------------!
260
261	USE arrays_3d
262
263	IMPLICIT NONE
264
265	INTEGER :: i, j, k, nnyh
266
267	REAL, DIMENSION(nxl_z:nxr_z,0:nz-1) :: ar1
268	REAL, DIMENSION(5,nxl_z:nxr_z,0:nz-1) :: tri
269
270	#if defined( __parallel )
271	REAL :: ar(nxl_z:nxr_za,nys_z:nyn_za,1:nza)
272	#else
273	REAL :: ar(1:nz,nys_z:nyn_z,nxl_z:nxr_z)
274	#endif
275
276
277	nnyh = (ny+1) / 2
278
279	!
280	!-- Define constant elements of the tridiagonal matrix.
281	DO k = 0, nz-1
282	DO i = nxl_z, nxr_z
283	tri(2,i,k) = ddzu(k+1) * ddzw(k+1)
284	tri(3,i,k) = ddzu(k+2) * ddzw(k+1)
285	ENDDO
286	ENDDO
287
288	#if defined( __parallel )
289	!
290	!-- Repeat for all y-levels.
291	DO j = nys_z, nyn_z
292	IF ( j <= nnyh ) THEN
293	CALL maketri( tri, j )
294	ELSE
295	CALL maketri( tri, ny+1-j )
296	ENDIF
297	CALL split( tri )
298	CALL substi( ar, ar1, tri, j )
299	ENDDO
300	#else
301	!
302	!-- First y-level.
303	CALL maketri( tri, nys_z )
304	CALL split( tri )
305	CALL substi( ar, ar1, tri, 0 )
306
307	!
308	!-- Further y-levels.
309	DO j = 1, nnyh - 1
310	CALL maketri( tri, j )
311	CALL split( tri )
312	CALL substi( ar, ar1, tri, j )
313	CALL substi( ar, ar1, tri, ny+1-j )
314	ENDDO
315	CALL maketri( tri, nnyh )
316	CALL split( tri )
317	CALL substi( ar, ar1, tri, nnyh+nys )
318	#endif
319
320	CONTAINS
321
322	SUBROUTINE maketri( tri, j )
323
324	!------------------------------------------------------------------------------!
325	! Computes the i- and j-dependent component of the matrix
326	!------------------------------------------------------------------------------!
327
328	USE arrays_3d
329	USE constants
330	USE control_parameters
331	USE grid_variables
332
333	IMPLICIT NONE
334
335	INTEGER :: i, j, k, nnxh
336	REAL :: a, c
337	REAL :: ll(nxl_z:nxr_z)
338	REAL :: tri(5,nxl_z:nxr_z,0:nz-1)
339
340
341	nnxh = ( nx + 1 ) / 2
342
343	!
344	!-- Provide the tridiagonal matrix for solution of the Poisson equation in
345	!-- Fourier space. The coefficients are computed following the method of
346	!-- Schmidt et al. (DFVLR-Mitteilung 84-15), which departs from Stephan
347	!-- Siano's original version by discretizing the Poisson equation,
348	!-- before it is Fourier-transformed
349	#if defined( __parallel )
350	DO i = nxl_z, nxr_z
351	IF ( i >= 0 .AND. i <= nnxh ) THEN
352	ll(i) = 2.0 * ( 1.0 - COS( ( 2.0 * pi * i ) / &
353	FLOAT( nx+1 ) ) ) / ( dx * dx ) + &
354	2.0 * ( 1.0 - COS( ( 2.0 * pi * j ) / &
355	FLOAT( ny+1 ) ) ) / ( dy * dy )
356	ELSE
357	ll(i) = 2.0 * ( 1.0 - COS( ( 2.0 * pi * ( nx+1-i ) ) / &
358	FLOAT( nx+1 ) ) ) / ( dx * dx ) + &
359	2.0 * ( 1.0 - COS( ( 2.0 * pi * j ) / &
360	FLOAT( ny+1 ) ) ) / ( dy * dy )
361	ENDIF
362	DO k = 0,nz-1
363	a = -1.0 * ddzu(k+2) * ddzw(k+1)
364	c = -1.0 * ddzu(k+1) * ddzw(k+1)
365	tri(1,i,k) = a + c - ll(i)
366	ENDDO
367	ENDDO
368	#else
369	DO i = 0, nnxh
370	ll(i) = 2.0 * ( 1.0 - COS( ( 2.0 * pi * i ) / FLOAT( nx+1 ) ) ) / &
371	( dx * dx ) + &
372	2.0 * ( 1.0 - COS( ( 2.0 * pi * j ) / FLOAT( ny+1 ) ) ) / &
373	( dy * dy )
374	DO k = 0, nz-1
375	a = -1.0 * ddzu(k+2) * ddzw(k+1)
376	c = -1.0 * ddzu(k+1) * ddzw(k+1)
377	tri(1,i,k) = a + c - ll(i)
378	IF ( i >= 1 .and. i < nnxh ) THEN
379	tri(1,nx+1-i,k) = tri(1,i,k)
380	ENDIF
381	ENDDO
382	ENDDO
383	#endif
384	IF ( ibc_p_b == 1 .OR. ibc_p_b == 2 ) THEN
385	DO i = nxl_z, nxr_z
386	tri(1,i,0) = tri(1,i,0) + tri(2,i,0)
387	ENDDO
388	ENDIF
389	IF ( ibc_p_t == 1 ) THEN
390	DO i = nxl_z, nxr_z
391	tri(1,i,nz-1) = tri(1,i,nz-1) + tri(3,i,nz-1)
392	ENDDO
393	ENDIF
394
395	END SUBROUTINE maketri
396
397
398	SUBROUTINE substi( ar, ar1, tri, j )
399
400	!------------------------------------------------------------------------------!
401	! Substitution (Forward and Backward) (Thomas algorithm)
402	!------------------------------------------------------------------------------!
403
404	USE control_parameters
405
406	IMPLICIT NONE
407
408	INTEGER :: i, j, k
409	REAL :: ar1(nxl_z:nxr_z,0:nz-1)
410	REAL :: tri(5,nxl_z:nxr_z,0:nz-1)
411	#if defined( __parallel )
412	REAL :: ar(nxl_z:nxr_za,nys_z:nyn_za,1:nza)
413	#else
414	REAL :: ar(1:nz,nys_z:nyn_z,nxl_z:nxr_z)
415	#endif
416
417	!
418	!-- Forward substitution.
419	DO i = nxl_z, nxr_z
420	#if defined( __parallel )
421	ar1(i,0) = ar(i,j,1)
422	#else
423	ar1(i,0) = ar(1,j,i)
424	#endif
425	ENDDO
426	DO k = 1, nz - 1
427	DO i = nxl_z, nxr_z
428	#if defined( __parallel )
429	ar1(i,k) = ar(i,j,k+1) - tri(5,i,k) * ar1(i,k-1)
430	#else
431	ar1(i,k) = ar(k+1,j,i) - tri(5,i,k) * ar1(i,k-1)
432	#endif
433	ENDDO
434	ENDDO
435
436	!
437	!-- Backward substitution.
438	DO i = nxl_z, nxr_z
439	#if defined( __parallel )
440	ar(i,j,nz) = ar1(i,nz-1) / tri(4,i,nz-1)
441	#else
442	ar(nz,j,i) = ar1(i,nz-1) / tri(4,i,nz-1)
443	#endif
444	ENDDO
445	DO k = nz-2, 0, -1
446	DO i = nxl_z, nxr_z
447	#if defined( __parallel )
448	ar(i,j,k+1) = ( ar1(i,k) - tri(3,i,k) * ar(i,j,k+2) ) &
449	/ tri(4,i,k)
450	#else
451	ar(k+1,j,i) = ( ar1(i,k) - tri(3,i,k) * ar(k+2,j,i) ) &
452	/ tri(4,i,k)
453	#endif
454	ENDDO
455	ENDDO
456
457	!
458	!-- Indices i=0, j=0 correspond to horizontally averaged pressure.
459	!-- The respective values of ar should be zero at all k-levels if
460	!-- acceleration of horizontally averaged vertical velocity is zero.
461	IF ( ibc_p_b == 1 .AND. ibc_p_t == 1 ) THEN
462	IF ( j == 0 .AND. nxl_z == 0 ) THEN
463	#if defined( __parallel )
464	DO k = 1, nz
465	ar(nxl_z,j,k) = 0.0
466	ENDDO
467	#else
468	DO k = 1, nz
469	ar(k,j,nxl_z) = 0.0
470	ENDDO
471	#endif
472	ENDIF
473	ENDIF
474
475	END SUBROUTINE substi
476
477
478	SUBROUTINE split( tri )
479
480	!------------------------------------------------------------------------------!
481	! Splitting of the tridiagonal matrix (Thomas algorithm)
482	!------------------------------------------------------------------------------!
483
484	IMPLICIT NONE
485
486	INTEGER :: i, k
487	REAL :: tri(5,nxl_z:nxr_z,0:nz-1)
488
489	!
490	!-- Splitting.
491	DO i = nxl_z, nxr_z
492	tri(4,i,0) = tri(1,i,0)
493	ENDDO
494	DO k = 1, nz-1
495	DO i = nxl_z, nxr_z
496	tri(5,i,k) = tri(2,i,k) / tri(4,i,k-1)
497	tri(4,i,k) = tri(1,i,k) - tri(3,i,k-1) * tri(5,i,k)
498	ENDDO
499	ENDDO
500
501	END SUBROUTINE split
502
503	END SUBROUTINE tridia
504
505
506	#if defined( __parallel )
507	SUBROUTINE fftxp( ar, direction )
508
509	!------------------------------------------------------------------------------!
510	! Fourier-transformation along x-direction Parallelized version
511	!------------------------------------------------------------------------------!
512
513	IMPLICIT NONE
514
515	CHARACTER (LEN=*) :: direction
516	INTEGER :: j, k
517	REAL :: ar(0:nxa,nys_x:nyn_xa,nzb_x:nzt_xa)
518
519	!
520	!-- Performing the fft with one of the methods implemented
521	DO k = nzb_x, nzt_x
522	DO j = nys_x, nyn_x
523	CALL fft_x( ar(0:nx,j,k), direction )
524	ENDDO
525	ENDDO
526
527	END SUBROUTINE fftxp
528
529	#else
530	SUBROUTINE fftx( ar, direction )
531
532	!------------------------------------------------------------------------------!
533	! Fourier-transformation along x-direction Non parallel version
534	!------------------------------------------------------------------------------!
535
536	IMPLICIT NONE
537
538	CHARACTER (LEN=*) :: direction
539	INTEGER :: i, j, k
540	REAL :: ar(1:nz,0:ny,0:nx)
541
542	!
543	!-- Performing the fft with one of the methods implemented
544	DO k = 1, nz
545	DO j = 0, ny
546	CALL fft_x( ar(k,j,0:nx), direction )
547	ENDDO
548	ENDDO
549
550	END SUBROUTINE fftx
551	#endif
552
553
554	#if defined( __parallel )
555	SUBROUTINE fftyp( ar, direction )
556
557	!------------------------------------------------------------------------------!
558	! Fourier-transformation along y-direction Parallelized version
559	!------------------------------------------------------------------------------!
560
561	IMPLICIT NONE
562
563	CHARACTER (LEN=*) :: direction
564	INTEGER :: i, k
565	REAL :: ar(0:nya,nxl_y:nxr_ya,nzb_y:nzt_ya)
566
567	!
568	!-- Performing the fft with one of the methods implemented
569	DO k = nzb_y, nzt_y
570	DO i = nxl_y, nxr_y
571	CALL fft_y( ar(0:ny,i,k), direction )
572	ENDDO
573	ENDDO
574
575	END SUBROUTINE fftyp
576
577	#else
578	SUBROUTINE ffty( ar, direction )
579
580	!------------------------------------------------------------------------------!
581	! Fourier-transformation along y-direction Non parallel version
582	!------------------------------------------------------------------------------!
583
584	IMPLICIT NONE
585
586	CHARACTER (LEN=*) :: direction
587	INTEGER :: i, k
588	REAL :: ar(1:nz,0:ny,0:nx)
589
590	!
591	!-- Performing the fft with one of the methods implemented
592	DO k = 1, nz
593	DO i = 0, nx
594	CALL fft_y( ar(k,0:ny,i), direction )
595	ENDDO
596	ENDDO
597
598	END SUBROUTINE ffty
599	#endif
600
601	#if defined( __parallel )
602	SUBROUTINE ffty_tr_yx( f_in, work, f_out )
603
604	!------------------------------------------------------------------------------!
605	! Fourier-transformation along y with subsequent transposition y --> x for
606	! a 1d-decomposition along x
607	!
608	! ATTENTION: The performance of this routine is much faster on the NEC-SX6,
609	! if the first index of work_ffty_vec is odd. Otherwise
610	! memory bank conflicts may occur (especially if the index is a
611	! multiple of 128). That's why work_ffty_vec is dimensioned as
612	! 0:ny+1.
613	! Of course, this will not work if users are using an odd number
614	! of gridpoints along y.
615	!------------------------------------------------------------------------------!
616
617	USE control_parameters
618	USE cpulog
619	USE indices
620	USE interfaces
621	USE pegrid
622	USE transpose_indices
623
624	IMPLICIT NONE
625
626	INTEGER :: i, iend, iouter, ir, j, k
627	INTEGER, PARAMETER :: stridex = 4
628
629	REAL, DIMENSION(0:ny,stridex) :: work_ffty
630	#if defined( __nec )
631	REAL, DIMENSION(0:ny+1,1:nz,nxl:nxr) :: work_ffty_vec
632	#endif
633	REAL, DIMENSION(1:nza,0:nya,nxl:nxra) :: f_in
634	REAL, DIMENSION(nnx,1:nza,nys_x:nyn_xa,pdims(1)) :: f_out
635	REAL, DIMENSION(nxl:nxra,1:nza,0:nya) :: work
636
637	!
638	!-- Carry out the FFT along y, where all data are present due to the
639	!-- 1d-decomposition along x. Resort the data in a way that x becomes
640	!-- the first index.
641	CALL cpu_log( log_point_s(7), 'fft_y', 'start' )
642
643	IF ( host(1:3) == 'nec' ) THEN
644	#if defined( __nec )
645	!
646	!-- Code optimized for vector processors
647	!$OMP PARALLEL PRIVATE ( i, j, k )
648	!$OMP DO
649	DO i = nxl, nxr
650
651	DO j = 0, ny
652	DO k = 1, nz
653	work_ffty_vec(j,k,i) = f_in(k,j,i)
654	ENDDO
655	ENDDO
656
657	CALL fft_y_m( work_ffty_vec(:,:,i), ny+1, 'forward' )
658
659	ENDDO
660
661	!$OMP DO
662	DO k = 1, nz
663	DO j = 0, ny
664	DO i = nxl, nxr
665	work(i,k,j) = work_ffty_vec(j,k,i)
666	ENDDO
667	ENDDO
668	ENDDO
669	!$OMP END PARALLEL
670	#endif
671
672	ELSE
673
674	!
675	!-- Cache optimized code.
676	!-- The i-(x-)direction is split into a strided outer loop and an inner
677	!-- loop for better cache performance
678	!$OMP PARALLEL PRIVATE (i,iend,iouter,ir,j,k,work_ffty)
679	!$OMP DO
680	DO iouter = nxl, nxr, stridex
681
682	iend = MIN( iouter+stridex-1, nxr ) ! Upper bound for inner i loop
683
684	DO k = 1, nz
685
686	DO i = iouter, iend
687
688	ir = i-iouter+1 ! counter within a stride
689	DO j = 0, ny
690	work_ffty(j,ir) = f_in(k,j,i)
691	ENDDO
692	!
693	!-- FFT along y
694	CALL fft_y( work_ffty(:,ir), 'forward' )
695
696	ENDDO
697
698	!
699	!-- Resort
700	DO j = 0, ny
701	DO i = iouter, iend
702	work(i,k,j) = work_ffty(j,i-iouter+1)
703	ENDDO
704	ENDDO
705
706	ENDDO
707
708	ENDDO
709	!$OMP END PARALLEL
710
711	ENDIF
712	CALL cpu_log( log_point_s(7), 'fft_y', 'pause' )
713
714	!
715	!-- Transpose array
716	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
717	CALL MPI_ALLTOALL( work(nxl,1,0), sendrecvcount_xy, MPI_REAL, &
718	f_out(1,1,nys_x,1), sendrecvcount_xy, MPI_REAL, &
719	comm1dx, ierr )
720	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
721
722	END SUBROUTINE ffty_tr_yx
723
724
725	SUBROUTINE tr_xy_ffty( f_in, work, f_out )
726
727	!------------------------------------------------------------------------------!
728	! Transposition x --> y with a subsequent backward Fourier transformation for
729	! a 1d-decomposition along x
730	!------------------------------------------------------------------------------!
731
732	USE control_parameters
733	USE cpulog
734	USE indices
735	USE interfaces
736	USE pegrid
737	USE transpose_indices
738
739	IMPLICIT NONE
740
741	INTEGER :: i, iend, iouter, ir, j, k
742	INTEGER, PARAMETER :: stridex = 4
743
744	REAL, DIMENSION(0:ny,stridex) :: work_ffty
745	#if defined( __nec )
746	REAL, DIMENSION(0:ny+1,1:nz,nxl:nxr) :: work_ffty_vec
747	#endif
748	REAL, DIMENSION(nnx,1:nza,nys_x:nyn_xa,pdims(1)) :: f_in
749	REAL, DIMENSION(1:nza,0:nya,nxl:nxra) :: f_out
750	REAL, DIMENSION(nxl:nxra,1:nza,0:nya) :: work
751
752	!
753	!-- Transpose array
754	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
755	CALL MPI_ALLTOALL( f_in(1,1,nys_x,1), sendrecvcount_xy, MPI_REAL, &
756	work(nxl,1,0), sendrecvcount_xy, MPI_REAL, &
757	comm1dx, ierr )
758	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
759
760	!
761	!-- Resort the data in a way that y becomes the first index and carry out the
762	!-- backward fft along y.
763	CALL cpu_log( log_point_s(7), 'fft_y', 'continue' )
764
765	IF ( host(1:3) == 'nec' ) THEN
766	#if defined( __nec )
767	!
768	!-- Code optimized for vector processors
769	!$OMP PARALLEL PRIVATE ( i, j, k )
770	!$OMP DO
771	DO k = 1, nz
772	DO j = 0, ny
773	DO i = nxl, nxr
774	work_ffty_vec(j,k,i) = work(i,k,j)
775	ENDDO
776	ENDDO
777	ENDDO
778
779	!$OMP DO
780	DO i = nxl, nxr
781
782	CALL fft_y_m( work_ffty_vec(:,:,i), ny+1, 'backward' )
783
784	DO j = 0, ny
785	DO k = 1, nz
786	f_out(k,j,i) = work_ffty_vec(j,k,i)
787	ENDDO
788	ENDDO
789
790	ENDDO
791	!$OMP END PARALLEL
792	#endif
793
794	ELSE
795
796	!
797	!-- Cache optimized code.
798	!-- The i-(x-)direction is split into a strided outer loop and an inner
799	!-- loop for better cache performance
800	!$OMP PARALLEL PRIVATE ( i, iend, iouter, ir, j, k, work_ffty )
801	!$OMP DO
802	DO iouter = nxl, nxr, stridex
803
804	iend = MIN( iouter+stridex-1, nxr ) ! Upper bound for inner i loop
805
806	DO k = 1, nz
807	!
808	!-- Resort
809	DO j = 0, ny
810	DO i = iouter, iend
811	work_ffty(j,i-iouter+1) = work(i,k,j)
812	ENDDO
813	ENDDO
814
815	DO i = iouter, iend
816
817	!
818	!-- FFT along y
819	ir = i-iouter+1 ! counter within a stride
820	CALL fft_y( work_ffty(:,ir), 'backward' )
821
822	DO j = 0, ny
823	f_out(k,j,i) = work_ffty(j,ir)
824	ENDDO
825	ENDDO
826
827	ENDDO
828
829	ENDDO
830	!$OMP END PARALLEL
831
832	ENDIF
833
834	CALL cpu_log( log_point_s(7), 'fft_y', 'stop' )
835
836	END SUBROUTINE tr_xy_ffty
837
838
839	SUBROUTINE fftx_tri_fftx( ar )
840
841	!------------------------------------------------------------------------------!
842	! FFT along x, solution of the tridiagonal system and backward FFT for
843	! a 1d-decomposition along x
844	!
845	! WARNING: this subroutine may still not work for hybrid parallelization
846	! with OpenMP (for possible necessary changes see the original
847	! routine poisfft_hybrid, developed by Klaus Ketelsen, May 2002)
848	!------------------------------------------------------------------------------!
849
850	USE control_parameters
851	USE cpulog
852	USE grid_variables
853	USE indices
854	USE interfaces
855	USE pegrid
856	USE transpose_indices
857
858	IMPLICIT NONE
859
860	character(len=3) :: myth_char
861
862	INTEGER :: i, j, k, m, n, omp_get_thread_num, tn
863
864	REAL, DIMENSION(0:nx) :: work_fftx
865	REAL, DIMENSION(0:nx,1:nz) :: work_trix
866	REAL, DIMENSION(nnx,1:nza,nys_x:nyn_xa,pdims(1)) :: ar
867	REAL, DIMENSION(:,:,:,:), ALLOCATABLE :: tri
868
869
870	CALL cpu_log( log_point_s(33), 'fft_x + tridia', 'start' )
871
872	ALLOCATE( tri(5,0:nx,0:nz-1,0:threads_per_task-1) )
873
874	tn = 0 ! Default thread number in case of one thread
875	!$OMP PARALLEL DO PRIVATE ( i, j, k, m, n, tn, work_fftx, work_trix )
876	DO j = nys_x, nyn_x
877
878	!$ tn = omp_get_thread_num()
879
880	IF ( host(1:3) == 'nec' ) THEN
881	!
882	!-- Code optimized for vector processors
883	DO k = 1, nz
884
885	m = 0
886	DO n = 1, pdims(1)
887	DO i = 1, nnx_pe( n-1 ) ! WARN: pcoord(i) should be used!!
888	work_trix(m,k) = ar(i,k,j,n)
889	m = m + 1
890	ENDDO
891	ENDDO
892
893	ENDDO
894
895	CALL fft_x_m( work_trix, 'forward' )
896
897	ELSE
898	!
899	!-- Cache optimized code
900	DO k = 1, nz
901
902	m = 0
903	DO n = 1, pdims(1)
904	DO i = 1, nnx_pe( n-1 ) ! WARN: pcoord(i) should be used!!
905	work_fftx(m) = ar(i,k,j,n)
906	m = m + 1
907	ENDDO
908	ENDDO
909
910	CALL fft_x( work_fftx, 'forward' )
911
912	DO i = 0, nx
913	work_trix(i,k) = work_fftx(i)
914	ENDDO
915
916	ENDDO
917
918	ENDIF
919
920	!
921	!-- Solve the linear equation system
922	CALL tridia_1dd( ddx2, ddy2, nx, ny, j, work_trix, tri(:,:,:,tn) )
923
924	IF ( host(1:3) == 'nec' ) THEN
925	!
926	!-- Code optimized for vector processors
927	CALL fft_x_m( work_trix, 'backward' )
928
929	DO k = 1, nz
930
931	m = 0
932	DO n = 1, pdims(1)
933	DO i = 1, nnx_pe( n-1 ) ! WARN: pcoord(i) should be used!!
934	ar(i,k,j,n) = work_trix(m,k)
935	m = m + 1
936	ENDDO
937	ENDDO
938
939	ENDDO
940
941	ELSE
942	!
943	!-- Cache optimized code
944	DO k = 1, nz
945
946	DO i = 0, nx
947	work_fftx(i) = work_trix(i,k)
948	ENDDO
949
950	CALL fft_x( work_fftx, 'backward' )
951
952	m = 0
953	DO n = 1, pdims(1)
954	DO i = 1, nnx_pe( n-1 ) ! WARN: pcoord(i) should be used!!
955	ar(i,k,j,n) = work_fftx(m)
956	m = m + 1
957	ENDDO
958	ENDDO
959
960	ENDDO
961
962	ENDIF
963
964	ENDDO
965
966	DEALLOCATE( tri )
967
968	CALL cpu_log( log_point_s(33), 'fft_x + tridia', 'stop' )
969
970	END SUBROUTINE fftx_tri_fftx
971
972
973	SUBROUTINE fftx_tr_xy( f_in, work, f_out )
974
975	!------------------------------------------------------------------------------!
976	! Fourier-transformation along x with subsequent transposition x --> y for
977	! a 1d-decomposition along y
978	!
979	! ATTENTION: The NEC-branch of this routine may significantly profit from
980	! further optimizations. So far, performance is much worse than
981	! for routine ffty_tr_yx (more than three times slower).
982	!------------------------------------------------------------------------------!
983
984	USE control_parameters
985	USE cpulog
986	USE indices
987	USE interfaces
988	USE pegrid
989	USE transpose_indices
990
991	IMPLICIT NONE
992
993	INTEGER :: i, j, k
994
995	REAL, DIMENSION(0:nx,1:nz,nys:nyn) :: work_fftx
996	REAL, DIMENSION(1:nza,nys:nyna,0:nxa) :: f_in
997	REAL, DIMENSION(nny,1:nza,nxl_y:nxr_ya,pdims(2)) :: f_out
998	REAL, DIMENSION(nys:nyna,1:nza,0:nxa) :: work
999
1000	!
1001	!-- Carry out the FFT along x, where all data are present due to the
1002	!-- 1d-decomposition along y. Resort the data in a way that y becomes
1003	!-- the first index.
1004	CALL cpu_log( log_point_s(4), 'fft_x', 'start' )
1005
1006	IF ( host(1:3) == 'nec' ) THEN
1007	!
1008	!-- Code for vector processors
1009	!$OMP PARALLEL PRIVATE ( i, j, k )
1010	!$OMP DO
1011	DO i = 0, nx
1012
1013	DO j = nys, nyn
1014	DO k = 1, nz
1015	work_fftx(i,k,j) = f_in(k,j,i)
1016	ENDDO
1017	ENDDO
1018
1019	ENDDO
1020
1021	!$OMP DO
1022	DO j = nys, nyn
1023
1024	CALL fft_x_m( work_fftx(:,:,j), 'forward' )
1025
1026	DO k = 1, nz
1027	DO i = 0, nx
1028	work(j,k,i) = work_fftx(i,k,j)
1029	ENDDO
1030	ENDDO
1031
1032	ENDDO
1033	!$OMP END PARALLEL
1034
1035	ELSE
1036
1037	!
1038	!-- Cache optimized code (there might be still a potential for better
1039	!-- optimization).
1040	!$OMP PARALLEL PRIVATE (i,j,k,work_fftx)
1041	!$OMP DO
1042	DO i = 0, nx
1043
1044	DO j = nys, nyn
1045	DO k = 1, nz
1046	work_fftx(i,k,j) = f_in(k,j,i)
1047	ENDDO
1048	ENDDO
1049
1050	ENDDO
1051
1052	!$OMP DO
1053	DO j = nys, nyn
1054	DO k = 1, nz
1055
1056	CALL fft_x( work_fftx(0:nx,k,j), 'forward' )
1057
1058	DO i = 0, nx
1059	work(j,k,i) = work_fftx(i,k,j)
1060	ENDDO
1061	ENDDO
1062
1063	ENDDO
1064	!$OMP END PARALLEL
1065
1066	ENDIF
1067	CALL cpu_log( log_point_s(4), 'fft_x', 'pause' )
1068
1069	!
1070	!-- Transpose array
1071	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
1072	CALL MPI_ALLTOALL( work(nys,1,0), sendrecvcount_xy, MPI_REAL, &
1073	f_out(1,1,nxl_y,1), sendrecvcount_xy, MPI_REAL, &
1074	comm1dy, ierr )
1075	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
1076
1077	END SUBROUTINE fftx_tr_xy
1078
1079
1080	SUBROUTINE tr_yx_fftx( f_in, work, f_out )
1081
1082	!------------------------------------------------------------------------------!
1083	! Transposition y --> x with a subsequent backward Fourier transformation for
1084	! a 1d-decomposition along x
1085	!------------------------------------------------------------------------------!
1086
1087	USE control_parameters
1088	USE cpulog
1089	USE indices
1090	USE interfaces
1091	USE pegrid
1092	USE transpose_indices
1093
1094	IMPLICIT NONE
1095
1096	INTEGER :: i, j, k
1097
1098	REAL, DIMENSION(0:nx,1:nz,nys:nyn) :: work_fftx
1099	REAL, DIMENSION(nny,1:nza,nxl_y:nxr_ya,pdims(2)) :: f_in
1100	REAL, DIMENSION(1:nza,nys:nyna,0:nxa) :: f_out
1101	REAL, DIMENSION(nys:nyna,1:nza,0:nxa) :: work
1102
1103	!
1104	!-- Transpose array
1105	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
1106	CALL MPI_ALLTOALL( f_in(1,1,nxl_y,1), sendrecvcount_xy, MPI_REAL, &
1107	work(nys,1,0), sendrecvcount_xy, MPI_REAL, &
1108	comm1dy, ierr )
1109	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
1110
1111	!
1112	!-- Carry out the FFT along x, where all data are present due to the
1113	!-- 1d-decomposition along y. Resort the data in a way that y becomes
1114	!-- the first index.
1115	CALL cpu_log( log_point_s(4), 'fft_x', 'continue' )
1116
1117	IF ( host(1:3) == 'nec' ) THEN
1118	!
1119	!-- Code optimized for vector processors
1120	!$OMP PARALLEL PRIVATE ( i, j, k )
1121	!$OMP DO
1122	DO j = nys, nyn
1123
1124	DO k = 1, nz
1125	DO i = 0, nx
1126	work_fftx(i,k,j) = work(j,k,i)
1127	ENDDO
1128	ENDDO
1129
1130	CALL fft_x_m( work_fftx(:,:,j), 'backward' )
1131
1132	ENDDO
1133
1134	!$OMP DO
1135	DO i = 0, nx
1136	DO j = nys, nyn
1137	DO k = 1, nz
1138	f_out(k,j,i) = work_fftx(i,k,j)
1139	ENDDO
1140	ENDDO
1141	ENDDO
1142	!$OMP END PARALLEL
1143
1144	ELSE
1145
1146	!
1147	!-- Cache optimized code (there might be still a potential for better
1148	!-- optimization).
1149	!$OMP PARALLEL PRIVATE (i,j,k,work_fftx)
1150	!$OMP DO
1151	DO j = nys, nyn
1152	DO k = 1, nz
1153
1154	DO i = 0, nx
1155	work_fftx(i,k,j) = work(j,k,i)
1156	ENDDO
1157
1158	CALL fft_x( work_fftx(0:nx,k,j), 'backward' )
1159
1160	ENDDO
1161	ENDDO
1162
1163	!$OMP DO
1164	DO i = 0, nx
1165	DO j = nys, nyn
1166	DO k = 1, nz
1167	f_out(k,j,i) = work_fftx(i,k,j)
1168	ENDDO
1169	ENDDO
1170	ENDDO
1171	!$OMP END PARALLEL
1172
1173	ENDIF
1174	CALL cpu_log( log_point_s(4), 'fft_x', 'stop' )
1175
1176	END SUBROUTINE tr_yx_fftx
1177
1178
1179	SUBROUTINE ffty_tri_ffty( ar )
1180
1181	!------------------------------------------------------------------------------!
1182	! FFT along y, solution of the tridiagonal system and backward FFT for
1183	! a 1d-decomposition along y
1184	!
1185	! WARNING: this subroutine may still not work for hybrid parallelization
1186	! with OpenMP (for possible necessary changes see the original
1187	! routine poisfft_hybrid, developed by Klaus Ketelsen, May 2002)
1188	!------------------------------------------------------------------------------!
1189
1190	USE control_parameters
1191	USE cpulog
1192	USE grid_variables
1193	USE indices
1194	USE interfaces
1195	USE pegrid
1196	USE transpose_indices
1197
1198	IMPLICIT NONE
1199
1200	INTEGER :: i, j, k, m, n, omp_get_thread_num, tn
1201
1202	REAL, DIMENSION(0:ny) :: work_ffty
1203	REAL, DIMENSION(0:ny,1:nz) :: work_triy
1204	REAL, DIMENSION(nny,1:nza,nxl_y:nxr_ya,pdims(2)) :: ar
1205	REAL, DIMENSION(:,:,:,:), ALLOCATABLE :: tri
1206
1207
1208	CALL cpu_log( log_point_s(39), 'fft_y + tridia', 'start' )
1209
1210	ALLOCATE( tri(5,0:ny,0:nz-1,0:threads_per_task-1) )
1211
1212	tn = 0 ! Default thread number in case of one thread
1213	!$OMP PARALLEL PRIVATE ( i, j, k, m, n, tn, work_ffty, work_triy )
1214	!$OMP DO
1215	DO i = nxl_y, nxr_y
1216
1217	!$ tn = omp_get_thread_num()
1218
1219	IF ( host(1:3) == 'nec' ) THEN
1220	!
1221	!-- Code optimized for vector processors
1222	DO k = 1, nz
1223
1224	m = 0
1225	DO n = 1, pdims(2)
1226	DO j = 1, nny_pe( n-1 ) ! WARN: pcoord(j) should be used!!
1227	work_triy(m,k) = ar(j,k,i,n)
1228	m = m + 1
1229	ENDDO
1230	ENDDO
1231
1232	ENDDO
1233
1234	CALL fft_y_m( work_triy, ny, 'forward' )
1235
1236	ELSE
1237	!
1238	!-- Cache optimized code
1239	DO k = 1, nz
1240
1241	m = 0
1242	DO n = 1, pdims(2)
1243	DO j = 1, nny_pe( n-1 ) ! WARN: pcoord(j) should be used!!
1244	work_ffty(m) = ar(j,k,i,n)
1245	m = m + 1
1246	ENDDO
1247	ENDDO
1248
1249	CALL fft_y( work_ffty, 'forward' )
1250
1251	DO j = 0, ny
1252	work_triy(j,k) = work_ffty(j)
1253	ENDDO
1254
1255	ENDDO
1256
1257	ENDIF
1258
1259	!
1260	!-- Solve the linear equation system
1261	CALL tridia_1dd( ddy2, ddx2, ny, nx, i, work_triy, tri(:,:,:,tn) )
1262
1263	IF ( host(1:3) == 'nec' ) THEN
1264	!
1265	!-- Code optimized for vector processors
1266	CALL fft_y_m( work_triy, ny, 'backward' )
1267
1268	DO k = 1, nz
1269
1270	m = 0
1271	DO n = 1, pdims(2)
1272	DO j = 1, nny_pe( n-1 ) ! WARN: pcoord(j) should be used!!
1273	ar(j,k,i,n) = work_triy(m,k)
1274	m = m + 1
1275	ENDDO
1276	ENDDO
1277
1278	ENDDO
1279
1280	ELSE
1281	!
1282	!-- Cache optimized code
1283	DO k = 1, nz
1284
1285	DO j = 0, ny
1286	work_ffty(j) = work_triy(j,k)
1287	ENDDO
1288
1289	CALL fft_y( work_ffty, 'backward' )
1290
1291	m = 0
1292	DO n = 1, pdims(2)
1293	DO j = 1, nny_pe( n-1 ) ! WARN: pcoord(j) should be used!!
1294	ar(j,k,i,n) = work_ffty(m)
1295	m = m + 1
1296	ENDDO
1297	ENDDO
1298
1299	ENDDO
1300
1301	ENDIF
1302
1303	ENDDO
1304	!$OMP END PARALLEL
1305
1306	DEALLOCATE( tri )
1307
1308	CALL cpu_log( log_point_s(39), 'fft_y + tridia', 'stop' )
1309
1310	END SUBROUTINE ffty_tri_ffty
1311
1312
1313	SUBROUTINE tridia_1dd( ddx2, ddy2, nx, ny, j, ar, tri )
1314
1315	!------------------------------------------------------------------------------!
1316	! Solves the linear system of equations for a 1d-decomposition along x (see
1317	! tridia)
1318	!
1319	! Attention: when using the intel compiler, array tri must be passed as an
1320	! argument to the contained subroutines. Otherwise addres faults
1321	! will occur.
1322	! On NEC, tri should not be passed (except for routine substi_1dd)
1323	! because this causes very bad performance.
1324	!------------------------------------------------------------------------------!
1325
1326	USE arrays_3d
1327	USE control_parameters
1328
1329	USE pegrid
1330
1331	IMPLICIT NONE
1332
1333	INTEGER :: i, j, k, nnyh, nx, ny, omp_get_thread_num, tn
1334
1335	REAL :: ddx2, ddy2
1336
1337	REAL, DIMENSION(0:nx,1:nz) :: ar
1338	REAL, DIMENSION(0:nx,0:nz-1) :: ar1
1339	REAL, DIMENSION(5,0:nx,0:nz-1) :: tri
1340
1341
1342	nnyh = ( ny + 1 ) / 2
1343
1344	!
1345	!-- Define constant elements of the tridiagonal matrix.
1346	!-- The compiler on SX6 does loop exchange. If 0:nx is a high power of 2,
1347	!-- the exchanged loops create bank conflicts. The following directive
1348	!-- prohibits loop exchange and the loops perform much better.
1349	! tn = omp_get_thread_num()
1350	! WRITE( 120+tn, * ) '+++ id=',myid,' nx=',nx,' thread=', omp_get_thread_num()
1351	! CALL local_flush( 120+tn )
1352	!CDIR NOLOOPCHG
1353	DO k = 0, nz-1
1354	DO i = 0,nx
1355	tri(2,i,k) = ddzu(k+1) * ddzw(k+1)
1356	tri(3,i,k) = ddzu(k+2) * ddzw(k+1)
1357	ENDDO
1358	ENDDO
1359	! WRITE( 120+tn, * ) '+++ id=',myid,' end of first tridia loop thread=', omp_get_thread_num()
1360	! CALL local_flush( 120+tn )
1361
1362	IF ( j <= nnyh ) THEN
1363	#if defined( __lc )
1364	CALL maketri_1dd( j, tri )
1365	#else
1366	CALL maketri_1dd( j )
1367	#endif
1368	ELSE
1369	#if defined( __lc )
1370	CALL maketri_1dd( ny+1-j, tri )
1371	#else
1372	CALL maketri_1dd( ny+1-j )
1373	#endif
1374	ENDIF
1375	#if defined( __lc )
1376	CALL split_1dd( tri )
1377	#else
1378	CALL split_1dd
1379	#endif
1380	CALL substi_1dd( ar, tri )
1381
1382	CONTAINS
1383
1384	#if defined( __lc )
1385	SUBROUTINE maketri_1dd( j, tri )
1386	#else
1387	SUBROUTINE maketri_1dd( j )
1388	#endif
1389
1390	!------------------------------------------------------------------------------!
1391	! computes the i- and j-dependent component of the matrix
1392	!------------------------------------------------------------------------------!
1393
1394	USE constants
1395
1396	IMPLICIT NONE
1397
1398	INTEGER :: i, j, k, nnxh
1399	REAL :: a, c
1400
1401	REAL, DIMENSION(0:nx) :: l
1402
1403	#if defined( __lc )
1404	REAL, DIMENSION(5,0:nx,0:nz-1) :: tri
1405	#endif
1406
1407
1408	nnxh = ( nx + 1 ) / 2
1409	!
1410	!-- Provide the tridiagonal matrix for solution of the Poisson equation in
1411	!-- Fourier space. The coefficients are computed following the method of
1412	!-- Schmidt et al. (DFVLR-Mitteilung 84-15), which departs from Stephan
1413	!-- Siano's original version by discretizing the Poisson equation,
1414	!-- before it is Fourier-transformed
1415	DO i = 0, nx
1416	IF ( i >= 0 .AND. i <= nnxh ) THEN
1417	l(i) = 2.0 * ( 1.0 - COS( ( 2.0 * pi * i ) / &
1418	FLOAT( nx+1 ) ) ) * ddx2 + &
1419	2.0 * ( 1.0 - COS( ( 2.0 * pi * j ) / &
1420	FLOAT( ny+1 ) ) ) * ddy2
1421	ELSE
1422	l(i) = 2.0 * ( 1.0 - COS( ( 2.0 * pi * ( nx+1-i ) ) / &
1423	FLOAT( nx+1 ) ) ) * ddx2 + &
1424	2.0 * ( 1.0 - COS( ( 2.0 * pi * j ) / &
1425	FLOAT( ny+1 ) ) ) * ddy2
1426	ENDIF
1427	ENDDO
1428
1429	DO k = 0, nz-1
1430	DO i = 0, nx
1431	a = -1.0 * ddzu(k+2) * ddzw(k+1)
1432	c = -1.0 * ddzu(k+1) * ddzw(k+1)
1433	tri(1,i,k) = a + c - l(i)
1434	ENDDO
1435	ENDDO
1436	IF ( ibc_p_b == 1 .OR. ibc_p_b == 2 ) THEN
1437	DO i = 0, nx
1438	tri(1,i,0) = tri(1,i,0) + tri(2,i,0)
1439	ENDDO
1440	ENDIF
1441	IF ( ibc_p_t == 1 ) THEN
1442	DO i = 0, nx
1443	tri(1,i,nz-1) = tri(1,i,nz-1) + tri(3,i,nz-1)
1444	ENDDO
1445	ENDIF
1446
1447	END SUBROUTINE maketri_1dd
1448
1449
1450	#if defined( __lc )
1451	SUBROUTINE split_1dd( tri )
1452	#else
1453	SUBROUTINE split_1dd
1454	#endif
1455
1456	!------------------------------------------------------------------------------!
1457	! Splitting of the tridiagonal matrix (Thomas algorithm)
1458	!------------------------------------------------------------------------------!
1459
1460	IMPLICIT NONE
1461
1462	INTEGER :: i, k
1463
1464	#if defined( __lc )
1465	REAL, DIMENSION(5,0:nx,0:nz-1) :: tri
1466	#endif
1467
1468
1469	!
1470	!-- Splitting
1471	DO i = 0, nx
1472	tri(4,i,0) = tri(1,i,0)
1473	ENDDO
1474	DO k = 1, nz-1
1475	DO i = 0, nx
1476	tri(5,i,k) = tri(2,i,k) / tri(4,i,k-1)
1477	tri(4,i,k) = tri(1,i,k) - tri(3,i,k-1) * tri(5,i,k)
1478	ENDDO
1479	ENDDO
1480
1481	END SUBROUTINE split_1dd
1482
1483
1484	SUBROUTINE substi_1dd( ar, tri )
1485
1486	!------------------------------------------------------------------------------!
1487	! Substitution (Forward and Backward) (Thomas algorithm)
1488	!------------------------------------------------------------------------------!
1489
1490	IMPLICIT NONE
1491
1492	INTEGER :: i, k
1493
1494	REAL, DIMENSION(0:nx,nz) :: ar
1495	REAL, DIMENSION(0:nx,0:nz-1) :: ar1
1496	REAL, DIMENSION(5,0:nx,0:nz-1) :: tri
1497
1498	!
1499	!-- Forward substitution
1500	DO i = 0, nx
1501	ar1(i,0) = ar(i,1)
1502	ENDDO
1503	DO k = 1, nz-1
1504	DO i = 0, nx
1505	ar1(i,k) = ar(i,k+1) - tri(5,i,k) * ar1(i,k-1)
1506	ENDDO
1507	ENDDO
1508
1509	!
1510	!-- Backward substitution
1511	DO i = 0, nx
1512	ar(i,nz) = ar1(i,nz-1) / tri(4,i,nz-1)
1513	ENDDO
1514	DO k = nz-2, 0, -1
1515	DO i = 0, nx
1516	ar(i,k+1) = ( ar1(i,k) - tri(3,i,k) * ar(i,k+2) ) &
1517	/ tri(4,i,k)
1518	ENDDO
1519	ENDDO
1520
1521	!
1522	!-- Indices i=0, j=0 correspond to horizontally averaged pressure.
1523	!-- The respective values of ar should be zero at all k-levels if
1524	!-- acceleration of horizontally averaged vertical velocity is zero.
1525	IF ( ibc_p_b == 1 .AND. ibc_p_t == 1 ) THEN
1526	IF ( j == 0 ) THEN
1527	DO k = 1, nz
1528	ar(0,k) = 0.0
1529	ENDDO
1530	ENDIF
1531	ENDIF
1532
1533	END SUBROUTINE substi_1dd
1534
1535	END SUBROUTINE tridia_1dd
1536
1537	#endif
1538
1539	END MODULE poisfft_mod

Note: See TracBrowser for help on using the repository browser.

Download in other formats:

| Impressum | ©Leibniz Universität Hannover |