Home

Context Navigation

source: palm/trunk/SOURCE/poisfft.f90 @ 568

Last change on this file since 568 was 484, checked in by raasch, 15 years ago
typo in file headers removed
Property svn:keywords set to `Id`
File size: 44.6 KB

Line
1	MODULE poisfft_mod
2
3	!------------------------------------------------------------------------------!
4	! Current revisions:
5	! -----------------
6	!
7	!
8	! Former revisions:
9	! -----------------
10	! $Id: poisfft.f90 484 2010-02-05 07:36:54Z helmke $
11	!
12	! 377 2009-09-04 11:09:00Z raasch
13	! __lcmuk changed to __lc to avoid problems with Intel compiler on sgi-ice
14	!
15	! 164 2008-05-15 08:46:15Z raasch
16	! Arguments removed from transpose routines
17	!
18	! 128 2007-10-26 13:11:14Z raasch
19	! Bugfix: wavenumber calculation for even nx in routines maketri
20	!
21	! 85 2007-05-11 09:35:14Z raasch
22	! Bugfix: work_fft*_vec removed from some PRIVATE-declarations
23	!
24	! 76 2007-03-29 00:58:32Z raasch
25	! Tridiagonal coefficients adjusted for Neumann boundary conditions both at
26	! the bottom and the top.
27	!
28	! RCS Log replace by Id keyword, revision history cleaned up
29	!
30	! Revision 1.24 2006/08/04 15:00:24 raasch
31	! Default setting of the thread number tn in case of not using OpenMP
32	!
33	! Revision 1.23 2006/02/23 12:48:38 raasch
34	! Additional compiler directive in routine tridia_1dd for preventing loop
35	! exchange on NEC-SX6
36	!
37	! Revision 1.20 2004/04/30 12:38:09 raasch
38	! Parts of former poisfft_hybrid moved to this subroutine,
39	! former subroutine changed to a module, renaming of FFT-subroutines and
40	! -module, FFTs completely substituted by calls of fft_x and fft_y,
41	! NAG fft used in the non-parallel case completely removed, l in maketri
42	! is now a 1d-array, variables passed by modules instead of using parameter
43	! lists, enlarged transposition arrays introduced
44	!
45	! Revision 1.1 1997/07/24 11:24:14 raasch
46	! Initial revision
47	!
48	!
49	! Description:
50	! ------------
51	! See below.
52	!------------------------------------------------------------------------------!
53
54	!--------------------------------------------------------------------------!
55	! poisfft !
56	! !
57	! Original version: Stephan Siano (pois3d) !
58	! !
59	! Institute of Meteorology and Climatology, University of Hannover !
60	! Germany !
61	! !
62	! Version as of July 23,1996 !
63	! !
64	! !
65	! Version for parallel computers: Siegfried Raasch !
66	! !
67	! Version as of July 03,1997 !
68	! !
69	! Solves the Poisson equation with a 2D spectral method !
70	! d^2 p / dx^2 + d^2 p / dy^2 + d^2 p / dz^2 = s !
71	! !
72	! Input: !
73	! real ar contains in the (nnx,nny,nnz) elements, !
74	! starting from the element (1,nys,nxl), the !
75	! values for s !
76	! real work Temporary array !
77	! !
78	! Output: !
79	! real ar contains the solution for p !
80	!--------------------------------------------------------------------------!
81
82	USE fft_xy
83	USE indices
84	USE transpose_indices
85
86	IMPLICIT NONE
87
88	PRIVATE
89	PUBLIC poisfft, poisfft_init
90
91	INTERFACE poisfft
92	MODULE PROCEDURE poisfft
93	END INTERFACE poisfft
94
95	INTERFACE poisfft_init
96	MODULE PROCEDURE poisfft_init
97	END INTERFACE poisfft_init
98
99	CONTAINS
100
101	SUBROUTINE poisfft_init
102
103	CALL fft_init
104
105	END SUBROUTINE poisfft_init
106
107
108	SUBROUTINE poisfft( ar, work )
109
110	USE cpulog
111	USE interfaces
112	USE pegrid
113
114	IMPLICIT NONE
115
116	REAL, DIMENSION(1:nza,nys:nyna,nxl:nxra) :: ar, work
117
118
119	CALL cpu_log( log_point_s(3), 'poisfft', 'start' )
120
121	!
122	!-- Two-dimensional Fourier Transformation in x- and y-direction.
123	#if defined( __parallel )
124	IF ( pdims(2) == 1 ) THEN
125
126	!
127	!-- 1d-domain-decomposition along x:
128	!-- FFT along y and transposition y --> x
129	CALL ffty_tr_yx( ar, work, ar )
130
131	!
132	!-- FFT along x, solving the tridiagonal system and backward FFT
133	CALL fftx_tri_fftx( ar )
134
135	!
136	!-- Transposition x --> y and backward FFT along y
137	CALL tr_xy_ffty( ar, work, ar )
138
139	ELSEIF ( pdims(1) == 1 ) THEN
140
141	!
142	!-- 1d-domain-decomposition along y:
143	!-- FFT along x and transposition x --> y
144	CALL fftx_tr_xy( ar, work, ar )
145
146	!
147	!-- FFT along y, solving the tridiagonal system and backward FFT
148	CALL ffty_tri_ffty( ar )
149
150	!
151	!-- Transposition y --> x and backward FFT along x
152	CALL tr_yx_fftx( ar, work, ar )
153
154	ELSE
155
156	!
157	!-- 2d-domain-decomposition
158	!-- Transposition z --> x
159	CALL cpu_log( log_point_s(5), 'transpo forward', 'start' )
160	CALL transpose_zx( ar, work, ar )
161	CALL cpu_log( log_point_s(5), 'transpo forward', 'pause' )
162
163	CALL cpu_log( log_point_s(4), 'fft_x', 'start' )
164	CALL fftxp( ar, 'forward' )
165	CALL cpu_log( log_point_s(4), 'fft_x', 'pause' )
166
167	!
168	!-- Transposition x --> y
169	CALL cpu_log( log_point_s(5), 'transpo forward', 'continue' )
170	CALL transpose_xy( ar, work, ar )
171	CALL cpu_log( log_point_s(5), 'transpo forward', 'pause' )
172
173	CALL cpu_log( log_point_s(7), 'fft_y', 'start' )
174	CALL fftyp( ar, 'forward' )
175	CALL cpu_log( log_point_s(7), 'fft_y', 'pause' )
176
177	!
178	!-- Transposition y --> z
179	CALL cpu_log( log_point_s(5), 'transpo forward', 'continue' )
180	CALL transpose_yz( ar, work, ar )
181	CALL cpu_log( log_point_s(5), 'transpo forward', 'stop' )
182
183	!
184	!-- Solve the Poisson equation in z-direction in cartesian space.
185	CALL cpu_log( log_point_s(6), 'tridia', 'start' )
186	CALL tridia( ar )
187	CALL cpu_log( log_point_s(6), 'tridia', 'stop' )
188
189	!
190	!-- Inverse Fourier Transformation
191	!-- Transposition z --> y
192	CALL cpu_log( log_point_s(8), 'transpo invers', 'start' )
193	CALL transpose_zy( ar, work, ar )
194	CALL cpu_log( log_point_s(8), 'transpo invers', 'pause' )
195
196	CALL cpu_log( log_point_s(7), 'fft_y', 'continue' )
197	CALL fftyp( ar, 'backward' )
198	CALL cpu_log( log_point_s(7), 'fft_y', 'stop' )
199
200	!
201	!-- Transposition y --> x
202	CALL cpu_log( log_point_s(8), 'transpo invers', 'continue' )
203	CALL transpose_yx( ar, work, ar )
204	CALL cpu_log( log_point_s(8), 'transpo invers', 'pause' )
205
206	CALL cpu_log( log_point_s(4), 'fft_x', 'continue' )
207	CALL fftxp( ar, 'backward' )
208	CALL cpu_log( log_point_s(4), 'fft_x', 'stop' )
209
210	!
211	!-- Transposition x --> z
212	CALL cpu_log( log_point_s(8), 'transpo invers', 'continue' )
213	CALL transpose_xz( ar, work, ar )
214	CALL cpu_log( log_point_s(8), 'transpo invers', 'stop' )
215
216	ENDIF
217
218	#else
219
220	!
221	!-- Two-dimensional Fourier Transformation along x- and y-direction.
222	CALL cpu_log( log_point_s(4), 'fft_x', 'start' )
223	CALL fftx( ar, 'forward' )
224	CALL cpu_log( log_point_s(4), 'fft_x', 'pause' )
225	CALL cpu_log( log_point_s(7), 'fft_y', 'start' )
226	CALL ffty( ar, 'forward' )
227	CALL cpu_log( log_point_s(7), 'fft_y', 'pause' )
228
229	!
230	!-- Solve the Poisson equation in z-direction in cartesian space.
231	CALL cpu_log( log_point_s(6), 'tridia', 'start' )
232	CALL tridia( ar )
233	CALL cpu_log( log_point_s(6), 'tridia', 'stop' )
234
235	!
236	!-- Inverse Fourier Transformation.
237	CALL cpu_log( log_point_s(7), 'fft_y', 'continue' )
238	CALL ffty( ar, 'backward' )
239	CALL cpu_log( log_point_s(7), 'fft_y', 'stop' )
240	CALL cpu_log( log_point_s(4), 'fft_x', 'continue' )
241	CALL fftx( ar, 'backward' )
242	CALL cpu_log( log_point_s(4), 'fft_x', 'stop' )
243
244	#endif
245
246	CALL cpu_log( log_point_s(3), 'poisfft', 'stop' )
247
248	END SUBROUTINE poisfft
249
250
251
252	SUBROUTINE tridia( ar )
253
254	!------------------------------------------------------------------------------!
255	! solves the linear system of equations:
256	!
257	! -(4 pi^2(i^2/(dx^2nnx^2)+j^2/(dy^2nny^2))+
258	! 1/(dzu(k)dzw(k))+1/(dzu(k-1)dzw(k)))*p(i,j,k)+
259	! 1/(dzu(k)dzw(k))p(i,j,k+1)+1/(dzu(k-1)dzw(k))p(i,j,k-1)=d(i,j,k)
260	!
261	! by using the Thomas algorithm
262	!------------------------------------------------------------------------------!
263
264	USE arrays_3d
265
266	IMPLICIT NONE
267
268	INTEGER :: i, j, k, nnyh
269
270	REAL, DIMENSION(nxl_z:nxr_z,0:nz-1) :: ar1
271	REAL, DIMENSION(5,nxl_z:nxr_z,0:nz-1) :: tri
272
273	#if defined( __parallel )
274	REAL :: ar(nxl_z:nxr_za,nys_z:nyn_za,1:nza)
275	#else
276	REAL :: ar(1:nz,nys_z:nyn_z,nxl_z:nxr_z)
277	#endif
278
279
280	nnyh = (ny+1) / 2
281
282	!
283	!-- Define constant elements of the tridiagonal matrix.
284	DO k = 0, nz-1
285	DO i = nxl_z, nxr_z
286	tri(2,i,k) = ddzu(k+1) * ddzw(k+1)
287	tri(3,i,k) = ddzu(k+2) * ddzw(k+1)
288	ENDDO
289	ENDDO
290
291	#if defined( __parallel )
292	!
293	!-- Repeat for all y-levels.
294	DO j = nys_z, nyn_z
295	IF ( j <= nnyh ) THEN
296	CALL maketri( tri, j )
297	ELSE
298	CALL maketri( tri, ny+1-j )
299	ENDIF
300	CALL split( tri )
301	CALL substi( ar, ar1, tri, j )
302	ENDDO
303	#else
304	!
305	!-- First y-level.
306	CALL maketri( tri, nys_z )
307	CALL split( tri )
308	CALL substi( ar, ar1, tri, 0 )
309
310	!
311	!-- Further y-levels.
312	DO j = 1, nnyh - 1
313	CALL maketri( tri, j )
314	CALL split( tri )
315	CALL substi( ar, ar1, tri, j )
316	CALL substi( ar, ar1, tri, ny+1-j )
317	ENDDO
318	CALL maketri( tri, nnyh )
319	CALL split( tri )
320	CALL substi( ar, ar1, tri, nnyh+nys )
321	#endif
322
323	CONTAINS
324
325	SUBROUTINE maketri( tri, j )
326
327	!------------------------------------------------------------------------------!
328	! Computes the i- and j-dependent component of the matrix
329	!------------------------------------------------------------------------------!
330
331	USE arrays_3d
332	USE constants
333	USE control_parameters
334	USE grid_variables
335
336	IMPLICIT NONE
337
338	INTEGER :: i, j, k, nnxh
339	REAL :: a, c
340	REAL :: ll(nxl_z:nxr_z)
341	REAL :: tri(5,nxl_z:nxr_z,0:nz-1)
342
343
344	nnxh = ( nx + 1 ) / 2
345
346	!
347	!-- Provide the tridiagonal matrix for solution of the Poisson equation in
348	!-- Fourier space. The coefficients are computed following the method of
349	!-- Schmidt et al. (DFVLR-Mitteilung 84-15), which departs from Stephan
350	!-- Siano's original version by discretizing the Poisson equation,
351	!-- before it is Fourier-transformed
352	#if defined( __parallel )
353	DO i = nxl_z, nxr_z
354	IF ( i >= 0 .AND. i <= nnxh ) THEN
355	ll(i) = 2.0 * ( 1.0 - COS( ( 2.0 * pi * i ) / &
356	FLOAT( nx+1 ) ) ) / ( dx * dx ) + &
357	2.0 * ( 1.0 - COS( ( 2.0 * pi * j ) / &
358	FLOAT( ny+1 ) ) ) / ( dy * dy )
359	ELSE
360	ll(i) = 2.0 * ( 1.0 - COS( ( 2.0 * pi * ( nx+1-i ) ) / &
361	FLOAT( nx+1 ) ) ) / ( dx * dx ) + &
362	2.0 * ( 1.0 - COS( ( 2.0 * pi * j ) / &
363	FLOAT( ny+1 ) ) ) / ( dy * dy )
364	ENDIF
365	DO k = 0,nz-1
366	a = -1.0 * ddzu(k+2) * ddzw(k+1)
367	c = -1.0 * ddzu(k+1) * ddzw(k+1)
368	tri(1,i,k) = a + c - ll(i)
369	ENDDO
370	ENDDO
371	#else
372	DO i = 0, nnxh
373	ll(i) = 2.0 * ( 1.0 - COS( ( 2.0 * pi * i ) / FLOAT( nx+1 ) ) ) / &
374	( dx * dx ) + &
375	2.0 * ( 1.0 - COS( ( 2.0 * pi * j ) / FLOAT( ny+1 ) ) ) / &
376	( dy * dy )
377	DO k = 0, nz-1
378	a = -1.0 * ddzu(k+2) * ddzw(k+1)
379	c = -1.0 * ddzu(k+1) * ddzw(k+1)
380	tri(1,i,k) = a + c - ll(i)
381	IF ( i >= 1 .and. i < nnxh ) THEN
382	tri(1,nx+1-i,k) = tri(1,i,k)
383	ENDIF
384	ENDDO
385	ENDDO
386	#endif
387	IF ( ibc_p_b == 1 .OR. ibc_p_b == 2 ) THEN
388	DO i = nxl_z, nxr_z
389	tri(1,i,0) = tri(1,i,0) + tri(2,i,0)
390	ENDDO
391	ENDIF
392	IF ( ibc_p_t == 1 ) THEN
393	DO i = nxl_z, nxr_z
394	tri(1,i,nz-1) = tri(1,i,nz-1) + tri(3,i,nz-1)
395	ENDDO
396	ENDIF
397
398	END SUBROUTINE maketri
399
400
401	SUBROUTINE substi( ar, ar1, tri, j )
402
403	!------------------------------------------------------------------------------!
404	! Substitution (Forward and Backward) (Thomas algorithm)
405	!------------------------------------------------------------------------------!
406
407	USE control_parameters
408
409	IMPLICIT NONE
410
411	INTEGER :: i, j, k
412	REAL :: ar1(nxl_z:nxr_z,0:nz-1)
413	REAL :: tri(5,nxl_z:nxr_z,0:nz-1)
414	#if defined( __parallel )
415	REAL :: ar(nxl_z:nxr_za,nys_z:nyn_za,1:nza)
416	#else
417	REAL :: ar(1:nz,nys_z:nyn_z,nxl_z:nxr_z)
418	#endif
419
420	!
421	!-- Forward substitution.
422	DO i = nxl_z, nxr_z
423	#if defined( __parallel )
424	ar1(i,0) = ar(i,j,1)
425	#else
426	ar1(i,0) = ar(1,j,i)
427	#endif
428	ENDDO
429	DO k = 1, nz - 1
430	DO i = nxl_z, nxr_z
431	#if defined( __parallel )
432	ar1(i,k) = ar(i,j,k+1) - tri(5,i,k) * ar1(i,k-1)
433	#else
434	ar1(i,k) = ar(k+1,j,i) - tri(5,i,k) * ar1(i,k-1)
435	#endif
436	ENDDO
437	ENDDO
438
439	!
440	!-- Backward substitution.
441	DO i = nxl_z, nxr_z
442	#if defined( __parallel )
443	ar(i,j,nz) = ar1(i,nz-1) / tri(4,i,nz-1)
444	#else
445	ar(nz,j,i) = ar1(i,nz-1) / tri(4,i,nz-1)
446	#endif
447	ENDDO
448	DO k = nz-2, 0, -1
449	DO i = nxl_z, nxr_z
450	#if defined( __parallel )
451	ar(i,j,k+1) = ( ar1(i,k) - tri(3,i,k) * ar(i,j,k+2) ) &
452	/ tri(4,i,k)
453	#else
454	ar(k+1,j,i) = ( ar1(i,k) - tri(3,i,k) * ar(k+2,j,i) ) &
455	/ tri(4,i,k)
456	#endif
457	ENDDO
458	ENDDO
459
460	!
461	!-- Indices i=0, j=0 correspond to horizontally averaged pressure.
462	!-- The respective values of ar should be zero at all k-levels if
463	!-- acceleration of horizontally averaged vertical velocity is zero.
464	IF ( ibc_p_b == 1 .AND. ibc_p_t == 1 ) THEN
465	IF ( j == 0 .AND. nxl_z == 0 ) THEN
466	#if defined( __parallel )
467	DO k = 1, nz
468	ar(nxl_z,j,k) = 0.0
469	ENDDO
470	#else
471	DO k = 1, nz
472	ar(k,j,nxl_z) = 0.0
473	ENDDO
474	#endif
475	ENDIF
476	ENDIF
477
478	END SUBROUTINE substi
479
480
481	SUBROUTINE split( tri )
482
483	!------------------------------------------------------------------------------!
484	! Splitting of the tridiagonal matrix (Thomas algorithm)
485	!------------------------------------------------------------------------------!
486
487	IMPLICIT NONE
488
489	INTEGER :: i, k
490	REAL :: tri(5,nxl_z:nxr_z,0:nz-1)
491
492	!
493	!-- Splitting.
494	DO i = nxl_z, nxr_z
495	tri(4,i,0) = tri(1,i,0)
496	ENDDO
497	DO k = 1, nz-1
498	DO i = nxl_z, nxr_z
499	tri(5,i,k) = tri(2,i,k) / tri(4,i,k-1)
500	tri(4,i,k) = tri(1,i,k) - tri(3,i,k-1) * tri(5,i,k)
501	ENDDO
502	ENDDO
503
504	END SUBROUTINE split
505
506	END SUBROUTINE tridia
507
508
509	#if defined( __parallel )
510	SUBROUTINE fftxp( ar, direction )
511
512	!------------------------------------------------------------------------------!
513	! Fourier-transformation along x-direction Parallelized version
514	!------------------------------------------------------------------------------!
515
516	IMPLICIT NONE
517
518	CHARACTER (LEN=*) :: direction
519	INTEGER :: j, k
520	REAL :: ar(0:nxa,nys_x:nyn_xa,nzb_x:nzt_xa)
521
522	!
523	!-- Performing the fft with one of the methods implemented
524	DO k = nzb_x, nzt_x
525	DO j = nys_x, nyn_x
526	CALL fft_x( ar(0:nx,j,k), direction )
527	ENDDO
528	ENDDO
529
530	END SUBROUTINE fftxp
531
532	#else
533	SUBROUTINE fftx( ar, direction )
534
535	!------------------------------------------------------------------------------!
536	! Fourier-transformation along x-direction Non parallel version
537	!------------------------------------------------------------------------------!
538
539	IMPLICIT NONE
540
541	CHARACTER (LEN=*) :: direction
542	INTEGER :: i, j, k
543	REAL :: ar(1:nz,0:ny,0:nx)
544
545	!
546	!-- Performing the fft with one of the methods implemented
547	DO k = 1, nz
548	DO j = 0, ny
549	CALL fft_x( ar(k,j,0:nx), direction )
550	ENDDO
551	ENDDO
552
553	END SUBROUTINE fftx
554	#endif
555
556
557	#if defined( __parallel )
558	SUBROUTINE fftyp( ar, direction )
559
560	!------------------------------------------------------------------------------!
561	! Fourier-transformation along y-direction Parallelized version
562	!------------------------------------------------------------------------------!
563
564	IMPLICIT NONE
565
566	CHARACTER (LEN=*) :: direction
567	INTEGER :: i, k
568	REAL :: ar(0:nya,nxl_y:nxr_ya,nzb_y:nzt_ya)
569
570	!
571	!-- Performing the fft with one of the methods implemented
572	DO k = nzb_y, nzt_y
573	DO i = nxl_y, nxr_y
574	CALL fft_y( ar(0:ny,i,k), direction )
575	ENDDO
576	ENDDO
577
578	END SUBROUTINE fftyp
579
580	#else
581	SUBROUTINE ffty( ar, direction )
582
583	!------------------------------------------------------------------------------!
584	! Fourier-transformation along y-direction Non parallel version
585	!------------------------------------------------------------------------------!
586
587	IMPLICIT NONE
588
589	CHARACTER (LEN=*) :: direction
590	INTEGER :: i, k
591	REAL :: ar(1:nz,0:ny,0:nx)
592
593	!
594	!-- Performing the fft with one of the methods implemented
595	DO k = 1, nz
596	DO i = 0, nx
597	CALL fft_y( ar(k,0:ny,i), direction )
598	ENDDO
599	ENDDO
600
601	END SUBROUTINE ffty
602	#endif
603
604	#if defined( __parallel )
605	SUBROUTINE ffty_tr_yx( f_in, work, f_out )
606
607	!------------------------------------------------------------------------------!
608	! Fourier-transformation along y with subsequent transposition y --> x for
609	! a 1d-decomposition along x
610	!
611	! ATTENTION: The performance of this routine is much faster on the NEC-SX6,
612	! if the first index of work_ffty_vec is odd. Otherwise
613	! memory bank conflicts may occur (especially if the index is a
614	! multiple of 128). That's why work_ffty_vec is dimensioned as
615	! 0:ny+1.
616	! Of course, this will not work if users are using an odd number
617	! of gridpoints along y.
618	!------------------------------------------------------------------------------!
619
620	USE control_parameters
621	USE cpulog
622	USE indices
623	USE interfaces
624	USE pegrid
625	USE transpose_indices
626
627	IMPLICIT NONE
628
629	INTEGER :: i, iend, iouter, ir, j, k
630	INTEGER, PARAMETER :: stridex = 4
631
632	REAL, DIMENSION(0:ny,stridex) :: work_ffty
633	#if defined( __nec )
634	REAL, DIMENSION(0:ny+1,1:nz,nxl:nxr) :: work_ffty_vec
635	#endif
636	REAL, DIMENSION(1:nza,0:nya,nxl:nxra) :: f_in
637	REAL, DIMENSION(nnx,1:nza,nys_x:nyn_xa,pdims(1)) :: f_out
638	REAL, DIMENSION(nxl:nxra,1:nza,0:nya) :: work
639
640	!
641	!-- Carry out the FFT along y, where all data are present due to the
642	!-- 1d-decomposition along x. Resort the data in a way that x becomes
643	!-- the first index.
644	CALL cpu_log( log_point_s(7), 'fft_y', 'start' )
645
646	IF ( host(1:3) == 'nec' ) THEN
647	#if defined( __nec )
648	!
649	!-- Code optimized for vector processors
650	!$OMP PARALLEL PRIVATE ( i, j, k )
651	!$OMP DO
652	DO i = nxl, nxr
653
654	DO j = 0, ny
655	DO k = 1, nz
656	work_ffty_vec(j,k,i) = f_in(k,j,i)
657	ENDDO
658	ENDDO
659
660	CALL fft_y_m( work_ffty_vec(:,:,i), ny+1, 'forward' )
661
662	ENDDO
663
664	!$OMP DO
665	DO k = 1, nz
666	DO j = 0, ny
667	DO i = nxl, nxr
668	work(i,k,j) = work_ffty_vec(j,k,i)
669	ENDDO
670	ENDDO
671	ENDDO
672	!$OMP END PARALLEL
673	#endif
674
675	ELSE
676
677	!
678	!-- Cache optimized code.
679	!-- The i-(x-)direction is split into a strided outer loop and an inner
680	!-- loop for better cache performance
681	!$OMP PARALLEL PRIVATE (i,iend,iouter,ir,j,k,work_ffty)
682	!$OMP DO
683	DO iouter = nxl, nxr, stridex
684
685	iend = MIN( iouter+stridex-1, nxr ) ! Upper bound for inner i loop
686
687	DO k = 1, nz
688
689	DO i = iouter, iend
690
691	ir = i-iouter+1 ! counter within a stride
692	DO j = 0, ny
693	work_ffty(j,ir) = f_in(k,j,i)
694	ENDDO
695	!
696	!-- FFT along y
697	CALL fft_y( work_ffty(:,ir), 'forward' )
698
699	ENDDO
700
701	!
702	!-- Resort
703	DO j = 0, ny
704	DO i = iouter, iend
705	work(i,k,j) = work_ffty(j,i-iouter+1)
706	ENDDO
707	ENDDO
708
709	ENDDO
710
711	ENDDO
712	!$OMP END PARALLEL
713
714	ENDIF
715	CALL cpu_log( log_point_s(7), 'fft_y', 'pause' )
716
717	!
718	!-- Transpose array
719	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
720	CALL MPI_ALLTOALL( work(nxl,1,0), sendrecvcount_xy, MPI_REAL, &
721	f_out(1,1,nys_x,1), sendrecvcount_xy, MPI_REAL, &
722	comm1dx, ierr )
723	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
724
725	END SUBROUTINE ffty_tr_yx
726
727
728	SUBROUTINE tr_xy_ffty( f_in, work, f_out )
729
730	!------------------------------------------------------------------------------!
731	! Transposition x --> y with a subsequent backward Fourier transformation for
732	! a 1d-decomposition along x
733	!------------------------------------------------------------------------------!
734
735	USE control_parameters
736	USE cpulog
737	USE indices
738	USE interfaces
739	USE pegrid
740	USE transpose_indices
741
742	IMPLICIT NONE
743
744	INTEGER :: i, iend, iouter, ir, j, k
745	INTEGER, PARAMETER :: stridex = 4
746
747	REAL, DIMENSION(0:ny,stridex) :: work_ffty
748	#if defined( __nec )
749	REAL, DIMENSION(0:ny+1,1:nz,nxl:nxr) :: work_ffty_vec
750	#endif
751	REAL, DIMENSION(nnx,1:nza,nys_x:nyn_xa,pdims(1)) :: f_in
752	REAL, DIMENSION(1:nza,0:nya,nxl:nxra) :: f_out
753	REAL, DIMENSION(nxl:nxra,1:nza,0:nya) :: work
754
755	!
756	!-- Transpose array
757	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
758	CALL MPI_ALLTOALL( f_in(1,1,nys_x,1), sendrecvcount_xy, MPI_REAL, &
759	work(nxl,1,0), sendrecvcount_xy, MPI_REAL, &
760	comm1dx, ierr )
761	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
762
763	!
764	!-- Resort the data in a way that y becomes the first index and carry out the
765	!-- backward fft along y.
766	CALL cpu_log( log_point_s(7), 'fft_y', 'continue' )
767
768	IF ( host(1:3) == 'nec' ) THEN
769	#if defined( __nec )
770	!
771	!-- Code optimized for vector processors
772	!$OMP PARALLEL PRIVATE ( i, j, k )
773	!$OMP DO
774	DO k = 1, nz
775	DO j = 0, ny
776	DO i = nxl, nxr
777	work_ffty_vec(j,k,i) = work(i,k,j)
778	ENDDO
779	ENDDO
780	ENDDO
781
782	!$OMP DO
783	DO i = nxl, nxr
784
785	CALL fft_y_m( work_ffty_vec(:,:,i), ny+1, 'backward' )
786
787	DO j = 0, ny
788	DO k = 1, nz
789	f_out(k,j,i) = work_ffty_vec(j,k,i)
790	ENDDO
791	ENDDO
792
793	ENDDO
794	!$OMP END PARALLEL
795	#endif
796
797	ELSE
798
799	!
800	!-- Cache optimized code.
801	!-- The i-(x-)direction is split into a strided outer loop and an inner
802	!-- loop for better cache performance
803	!$OMP PARALLEL PRIVATE ( i, iend, iouter, ir, j, k, work_ffty )
804	!$OMP DO
805	DO iouter = nxl, nxr, stridex
806
807	iend = MIN( iouter+stridex-1, nxr ) ! Upper bound for inner i loop
808
809	DO k = 1, nz
810	!
811	!-- Resort
812	DO j = 0, ny
813	DO i = iouter, iend
814	work_ffty(j,i-iouter+1) = work(i,k,j)
815	ENDDO
816	ENDDO
817
818	DO i = iouter, iend
819
820	!
821	!-- FFT along y
822	ir = i-iouter+1 ! counter within a stride
823	CALL fft_y( work_ffty(:,ir), 'backward' )
824
825	DO j = 0, ny
826	f_out(k,j,i) = work_ffty(j,ir)
827	ENDDO
828	ENDDO
829
830	ENDDO
831
832	ENDDO
833	!$OMP END PARALLEL
834
835	ENDIF
836
837	CALL cpu_log( log_point_s(7), 'fft_y', 'stop' )
838
839	END SUBROUTINE tr_xy_ffty
840
841
842	SUBROUTINE fftx_tri_fftx( ar )
843
844	!------------------------------------------------------------------------------!
845	! FFT along x, solution of the tridiagonal system and backward FFT for
846	! a 1d-decomposition along x
847	!
848	! WARNING: this subroutine may still not work for hybrid parallelization
849	! with OpenMP (for possible necessary changes see the original
850	! routine poisfft_hybrid, developed by Klaus Ketelsen, May 2002)
851	!------------------------------------------------------------------------------!
852
853	USE control_parameters
854	USE cpulog
855	USE grid_variables
856	USE indices
857	USE interfaces
858	USE pegrid
859	USE transpose_indices
860
861	IMPLICIT NONE
862
863	character(len=3) :: myth_char
864
865	INTEGER :: i, j, k, m, n, omp_get_thread_num, tn
866
867	REAL, DIMENSION(0:nx) :: work_fftx
868	REAL, DIMENSION(0:nx,1:nz) :: work_trix
869	REAL, DIMENSION(nnx,1:nza,nys_x:nyn_xa,pdims(1)) :: ar
870	REAL, DIMENSION(:,:,:,:), ALLOCATABLE :: tri
871
872
873	CALL cpu_log( log_point_s(33), 'fft_x + tridia', 'start' )
874
875	ALLOCATE( tri(5,0:nx,0:nz-1,0:threads_per_task-1) )
876
877	tn = 0 ! Default thread number in case of one thread
878	!$OMP PARALLEL DO PRIVATE ( i, j, k, m, n, tn, work_fftx, work_trix )
879	DO j = nys_x, nyn_x
880
881	!$ tn = omp_get_thread_num()
882
883	IF ( host(1:3) == 'nec' ) THEN
884	!
885	!-- Code optimized for vector processors
886	DO k = 1, nz
887
888	m = 0
889	DO n = 1, pdims(1)
890	DO i = 1, nnx_pe( n-1 ) ! WARN: pcoord(i) should be used!!
891	work_trix(m,k) = ar(i,k,j,n)
892	m = m + 1
893	ENDDO
894	ENDDO
895
896	ENDDO
897
898	CALL fft_x_m( work_trix, 'forward' )
899
900	ELSE
901	!
902	!-- Cache optimized code
903	DO k = 1, nz
904
905	m = 0
906	DO n = 1, pdims(1)
907	DO i = 1, nnx_pe( n-1 ) ! WARN: pcoord(i) should be used!!
908	work_fftx(m) = ar(i,k,j,n)
909	m = m + 1
910	ENDDO
911	ENDDO
912
913	CALL fft_x( work_fftx, 'forward' )
914
915	DO i = 0, nx
916	work_trix(i,k) = work_fftx(i)
917	ENDDO
918
919	ENDDO
920
921	ENDIF
922
923	!
924	!-- Solve the linear equation system
925	CALL tridia_1dd( ddx2, ddy2, nx, ny, j, work_trix, tri(:,:,:,tn) )
926
927	IF ( host(1:3) == 'nec' ) THEN
928	!
929	!-- Code optimized for vector processors
930	CALL fft_x_m( work_trix, 'backward' )
931
932	DO k = 1, nz
933
934	m = 0
935	DO n = 1, pdims(1)
936	DO i = 1, nnx_pe( n-1 ) ! WARN: pcoord(i) should be used!!
937	ar(i,k,j,n) = work_trix(m,k)
938	m = m + 1
939	ENDDO
940	ENDDO
941
942	ENDDO
943
944	ELSE
945	!
946	!-- Cache optimized code
947	DO k = 1, nz
948
949	DO i = 0, nx
950	work_fftx(i) = work_trix(i,k)
951	ENDDO
952
953	CALL fft_x( work_fftx, 'backward' )
954
955	m = 0
956	DO n = 1, pdims(1)
957	DO i = 1, nnx_pe( n-1 ) ! WARN: pcoord(i) should be used!!
958	ar(i,k,j,n) = work_fftx(m)
959	m = m + 1
960	ENDDO
961	ENDDO
962
963	ENDDO
964
965	ENDIF
966
967	ENDDO
968
969	DEALLOCATE( tri )
970
971	CALL cpu_log( log_point_s(33), 'fft_x + tridia', 'stop' )
972
973	END SUBROUTINE fftx_tri_fftx
974
975
976	SUBROUTINE fftx_tr_xy( f_in, work, f_out )
977
978	!------------------------------------------------------------------------------!
979	! Fourier-transformation along x with subsequent transposition x --> y for
980	! a 1d-decomposition along y
981	!
982	! ATTENTION: The NEC-branch of this routine may significantly profit from
983	! further optimizations. So far, performance is much worse than
984	! for routine ffty_tr_yx (more than three times slower).
985	!------------------------------------------------------------------------------!
986
987	USE control_parameters
988	USE cpulog
989	USE indices
990	USE interfaces
991	USE pegrid
992	USE transpose_indices
993
994	IMPLICIT NONE
995
996	INTEGER :: i, j, k
997
998	REAL, DIMENSION(0:nx,1:nz,nys:nyn) :: work_fftx
999	REAL, DIMENSION(1:nza,nys:nyna,0:nxa) :: f_in
1000	REAL, DIMENSION(nny,1:nza,nxl_y:nxr_ya,pdims(2)) :: f_out
1001	REAL, DIMENSION(nys:nyna,1:nza,0:nxa) :: work
1002
1003	!
1004	!-- Carry out the FFT along x, where all data are present due to the
1005	!-- 1d-decomposition along y. Resort the data in a way that y becomes
1006	!-- the first index.
1007	CALL cpu_log( log_point_s(4), 'fft_x', 'start' )
1008
1009	IF ( host(1:3) == 'nec' ) THEN
1010	!
1011	!-- Code for vector processors
1012	!$OMP PARALLEL PRIVATE ( i, j, k )
1013	!$OMP DO
1014	DO i = 0, nx
1015
1016	DO j = nys, nyn
1017	DO k = 1, nz
1018	work_fftx(i,k,j) = f_in(k,j,i)
1019	ENDDO
1020	ENDDO
1021
1022	ENDDO
1023
1024	!$OMP DO
1025	DO j = nys, nyn
1026
1027	CALL fft_x_m( work_fftx(:,:,j), 'forward' )
1028
1029	DO k = 1, nz
1030	DO i = 0, nx
1031	work(j,k,i) = work_fftx(i,k,j)
1032	ENDDO
1033	ENDDO
1034
1035	ENDDO
1036	!$OMP END PARALLEL
1037
1038	ELSE
1039
1040	!
1041	!-- Cache optimized code (there might be still a potential for better
1042	!-- optimization).
1043	!$OMP PARALLEL PRIVATE (i,j,k,work_fftx)
1044	!$OMP DO
1045	DO i = 0, nx
1046
1047	DO j = nys, nyn
1048	DO k = 1, nz
1049	work_fftx(i,k,j) = f_in(k,j,i)
1050	ENDDO
1051	ENDDO
1052
1053	ENDDO
1054
1055	!$OMP DO
1056	DO j = nys, nyn
1057	DO k = 1, nz
1058
1059	CALL fft_x( work_fftx(0:nx,k,j), 'forward' )
1060
1061	DO i = 0, nx
1062	work(j,k,i) = work_fftx(i,k,j)
1063	ENDDO
1064	ENDDO
1065
1066	ENDDO
1067	!$OMP END PARALLEL
1068
1069	ENDIF
1070	CALL cpu_log( log_point_s(4), 'fft_x', 'pause' )
1071
1072	!
1073	!-- Transpose array
1074	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
1075	CALL MPI_ALLTOALL( work(nys,1,0), sendrecvcount_xy, MPI_REAL, &
1076	f_out(1,1,nxl_y,1), sendrecvcount_xy, MPI_REAL, &
1077	comm1dy, ierr )
1078	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
1079
1080	END SUBROUTINE fftx_tr_xy
1081
1082
1083	SUBROUTINE tr_yx_fftx( f_in, work, f_out )
1084
1085	!------------------------------------------------------------------------------!
1086	! Transposition y --> x with a subsequent backward Fourier transformation for
1087	! a 1d-decomposition along x
1088	!------------------------------------------------------------------------------!
1089
1090	USE control_parameters
1091	USE cpulog
1092	USE indices
1093	USE interfaces
1094	USE pegrid
1095	USE transpose_indices
1096
1097	IMPLICIT NONE
1098
1099	INTEGER :: i, j, k
1100
1101	REAL, DIMENSION(0:nx,1:nz,nys:nyn) :: work_fftx
1102	REAL, DIMENSION(nny,1:nza,nxl_y:nxr_ya,pdims(2)) :: f_in
1103	REAL, DIMENSION(1:nza,nys:nyna,0:nxa) :: f_out
1104	REAL, DIMENSION(nys:nyna,1:nza,0:nxa) :: work
1105
1106	!
1107	!-- Transpose array
1108	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
1109	CALL MPI_ALLTOALL( f_in(1,1,nxl_y,1), sendrecvcount_xy, MPI_REAL, &
1110	work(nys,1,0), sendrecvcount_xy, MPI_REAL, &
1111	comm1dy, ierr )
1112	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
1113
1114	!
1115	!-- Carry out the FFT along x, where all data are present due to the
1116	!-- 1d-decomposition along y. Resort the data in a way that y becomes
1117	!-- the first index.
1118	CALL cpu_log( log_point_s(4), 'fft_x', 'continue' )
1119
1120	IF ( host(1:3) == 'nec' ) THEN
1121	!
1122	!-- Code optimized for vector processors
1123	!$OMP PARALLEL PRIVATE ( i, j, k )
1124	!$OMP DO
1125	DO j = nys, nyn
1126
1127	DO k = 1, nz
1128	DO i = 0, nx
1129	work_fftx(i,k,j) = work(j,k,i)
1130	ENDDO
1131	ENDDO
1132
1133	CALL fft_x_m( work_fftx(:,:,j), 'backward' )
1134
1135	ENDDO
1136
1137	!$OMP DO
1138	DO i = 0, nx
1139	DO j = nys, nyn
1140	DO k = 1, nz
1141	f_out(k,j,i) = work_fftx(i,k,j)
1142	ENDDO
1143	ENDDO
1144	ENDDO
1145	!$OMP END PARALLEL
1146
1147	ELSE
1148
1149	!
1150	!-- Cache optimized code (there might be still a potential for better
1151	!-- optimization).
1152	!$OMP PARALLEL PRIVATE (i,j,k,work_fftx)
1153	!$OMP DO
1154	DO j = nys, nyn
1155	DO k = 1, nz
1156
1157	DO i = 0, nx
1158	work_fftx(i,k,j) = work(j,k,i)
1159	ENDDO
1160
1161	CALL fft_x( work_fftx(0:nx,k,j), 'backward' )
1162
1163	ENDDO
1164	ENDDO
1165
1166	!$OMP DO
1167	DO i = 0, nx
1168	DO j = nys, nyn
1169	DO k = 1, nz
1170	f_out(k,j,i) = work_fftx(i,k,j)
1171	ENDDO
1172	ENDDO
1173	ENDDO
1174	!$OMP END PARALLEL
1175
1176	ENDIF
1177	CALL cpu_log( log_point_s(4), 'fft_x', 'stop' )
1178
1179	END SUBROUTINE tr_yx_fftx
1180
1181
1182	SUBROUTINE ffty_tri_ffty( ar )
1183
1184	!------------------------------------------------------------------------------!
1185	! FFT along y, solution of the tridiagonal system and backward FFT for
1186	! a 1d-decomposition along y
1187	!
1188	! WARNING: this subroutine may still not work for hybrid parallelization
1189	! with OpenMP (for possible necessary changes see the original
1190	! routine poisfft_hybrid, developed by Klaus Ketelsen, May 2002)
1191	!------------------------------------------------------------------------------!
1192
1193	USE control_parameters
1194	USE cpulog
1195	USE grid_variables
1196	USE indices
1197	USE interfaces
1198	USE pegrid
1199	USE transpose_indices
1200
1201	IMPLICIT NONE
1202
1203	INTEGER :: i, j, k, m, n, omp_get_thread_num, tn
1204
1205	REAL, DIMENSION(0:ny) :: work_ffty
1206	REAL, DIMENSION(0:ny,1:nz) :: work_triy
1207	REAL, DIMENSION(nny,1:nza,nxl_y:nxr_ya,pdims(2)) :: ar
1208	REAL, DIMENSION(:,:,:,:), ALLOCATABLE :: tri
1209
1210
1211	CALL cpu_log( log_point_s(39), 'fft_y + tridia', 'start' )
1212
1213	ALLOCATE( tri(5,0:ny,0:nz-1,0:threads_per_task-1) )
1214
1215	tn = 0 ! Default thread number in case of one thread
1216	!$OMP PARALLEL PRIVATE ( i, j, k, m, n, tn, work_ffty, work_triy )
1217	!$OMP DO
1218	DO i = nxl_y, nxr_y
1219
1220	!$ tn = omp_get_thread_num()
1221
1222	IF ( host(1:3) == 'nec' ) THEN
1223	!
1224	!-- Code optimized for vector processors
1225	DO k = 1, nz
1226
1227	m = 0
1228	DO n = 1, pdims(2)
1229	DO j = 1, nny_pe( n-1 ) ! WARN: pcoord(j) should be used!!
1230	work_triy(m,k) = ar(j,k,i,n)
1231	m = m + 1
1232	ENDDO
1233	ENDDO
1234
1235	ENDDO
1236
1237	CALL fft_y_m( work_triy, ny, 'forward' )
1238
1239	ELSE
1240	!
1241	!-- Cache optimized code
1242	DO k = 1, nz
1243
1244	m = 0
1245	DO n = 1, pdims(2)
1246	DO j = 1, nny_pe( n-1 ) ! WARN: pcoord(j) should be used!!
1247	work_ffty(m) = ar(j,k,i,n)
1248	m = m + 1
1249	ENDDO
1250	ENDDO
1251
1252	CALL fft_y( work_ffty, 'forward' )
1253
1254	DO j = 0, ny
1255	work_triy(j,k) = work_ffty(j)
1256	ENDDO
1257
1258	ENDDO
1259
1260	ENDIF
1261
1262	!
1263	!-- Solve the linear equation system
1264	CALL tridia_1dd( ddy2, ddx2, ny, nx, i, work_triy, tri(:,:,:,tn) )
1265
1266	IF ( host(1:3) == 'nec' ) THEN
1267	!
1268	!-- Code optimized for vector processors
1269	CALL fft_y_m( work_triy, ny, 'backward' )
1270
1271	DO k = 1, nz
1272
1273	m = 0
1274	DO n = 1, pdims(2)
1275	DO j = 1, nny_pe( n-1 ) ! WARN: pcoord(j) should be used!!
1276	ar(j,k,i,n) = work_triy(m,k)
1277	m = m + 1
1278	ENDDO
1279	ENDDO
1280
1281	ENDDO
1282
1283	ELSE
1284	!
1285	!-- Cache optimized code
1286	DO k = 1, nz
1287
1288	DO j = 0, ny
1289	work_ffty(j) = work_triy(j,k)
1290	ENDDO
1291
1292	CALL fft_y( work_ffty, 'backward' )
1293
1294	m = 0
1295	DO n = 1, pdims(2)
1296	DO j = 1, nny_pe( n-1 ) ! WARN: pcoord(j) should be used!!
1297	ar(j,k,i,n) = work_ffty(m)
1298	m = m + 1
1299	ENDDO
1300	ENDDO
1301
1302	ENDDO
1303
1304	ENDIF
1305
1306	ENDDO
1307	!$OMP END PARALLEL
1308
1309	DEALLOCATE( tri )
1310
1311	CALL cpu_log( log_point_s(39), 'fft_y + tridia', 'stop' )
1312
1313	END SUBROUTINE ffty_tri_ffty
1314
1315
1316	SUBROUTINE tridia_1dd( ddx2, ddy2, nx, ny, j, ar, tri )
1317
1318	!------------------------------------------------------------------------------!
1319	! Solves the linear system of equations for a 1d-decomposition along x (see
1320	! tridia)
1321	!
1322	! Attention: when using the intel compiler, array tri must be passed as an
1323	! argument to the contained subroutines. Otherwise addres faults
1324	! will occur.
1325	! On NEC, tri should not be passed (except for routine substi_1dd)
1326	! because this causes very bad performance.
1327	!------------------------------------------------------------------------------!
1328
1329	USE arrays_3d
1330	USE control_parameters
1331
1332	USE pegrid
1333
1334	IMPLICIT NONE
1335
1336	INTEGER :: i, j, k, nnyh, nx, ny, omp_get_thread_num, tn
1337
1338	REAL :: ddx2, ddy2
1339
1340	REAL, DIMENSION(0:nx,1:nz) :: ar
1341	REAL, DIMENSION(0:nx,0:nz-1) :: ar1
1342	REAL, DIMENSION(5,0:nx,0:nz-1) :: tri
1343
1344
1345	nnyh = ( ny + 1 ) / 2
1346
1347	!
1348	!-- Define constant elements of the tridiagonal matrix.
1349	!-- The compiler on SX6 does loop exchange. If 0:nx is a high power of 2,
1350	!-- the exchanged loops create bank conflicts. The following directive
1351	!-- prohibits loop exchange and the loops perform much better.
1352	! tn = omp_get_thread_num()
1353	! WRITE( 120+tn, * ) '+++ id=',myid,' nx=',nx,' thread=', omp_get_thread_num()
1354	! CALL local_flush( 120+tn )
1355	!CDIR NOLOOPCHG
1356	DO k = 0, nz-1
1357	DO i = 0,nx
1358	tri(2,i,k) = ddzu(k+1) * ddzw(k+1)
1359	tri(3,i,k) = ddzu(k+2) * ddzw(k+1)
1360	ENDDO
1361	ENDDO
1362	! WRITE( 120+tn, * ) '+++ id=',myid,' end of first tridia loop thread=', omp_get_thread_num()
1363	! CALL local_flush( 120+tn )
1364
1365	IF ( j <= nnyh ) THEN
1366	#if defined( __lc )
1367	CALL maketri_1dd( j, tri )
1368	#else
1369	CALL maketri_1dd( j )
1370	#endif
1371	ELSE
1372	#if defined( __lc )
1373	CALL maketri_1dd( ny+1-j, tri )
1374	#else
1375	CALL maketri_1dd( ny+1-j )
1376	#endif
1377	ENDIF
1378	#if defined( __lc )
1379	CALL split_1dd( tri )
1380	#else
1381	CALL split_1dd
1382	#endif
1383	CALL substi_1dd( ar, tri )
1384
1385	CONTAINS
1386
1387	#if defined( __lc )
1388	SUBROUTINE maketri_1dd( j, tri )
1389	#else
1390	SUBROUTINE maketri_1dd( j )
1391	#endif
1392
1393	!------------------------------------------------------------------------------!
1394	! computes the i- and j-dependent component of the matrix
1395	!------------------------------------------------------------------------------!
1396
1397	USE constants
1398
1399	IMPLICIT NONE
1400
1401	INTEGER :: i, j, k, nnxh
1402	REAL :: a, c
1403
1404	REAL, DIMENSION(0:nx) :: l
1405
1406	#if defined( __lc )
1407	REAL, DIMENSION(5,0:nx,0:nz-1) :: tri
1408	#endif
1409
1410
1411	nnxh = ( nx + 1 ) / 2
1412	!
1413	!-- Provide the tridiagonal matrix for solution of the Poisson equation in
1414	!-- Fourier space. The coefficients are computed following the method of
1415	!-- Schmidt et al. (DFVLR-Mitteilung 84-15), which departs from Stephan
1416	!-- Siano's original version by discretizing the Poisson equation,
1417	!-- before it is Fourier-transformed
1418	DO i = 0, nx
1419	IF ( i >= 0 .AND. i <= nnxh ) THEN
1420	l(i) = 2.0 * ( 1.0 - COS( ( 2.0 * pi * i ) / &
1421	FLOAT( nx+1 ) ) ) * ddx2 + &
1422	2.0 * ( 1.0 - COS( ( 2.0 * pi * j ) / &
1423	FLOAT( ny+1 ) ) ) * ddy2
1424	ELSE
1425	l(i) = 2.0 * ( 1.0 - COS( ( 2.0 * pi * ( nx+1-i ) ) / &
1426	FLOAT( nx+1 ) ) ) * ddx2 + &
1427	2.0 * ( 1.0 - COS( ( 2.0 * pi * j ) / &
1428	FLOAT( ny+1 ) ) ) * ddy2
1429	ENDIF
1430	ENDDO
1431
1432	DO k = 0, nz-1
1433	DO i = 0, nx
1434	a = -1.0 * ddzu(k+2) * ddzw(k+1)
1435	c = -1.0 * ddzu(k+1) * ddzw(k+1)
1436	tri(1,i,k) = a + c - l(i)
1437	ENDDO
1438	ENDDO
1439	IF ( ibc_p_b == 1 .OR. ibc_p_b == 2 ) THEN
1440	DO i = 0, nx
1441	tri(1,i,0) = tri(1,i,0) + tri(2,i,0)
1442	ENDDO
1443	ENDIF
1444	IF ( ibc_p_t == 1 ) THEN
1445	DO i = 0, nx
1446	tri(1,i,nz-1) = tri(1,i,nz-1) + tri(3,i,nz-1)
1447	ENDDO
1448	ENDIF
1449
1450	END SUBROUTINE maketri_1dd
1451
1452
1453	#if defined( __lc )
1454	SUBROUTINE split_1dd( tri )
1455	#else
1456	SUBROUTINE split_1dd
1457	#endif
1458
1459	!------------------------------------------------------------------------------!
1460	! Splitting of the tridiagonal matrix (Thomas algorithm)
1461	!------------------------------------------------------------------------------!
1462
1463	IMPLICIT NONE
1464
1465	INTEGER :: i, k
1466
1467	#if defined( __lc )
1468	REAL, DIMENSION(5,0:nx,0:nz-1) :: tri
1469	#endif
1470
1471
1472	!
1473	!-- Splitting
1474	DO i = 0, nx
1475	tri(4,i,0) = tri(1,i,0)
1476	ENDDO
1477	DO k = 1, nz-1
1478	DO i = 0, nx
1479	tri(5,i,k) = tri(2,i,k) / tri(4,i,k-1)
1480	tri(4,i,k) = tri(1,i,k) - tri(3,i,k-1) * tri(5,i,k)
1481	ENDDO
1482	ENDDO
1483
1484	END SUBROUTINE split_1dd
1485
1486
1487	SUBROUTINE substi_1dd( ar, tri )
1488
1489	!------------------------------------------------------------------------------!
1490	! Substitution (Forward and Backward) (Thomas algorithm)
1491	!------------------------------------------------------------------------------!
1492
1493	IMPLICIT NONE
1494
1495	INTEGER :: i, k
1496
1497	REAL, DIMENSION(0:nx,nz) :: ar
1498	REAL, DIMENSION(0:nx,0:nz-1) :: ar1
1499	REAL, DIMENSION(5,0:nx,0:nz-1) :: tri
1500
1501	!
1502	!-- Forward substitution
1503	DO i = 0, nx
1504	ar1(i,0) = ar(i,1)
1505	ENDDO
1506	DO k = 1, nz-1
1507	DO i = 0, nx
1508	ar1(i,k) = ar(i,k+1) - tri(5,i,k) * ar1(i,k-1)
1509	ENDDO
1510	ENDDO
1511
1512	!
1513	!-- Backward substitution
1514	DO i = 0, nx
1515	ar(i,nz) = ar1(i,nz-1) / tri(4,i,nz-1)
1516	ENDDO
1517	DO k = nz-2, 0, -1
1518	DO i = 0, nx
1519	ar(i,k+1) = ( ar1(i,k) - tri(3,i,k) * ar(i,k+2) ) &
1520	/ tri(4,i,k)
1521	ENDDO
1522	ENDDO
1523
1524	!
1525	!-- Indices i=0, j=0 correspond to horizontally averaged pressure.
1526	!-- The respective values of ar should be zero at all k-levels if
1527	!-- acceleration of horizontally averaged vertical velocity is zero.
1528	IF ( ibc_p_b == 1 .AND. ibc_p_t == 1 ) THEN
1529	IF ( j == 0 ) THEN
1530	DO k = 1, nz
1531	ar(0,k) = 0.0
1532	ENDDO
1533	ENDIF
1534	ENDIF
1535
1536	END SUBROUTINE substi_1dd
1537
1538	END SUBROUTINE tridia_1dd
1539
1540	#endif
1541
1542	END MODULE poisfft_mod

Note: See TracBrowser for help on using the repository browser.

Download in other formats:

| Impressum | ©Leibniz Universität Hannover |