Home

Context Navigation

source: palm/trunk/SOURCE/poisfft.f90 @ 624

Last change on this file since 624 was 623, checked in by raasch, 14 years ago
last commit documented
Property svn:keywords set to `Id`
File size: 44.9 KB

Line
1	MODULE poisfft_mod
2
3	!------------------------------------------------------------------------------!
4	! Current revisions:
5	! -----------------
6	!
7	!
8	! Former revisions:
9	! -----------------
10	! $Id: poisfft.f90 623 2010-12-10 08:52:17Z heinze $
11	!
12	! 622 2010-12-10 08:08:13Z raasch
13	! optional barriers included in order to speed up collective operations
14	!
15	! 377 2009-09-04 11:09:00Z raasch
16	! __lcmuk changed to __lc to avoid problems with Intel compiler on sgi-ice
17	!
18	! 164 2008-05-15 08:46:15Z raasch
19	! Arguments removed from transpose routines
20	!
21	! 128 2007-10-26 13:11:14Z raasch
22	! Bugfix: wavenumber calculation for even nx in routines maketri
23	!
24	! 85 2007-05-11 09:35:14Z raasch
25	! Bugfix: work_fft*_vec removed from some PRIVATE-declarations
26	!
27	! 76 2007-03-29 00:58:32Z raasch
28	! Tridiagonal coefficients adjusted for Neumann boundary conditions both at
29	! the bottom and the top.
30	!
31	! RCS Log replace by Id keyword, revision history cleaned up
32	!
33	! Revision 1.24 2006/08/04 15:00:24 raasch
34	! Default setting of the thread number tn in case of not using OpenMP
35	!
36	! Revision 1.23 2006/02/23 12:48:38 raasch
37	! Additional compiler directive in routine tridia_1dd for preventing loop
38	! exchange on NEC-SX6
39	!
40	! Revision 1.20 2004/04/30 12:38:09 raasch
41	! Parts of former poisfft_hybrid moved to this subroutine,
42	! former subroutine changed to a module, renaming of FFT-subroutines and
43	! -module, FFTs completely substituted by calls of fft_x and fft_y,
44	! NAG fft used in the non-parallel case completely removed, l in maketri
45	! is now a 1d-array, variables passed by modules instead of using parameter
46	! lists, enlarged transposition arrays introduced
47	!
48	! Revision 1.1 1997/07/24 11:24:14 raasch
49	! Initial revision
50	!
51	!
52	! Description:
53	! ------------
54	! See below.
55	!------------------------------------------------------------------------------!
56
57	!--------------------------------------------------------------------------!
58	! poisfft !
59	! !
60	! Original version: Stephan Siano (pois3d) !
61	! !
62	! Institute of Meteorology and Climatology, University of Hannover !
63	! Germany !
64	! !
65	! Version as of July 23,1996 !
66	! !
67	! !
68	! Version for parallel computers: Siegfried Raasch !
69	! !
70	! Version as of July 03,1997 !
71	! !
72	! Solves the Poisson equation with a 2D spectral method !
73	! d^2 p / dx^2 + d^2 p / dy^2 + d^2 p / dz^2 = s !
74	! !
75	! Input: !
76	! real ar contains in the (nnx,nny,nnz) elements, !
77	! starting from the element (1,nys,nxl), the !
78	! values for s !
79	! real work Temporary array !
80	! !
81	! Output: !
82	! real ar contains the solution for p !
83	!--------------------------------------------------------------------------!
84
85	USE fft_xy
86	USE indices
87	USE transpose_indices
88
89	IMPLICIT NONE
90
91	PRIVATE
92	PUBLIC poisfft, poisfft_init
93
94	INTERFACE poisfft
95	MODULE PROCEDURE poisfft
96	END INTERFACE poisfft
97
98	INTERFACE poisfft_init
99	MODULE PROCEDURE poisfft_init
100	END INTERFACE poisfft_init
101
102	CONTAINS
103
104	SUBROUTINE poisfft_init
105
106	CALL fft_init
107
108	END SUBROUTINE poisfft_init
109
110
111	SUBROUTINE poisfft( ar, work )
112
113	USE cpulog
114	USE interfaces
115	USE pegrid
116
117	IMPLICIT NONE
118
119	REAL, DIMENSION(1:nza,nys:nyna,nxl:nxra) :: ar, work
120
121
122	CALL cpu_log( log_point_s(3), 'poisfft', 'start' )
123
124	!
125	!-- Two-dimensional Fourier Transformation in x- and y-direction.
126	#if defined( __parallel )
127	IF ( pdims(2) == 1 ) THEN
128
129	!
130	!-- 1d-domain-decomposition along x:
131	!-- FFT along y and transposition y --> x
132	CALL ffty_tr_yx( ar, work, ar )
133
134	!
135	!-- FFT along x, solving the tridiagonal system and backward FFT
136	CALL fftx_tri_fftx( ar )
137
138	!
139	!-- Transposition x --> y and backward FFT along y
140	CALL tr_xy_ffty( ar, work, ar )
141
142	ELSEIF ( pdims(1) == 1 ) THEN
143
144	!
145	!-- 1d-domain-decomposition along y:
146	!-- FFT along x and transposition x --> y
147	CALL fftx_tr_xy( ar, work, ar )
148
149	!
150	!-- FFT along y, solving the tridiagonal system and backward FFT
151	CALL ffty_tri_ffty( ar )
152
153	!
154	!-- Transposition y --> x and backward FFT along x
155	CALL tr_yx_fftx( ar, work, ar )
156
157	ELSE
158
159	!
160	!-- 2d-domain-decomposition
161	!-- Transposition z --> x
162	CALL cpu_log( log_point_s(5), 'transpo forward', 'start' )
163	CALL transpose_zx( ar, work, ar )
164	CALL cpu_log( log_point_s(5), 'transpo forward', 'pause' )
165
166	CALL cpu_log( log_point_s(4), 'fft_x', 'start' )
167	CALL fftxp( ar, 'forward' )
168	CALL cpu_log( log_point_s(4), 'fft_x', 'pause' )
169
170	!
171	!-- Transposition x --> y
172	CALL cpu_log( log_point_s(5), 'transpo forward', 'continue' )
173	CALL transpose_xy( ar, work, ar )
174	CALL cpu_log( log_point_s(5), 'transpo forward', 'pause' )
175
176	CALL cpu_log( log_point_s(7), 'fft_y', 'start' )
177	CALL fftyp( ar, 'forward' )
178	CALL cpu_log( log_point_s(7), 'fft_y', 'pause' )
179
180	!
181	!-- Transposition y --> z
182	CALL cpu_log( log_point_s(5), 'transpo forward', 'continue' )
183	CALL transpose_yz( ar, work, ar )
184	CALL cpu_log( log_point_s(5), 'transpo forward', 'stop' )
185
186	!
187	!-- Solve the Poisson equation in z-direction in cartesian space.
188	CALL cpu_log( log_point_s(6), 'tridia', 'start' )
189	CALL tridia( ar )
190	CALL cpu_log( log_point_s(6), 'tridia', 'stop' )
191
192	!
193	!-- Inverse Fourier Transformation
194	!-- Transposition z --> y
195	CALL cpu_log( log_point_s(8), 'transpo invers', 'start' )
196	CALL transpose_zy( ar, work, ar )
197	CALL cpu_log( log_point_s(8), 'transpo invers', 'pause' )
198
199	CALL cpu_log( log_point_s(7), 'fft_y', 'continue' )
200	CALL fftyp( ar, 'backward' )
201	CALL cpu_log( log_point_s(7), 'fft_y', 'stop' )
202
203	!
204	!-- Transposition y --> x
205	CALL cpu_log( log_point_s(8), 'transpo invers', 'continue' )
206	CALL transpose_yx( ar, work, ar )
207	CALL cpu_log( log_point_s(8), 'transpo invers', 'pause' )
208
209	CALL cpu_log( log_point_s(4), 'fft_x', 'continue' )
210	CALL fftxp( ar, 'backward' )
211	CALL cpu_log( log_point_s(4), 'fft_x', 'stop' )
212
213	!
214	!-- Transposition x --> z
215	CALL cpu_log( log_point_s(8), 'transpo invers', 'continue' )
216	CALL transpose_xz( ar, work, ar )
217	CALL cpu_log( log_point_s(8), 'transpo invers', 'stop' )
218
219	ENDIF
220
221	#else
222
223	!
224	!-- Two-dimensional Fourier Transformation along x- and y-direction.
225	CALL cpu_log( log_point_s(4), 'fft_x', 'start' )
226	CALL fftx( ar, 'forward' )
227	CALL cpu_log( log_point_s(4), 'fft_x', 'pause' )
228	CALL cpu_log( log_point_s(7), 'fft_y', 'start' )
229	CALL ffty( ar, 'forward' )
230	CALL cpu_log( log_point_s(7), 'fft_y', 'pause' )
231
232	!
233	!-- Solve the Poisson equation in z-direction in cartesian space.
234	CALL cpu_log( log_point_s(6), 'tridia', 'start' )
235	CALL tridia( ar )
236	CALL cpu_log( log_point_s(6), 'tridia', 'stop' )
237
238	!
239	!-- Inverse Fourier Transformation.
240	CALL cpu_log( log_point_s(7), 'fft_y', 'continue' )
241	CALL ffty( ar, 'backward' )
242	CALL cpu_log( log_point_s(7), 'fft_y', 'stop' )
243	CALL cpu_log( log_point_s(4), 'fft_x', 'continue' )
244	CALL fftx( ar, 'backward' )
245	CALL cpu_log( log_point_s(4), 'fft_x', 'stop' )
246
247	#endif
248
249	CALL cpu_log( log_point_s(3), 'poisfft', 'stop' )
250
251	END SUBROUTINE poisfft
252
253
254
255	SUBROUTINE tridia( ar )
256
257	!------------------------------------------------------------------------------!
258	! solves the linear system of equations:
259	!
260	! -(4 pi^2(i^2/(dx^2nnx^2)+j^2/(dy^2nny^2))+
261	! 1/(dzu(k)dzw(k))+1/(dzu(k-1)dzw(k)))*p(i,j,k)+
262	! 1/(dzu(k)dzw(k))p(i,j,k+1)+1/(dzu(k-1)dzw(k))p(i,j,k-1)=d(i,j,k)
263	!
264	! by using the Thomas algorithm
265	!------------------------------------------------------------------------------!
266
267	USE arrays_3d
268
269	IMPLICIT NONE
270
271	INTEGER :: i, j, k, nnyh
272
273	REAL, DIMENSION(nxl_z:nxr_z,0:nz-1) :: ar1
274	REAL, DIMENSION(5,nxl_z:nxr_z,0:nz-1) :: tri
275
276	#if defined( __parallel )
277	REAL :: ar(nxl_z:nxr_za,nys_z:nyn_za,1:nza)
278	#else
279	REAL :: ar(1:nz,nys_z:nyn_z,nxl_z:nxr_z)
280	#endif
281
282
283	nnyh = (ny+1) / 2
284
285	!
286	!-- Define constant elements of the tridiagonal matrix.
287	DO k = 0, nz-1
288	DO i = nxl_z, nxr_z
289	tri(2,i,k) = ddzu(k+1) * ddzw(k+1)
290	tri(3,i,k) = ddzu(k+2) * ddzw(k+1)
291	ENDDO
292	ENDDO
293
294	#if defined( __parallel )
295	!
296	!-- Repeat for all y-levels.
297	DO j = nys_z, nyn_z
298	IF ( j <= nnyh ) THEN
299	CALL maketri( tri, j )
300	ELSE
301	CALL maketri( tri, ny+1-j )
302	ENDIF
303	CALL split( tri )
304	CALL substi( ar, ar1, tri, j )
305	ENDDO
306	#else
307	!
308	!-- First y-level.
309	CALL maketri( tri, nys_z )
310	CALL split( tri )
311	CALL substi( ar, ar1, tri, 0 )
312
313	!
314	!-- Further y-levels.
315	DO j = 1, nnyh - 1
316	CALL maketri( tri, j )
317	CALL split( tri )
318	CALL substi( ar, ar1, tri, j )
319	CALL substi( ar, ar1, tri, ny+1-j )
320	ENDDO
321	CALL maketri( tri, nnyh )
322	CALL split( tri )
323	CALL substi( ar, ar1, tri, nnyh+nys )
324	#endif
325
326	CONTAINS
327
328	SUBROUTINE maketri( tri, j )
329
330	!------------------------------------------------------------------------------!
331	! Computes the i- and j-dependent component of the matrix
332	!------------------------------------------------------------------------------!
333
334	USE arrays_3d
335	USE constants
336	USE control_parameters
337	USE grid_variables
338
339	IMPLICIT NONE
340
341	INTEGER :: i, j, k, nnxh
342	REAL :: a, c
343	REAL :: ll(nxl_z:nxr_z)
344	REAL :: tri(5,nxl_z:nxr_z,0:nz-1)
345
346
347	nnxh = ( nx + 1 ) / 2
348
349	!
350	!-- Provide the tridiagonal matrix for solution of the Poisson equation in
351	!-- Fourier space. The coefficients are computed following the method of
352	!-- Schmidt et al. (DFVLR-Mitteilung 84-15), which departs from Stephan
353	!-- Siano's original version by discretizing the Poisson equation,
354	!-- before it is Fourier-transformed
355	#if defined( __parallel )
356	DO i = nxl_z, nxr_z
357	IF ( i >= 0 .AND. i <= nnxh ) THEN
358	ll(i) = 2.0 * ( 1.0 - COS( ( 2.0 * pi * i ) / &
359	FLOAT( nx+1 ) ) ) / ( dx * dx ) + &
360	2.0 * ( 1.0 - COS( ( 2.0 * pi * j ) / &
361	FLOAT( ny+1 ) ) ) / ( dy * dy )
362	ELSE
363	ll(i) = 2.0 * ( 1.0 - COS( ( 2.0 * pi * ( nx+1-i ) ) / &
364	FLOAT( nx+1 ) ) ) / ( dx * dx ) + &
365	2.0 * ( 1.0 - COS( ( 2.0 * pi * j ) / &
366	FLOAT( ny+1 ) ) ) / ( dy * dy )
367	ENDIF
368	DO k = 0,nz-1
369	a = -1.0 * ddzu(k+2) * ddzw(k+1)
370	c = -1.0 * ddzu(k+1) * ddzw(k+1)
371	tri(1,i,k) = a + c - ll(i)
372	ENDDO
373	ENDDO
374	#else
375	DO i = 0, nnxh
376	ll(i) = 2.0 * ( 1.0 - COS( ( 2.0 * pi * i ) / FLOAT( nx+1 ) ) ) / &
377	( dx * dx ) + &
378	2.0 * ( 1.0 - COS( ( 2.0 * pi * j ) / FLOAT( ny+1 ) ) ) / &
379	( dy * dy )
380	DO k = 0, nz-1
381	a = -1.0 * ddzu(k+2) * ddzw(k+1)
382	c = -1.0 * ddzu(k+1) * ddzw(k+1)
383	tri(1,i,k) = a + c - ll(i)
384	IF ( i >= 1 .and. i < nnxh ) THEN
385	tri(1,nx+1-i,k) = tri(1,i,k)
386	ENDIF
387	ENDDO
388	ENDDO
389	#endif
390	IF ( ibc_p_b == 1 .OR. ibc_p_b == 2 ) THEN
391	DO i = nxl_z, nxr_z
392	tri(1,i,0) = tri(1,i,0) + tri(2,i,0)
393	ENDDO
394	ENDIF
395	IF ( ibc_p_t == 1 ) THEN
396	DO i = nxl_z, nxr_z
397	tri(1,i,nz-1) = tri(1,i,nz-1) + tri(3,i,nz-1)
398	ENDDO
399	ENDIF
400
401	END SUBROUTINE maketri
402
403
404	SUBROUTINE substi( ar, ar1, tri, j )
405
406	!------------------------------------------------------------------------------!
407	! Substitution (Forward and Backward) (Thomas algorithm)
408	!------------------------------------------------------------------------------!
409
410	USE control_parameters
411
412	IMPLICIT NONE
413
414	INTEGER :: i, j, k
415	REAL :: ar1(nxl_z:nxr_z,0:nz-1)
416	REAL :: tri(5,nxl_z:nxr_z,0:nz-1)
417	#if defined( __parallel )
418	REAL :: ar(nxl_z:nxr_za,nys_z:nyn_za,1:nza)
419	#else
420	REAL :: ar(1:nz,nys_z:nyn_z,nxl_z:nxr_z)
421	#endif
422
423	!
424	!-- Forward substitution.
425	DO i = nxl_z, nxr_z
426	#if defined( __parallel )
427	ar1(i,0) = ar(i,j,1)
428	#else
429	ar1(i,0) = ar(1,j,i)
430	#endif
431	ENDDO
432	DO k = 1, nz - 1
433	DO i = nxl_z, nxr_z
434	#if defined( __parallel )
435	ar1(i,k) = ar(i,j,k+1) - tri(5,i,k) * ar1(i,k-1)
436	#else
437	ar1(i,k) = ar(k+1,j,i) - tri(5,i,k) * ar1(i,k-1)
438	#endif
439	ENDDO
440	ENDDO
441
442	!
443	!-- Backward substitution.
444	DO i = nxl_z, nxr_z
445	#if defined( __parallel )
446	ar(i,j,nz) = ar1(i,nz-1) / tri(4,i,nz-1)
447	#else
448	ar(nz,j,i) = ar1(i,nz-1) / tri(4,i,nz-1)
449	#endif
450	ENDDO
451	DO k = nz-2, 0, -1
452	DO i = nxl_z, nxr_z
453	#if defined( __parallel )
454	ar(i,j,k+1) = ( ar1(i,k) - tri(3,i,k) * ar(i,j,k+2) ) &
455	/ tri(4,i,k)
456	#else
457	ar(k+1,j,i) = ( ar1(i,k) - tri(3,i,k) * ar(k+2,j,i) ) &
458	/ tri(4,i,k)
459	#endif
460	ENDDO
461	ENDDO
462
463	!
464	!-- Indices i=0, j=0 correspond to horizontally averaged pressure.
465	!-- The respective values of ar should be zero at all k-levels if
466	!-- acceleration of horizontally averaged vertical velocity is zero.
467	IF ( ibc_p_b == 1 .AND. ibc_p_t == 1 ) THEN
468	IF ( j == 0 .AND. nxl_z == 0 ) THEN
469	#if defined( __parallel )
470	DO k = 1, nz
471	ar(nxl_z,j,k) = 0.0
472	ENDDO
473	#else
474	DO k = 1, nz
475	ar(k,j,nxl_z) = 0.0
476	ENDDO
477	#endif
478	ENDIF
479	ENDIF
480
481	END SUBROUTINE substi
482
483
484	SUBROUTINE split( tri )
485
486	!------------------------------------------------------------------------------!
487	! Splitting of the tridiagonal matrix (Thomas algorithm)
488	!------------------------------------------------------------------------------!
489
490	IMPLICIT NONE
491
492	INTEGER :: i, k
493	REAL :: tri(5,nxl_z:nxr_z,0:nz-1)
494
495	!
496	!-- Splitting.
497	DO i = nxl_z, nxr_z
498	tri(4,i,0) = tri(1,i,0)
499	ENDDO
500	DO k = 1, nz-1
501	DO i = nxl_z, nxr_z
502	tri(5,i,k) = tri(2,i,k) / tri(4,i,k-1)
503	tri(4,i,k) = tri(1,i,k) - tri(3,i,k-1) * tri(5,i,k)
504	ENDDO
505	ENDDO
506
507	END SUBROUTINE split
508
509	END SUBROUTINE tridia
510
511
512	#if defined( __parallel )
513	SUBROUTINE fftxp( ar, direction )
514
515	!------------------------------------------------------------------------------!
516	! Fourier-transformation along x-direction Parallelized version
517	!------------------------------------------------------------------------------!
518
519	IMPLICIT NONE
520
521	CHARACTER (LEN=*) :: direction
522	INTEGER :: j, k
523	REAL :: ar(0:nxa,nys_x:nyn_xa,nzb_x:nzt_xa)
524
525	!
526	!-- Performing the fft with one of the methods implemented
527	DO k = nzb_x, nzt_x
528	DO j = nys_x, nyn_x
529	CALL fft_x( ar(0:nx,j,k), direction )
530	ENDDO
531	ENDDO
532
533	END SUBROUTINE fftxp
534
535	#else
536	SUBROUTINE fftx( ar, direction )
537
538	!------------------------------------------------------------------------------!
539	! Fourier-transformation along x-direction Non parallel version
540	!------------------------------------------------------------------------------!
541
542	IMPLICIT NONE
543
544	CHARACTER (LEN=*) :: direction
545	INTEGER :: i, j, k
546	REAL :: ar(1:nz,0:ny,0:nx)
547
548	!
549	!-- Performing the fft with one of the methods implemented
550	DO k = 1, nz
551	DO j = 0, ny
552	CALL fft_x( ar(k,j,0:nx), direction )
553	ENDDO
554	ENDDO
555
556	END SUBROUTINE fftx
557	#endif
558
559
560	#if defined( __parallel )
561	SUBROUTINE fftyp( ar, direction )
562
563	!------------------------------------------------------------------------------!
564	! Fourier-transformation along y-direction Parallelized version
565	!------------------------------------------------------------------------------!
566
567	IMPLICIT NONE
568
569	CHARACTER (LEN=*) :: direction
570	INTEGER :: i, k
571	REAL :: ar(0:nya,nxl_y:nxr_ya,nzb_y:nzt_ya)
572
573	!
574	!-- Performing the fft with one of the methods implemented
575	DO k = nzb_y, nzt_y
576	DO i = nxl_y, nxr_y
577	CALL fft_y( ar(0:ny,i,k), direction )
578	ENDDO
579	ENDDO
580
581	END SUBROUTINE fftyp
582
583	#else
584	SUBROUTINE ffty( ar, direction )
585
586	!------------------------------------------------------------------------------!
587	! Fourier-transformation along y-direction Non parallel version
588	!------------------------------------------------------------------------------!
589
590	IMPLICIT NONE
591
592	CHARACTER (LEN=*) :: direction
593	INTEGER :: i, k
594	REAL :: ar(1:nz,0:ny,0:nx)
595
596	!
597	!-- Performing the fft with one of the methods implemented
598	DO k = 1, nz
599	DO i = 0, nx
600	CALL fft_y( ar(k,0:ny,i), direction )
601	ENDDO
602	ENDDO
603
604	END SUBROUTINE ffty
605	#endif
606
607	#if defined( __parallel )
608	SUBROUTINE ffty_tr_yx( f_in, work, f_out )
609
610	!------------------------------------------------------------------------------!
611	! Fourier-transformation along y with subsequent transposition y --> x for
612	! a 1d-decomposition along x
613	!
614	! ATTENTION: The performance of this routine is much faster on the NEC-SX6,
615	! if the first index of work_ffty_vec is odd. Otherwise
616	! memory bank conflicts may occur (especially if the index is a
617	! multiple of 128). That's why work_ffty_vec is dimensioned as
618	! 0:ny+1.
619	! Of course, this will not work if users are using an odd number
620	! of gridpoints along y.
621	!------------------------------------------------------------------------------!
622
623	USE control_parameters
624	USE cpulog
625	USE indices
626	USE interfaces
627	USE pegrid
628	USE transpose_indices
629
630	IMPLICIT NONE
631
632	INTEGER :: i, iend, iouter, ir, j, k
633	INTEGER, PARAMETER :: stridex = 4
634
635	REAL, DIMENSION(0:ny,stridex) :: work_ffty
636	#if defined( __nec )
637	REAL, DIMENSION(0:ny+1,1:nz,nxl:nxr) :: work_ffty_vec
638	#endif
639	REAL, DIMENSION(1:nza,0:nya,nxl:nxra) :: f_in
640	REAL, DIMENSION(nnx,1:nza,nys_x:nyn_xa,pdims(1)) :: f_out
641	REAL, DIMENSION(nxl:nxra,1:nza,0:nya) :: work
642
643	!
644	!-- Carry out the FFT along y, where all data are present due to the
645	!-- 1d-decomposition along x. Resort the data in a way that x becomes
646	!-- the first index.
647	CALL cpu_log( log_point_s(7), 'fft_y', 'start' )
648
649	IF ( host(1:3) == 'nec' ) THEN
650	#if defined( __nec )
651	!
652	!-- Code optimized for vector processors
653	!$OMP PARALLEL PRIVATE ( i, j, k )
654	!$OMP DO
655	DO i = nxl, nxr
656
657	DO j = 0, ny
658	DO k = 1, nz
659	work_ffty_vec(j,k,i) = f_in(k,j,i)
660	ENDDO
661	ENDDO
662
663	CALL fft_y_m( work_ffty_vec(:,:,i), ny+1, 'forward' )
664
665	ENDDO
666
667	!$OMP DO
668	DO k = 1, nz
669	DO j = 0, ny
670	DO i = nxl, nxr
671	work(i,k,j) = work_ffty_vec(j,k,i)
672	ENDDO
673	ENDDO
674	ENDDO
675	!$OMP END PARALLEL
676	#endif
677
678	ELSE
679
680	!
681	!-- Cache optimized code.
682	!-- The i-(x-)direction is split into a strided outer loop and an inner
683	!-- loop for better cache performance
684	!$OMP PARALLEL PRIVATE (i,iend,iouter,ir,j,k,work_ffty)
685	!$OMP DO
686	DO iouter = nxl, nxr, stridex
687
688	iend = MIN( iouter+stridex-1, nxr ) ! Upper bound for inner i loop
689
690	DO k = 1, nz
691
692	DO i = iouter, iend
693
694	ir = i-iouter+1 ! counter within a stride
695	DO j = 0, ny
696	work_ffty(j,ir) = f_in(k,j,i)
697	ENDDO
698	!
699	!-- FFT along y
700	CALL fft_y( work_ffty(:,ir), 'forward' )
701
702	ENDDO
703
704	!
705	!-- Resort
706	DO j = 0, ny
707	DO i = iouter, iend
708	work(i,k,j) = work_ffty(j,i-iouter+1)
709	ENDDO
710	ENDDO
711
712	ENDDO
713
714	ENDDO
715	!$OMP END PARALLEL
716
717	ENDIF
718	CALL cpu_log( log_point_s(7), 'fft_y', 'pause' )
719
720	!
721	!-- Transpose array
722	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
723	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
724	CALL MPI_ALLTOALL( work(nxl,1,0), sendrecvcount_xy, MPI_REAL, &
725	f_out(1,1,nys_x,1), sendrecvcount_xy, MPI_REAL, &
726	comm1dx, ierr )
727	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
728
729	END SUBROUTINE ffty_tr_yx
730
731
732	SUBROUTINE tr_xy_ffty( f_in, work, f_out )
733
734	!------------------------------------------------------------------------------!
735	! Transposition x --> y with a subsequent backward Fourier transformation for
736	! a 1d-decomposition along x
737	!------------------------------------------------------------------------------!
738
739	USE control_parameters
740	USE cpulog
741	USE indices
742	USE interfaces
743	USE pegrid
744	USE transpose_indices
745
746	IMPLICIT NONE
747
748	INTEGER :: i, iend, iouter, ir, j, k
749	INTEGER, PARAMETER :: stridex = 4
750
751	REAL, DIMENSION(0:ny,stridex) :: work_ffty
752	#if defined( __nec )
753	REAL, DIMENSION(0:ny+1,1:nz,nxl:nxr) :: work_ffty_vec
754	#endif
755	REAL, DIMENSION(nnx,1:nza,nys_x:nyn_xa,pdims(1)) :: f_in
756	REAL, DIMENSION(1:nza,0:nya,nxl:nxra) :: f_out
757	REAL, DIMENSION(nxl:nxra,1:nza,0:nya) :: work
758
759	!
760	!-- Transpose array
761	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
762	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
763	CALL MPI_ALLTOALL( f_in(1,1,nys_x,1), sendrecvcount_xy, MPI_REAL, &
764	work(nxl,1,0), sendrecvcount_xy, MPI_REAL, &
765	comm1dx, ierr )
766	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
767
768	!
769	!-- Resort the data in a way that y becomes the first index and carry out the
770	!-- backward fft along y.
771	CALL cpu_log( log_point_s(7), 'fft_y', 'continue' )
772
773	IF ( host(1:3) == 'nec' ) THEN
774	#if defined( __nec )
775	!
776	!-- Code optimized for vector processors
777	!$OMP PARALLEL PRIVATE ( i, j, k )
778	!$OMP DO
779	DO k = 1, nz
780	DO j = 0, ny
781	DO i = nxl, nxr
782	work_ffty_vec(j,k,i) = work(i,k,j)
783	ENDDO
784	ENDDO
785	ENDDO
786
787	!$OMP DO
788	DO i = nxl, nxr
789
790	CALL fft_y_m( work_ffty_vec(:,:,i), ny+1, 'backward' )
791
792	DO j = 0, ny
793	DO k = 1, nz
794	f_out(k,j,i) = work_ffty_vec(j,k,i)
795	ENDDO
796	ENDDO
797
798	ENDDO
799	!$OMP END PARALLEL
800	#endif
801
802	ELSE
803
804	!
805	!-- Cache optimized code.
806	!-- The i-(x-)direction is split into a strided outer loop and an inner
807	!-- loop for better cache performance
808	!$OMP PARALLEL PRIVATE ( i, iend, iouter, ir, j, k, work_ffty )
809	!$OMP DO
810	DO iouter = nxl, nxr, stridex
811
812	iend = MIN( iouter+stridex-1, nxr ) ! Upper bound for inner i loop
813
814	DO k = 1, nz
815	!
816	!-- Resort
817	DO j = 0, ny
818	DO i = iouter, iend
819	work_ffty(j,i-iouter+1) = work(i,k,j)
820	ENDDO
821	ENDDO
822
823	DO i = iouter, iend
824
825	!
826	!-- FFT along y
827	ir = i-iouter+1 ! counter within a stride
828	CALL fft_y( work_ffty(:,ir), 'backward' )
829
830	DO j = 0, ny
831	f_out(k,j,i) = work_ffty(j,ir)
832	ENDDO
833	ENDDO
834
835	ENDDO
836
837	ENDDO
838	!$OMP END PARALLEL
839
840	ENDIF
841
842	CALL cpu_log( log_point_s(7), 'fft_y', 'stop' )
843
844	END SUBROUTINE tr_xy_ffty
845
846
847	SUBROUTINE fftx_tri_fftx( ar )
848
849	!------------------------------------------------------------------------------!
850	! FFT along x, solution of the tridiagonal system and backward FFT for
851	! a 1d-decomposition along x
852	!
853	! WARNING: this subroutine may still not work for hybrid parallelization
854	! with OpenMP (for possible necessary changes see the original
855	! routine poisfft_hybrid, developed by Klaus Ketelsen, May 2002)
856	!------------------------------------------------------------------------------!
857
858	USE control_parameters
859	USE cpulog
860	USE grid_variables
861	USE indices
862	USE interfaces
863	USE pegrid
864	USE transpose_indices
865
866	IMPLICIT NONE
867
868	character(len=3) :: myth_char
869
870	INTEGER :: i, j, k, m, n, omp_get_thread_num, tn
871
872	REAL, DIMENSION(0:nx) :: work_fftx
873	REAL, DIMENSION(0:nx,1:nz) :: work_trix
874	REAL, DIMENSION(nnx,1:nza,nys_x:nyn_xa,pdims(1)) :: ar
875	REAL, DIMENSION(:,:,:,:), ALLOCATABLE :: tri
876
877
878	CALL cpu_log( log_point_s(33), 'fft_x + tridia', 'start' )
879
880	ALLOCATE( tri(5,0:nx,0:nz-1,0:threads_per_task-1) )
881
882	tn = 0 ! Default thread number in case of one thread
883	!$OMP PARALLEL DO PRIVATE ( i, j, k, m, n, tn, work_fftx, work_trix )
884	DO j = nys_x, nyn_x
885
886	!$ tn = omp_get_thread_num()
887
888	IF ( host(1:3) == 'nec' ) THEN
889	!
890	!-- Code optimized for vector processors
891	DO k = 1, nz
892
893	m = 0
894	DO n = 1, pdims(1)
895	DO i = 1, nnx_pe( n-1 ) ! WARN: pcoord(i) should be used!!
896	work_trix(m,k) = ar(i,k,j,n)
897	m = m + 1
898	ENDDO
899	ENDDO
900
901	ENDDO
902
903	CALL fft_x_m( work_trix, 'forward' )
904
905	ELSE
906	!
907	!-- Cache optimized code
908	DO k = 1, nz
909
910	m = 0
911	DO n = 1, pdims(1)
912	DO i = 1, nnx_pe( n-1 ) ! WARN: pcoord(i) should be used!!
913	work_fftx(m) = ar(i,k,j,n)
914	m = m + 1
915	ENDDO
916	ENDDO
917
918	CALL fft_x( work_fftx, 'forward' )
919
920	DO i = 0, nx
921	work_trix(i,k) = work_fftx(i)
922	ENDDO
923
924	ENDDO
925
926	ENDIF
927
928	!
929	!-- Solve the linear equation system
930	CALL tridia_1dd( ddx2, ddy2, nx, ny, j, work_trix, tri(:,:,:,tn) )
931
932	IF ( host(1:3) == 'nec' ) THEN
933	!
934	!-- Code optimized for vector processors
935	CALL fft_x_m( work_trix, 'backward' )
936
937	DO k = 1, nz
938
939	m = 0
940	DO n = 1, pdims(1)
941	DO i = 1, nnx_pe( n-1 ) ! WARN: pcoord(i) should be used!!
942	ar(i,k,j,n) = work_trix(m,k)
943	m = m + 1
944	ENDDO
945	ENDDO
946
947	ENDDO
948
949	ELSE
950	!
951	!-- Cache optimized code
952	DO k = 1, nz
953
954	DO i = 0, nx
955	work_fftx(i) = work_trix(i,k)
956	ENDDO
957
958	CALL fft_x( work_fftx, 'backward' )
959
960	m = 0
961	DO n = 1, pdims(1)
962	DO i = 1, nnx_pe( n-1 ) ! WARN: pcoord(i) should be used!!
963	ar(i,k,j,n) = work_fftx(m)
964	m = m + 1
965	ENDDO
966	ENDDO
967
968	ENDDO
969
970	ENDIF
971
972	ENDDO
973
974	DEALLOCATE( tri )
975
976	CALL cpu_log( log_point_s(33), 'fft_x + tridia', 'stop' )
977
978	END SUBROUTINE fftx_tri_fftx
979
980
981	SUBROUTINE fftx_tr_xy( f_in, work, f_out )
982
983	!------------------------------------------------------------------------------!
984	! Fourier-transformation along x with subsequent transposition x --> y for
985	! a 1d-decomposition along y
986	!
987	! ATTENTION: The NEC-branch of this routine may significantly profit from
988	! further optimizations. So far, performance is much worse than
989	! for routine ffty_tr_yx (more than three times slower).
990	!------------------------------------------------------------------------------!
991
992	USE control_parameters
993	USE cpulog
994	USE indices
995	USE interfaces
996	USE pegrid
997	USE transpose_indices
998
999	IMPLICIT NONE
1000
1001	INTEGER :: i, j, k
1002
1003	REAL, DIMENSION(0:nx,1:nz,nys:nyn) :: work_fftx
1004	REAL, DIMENSION(1:nza,nys:nyna,0:nxa) :: f_in
1005	REAL, DIMENSION(nny,1:nza,nxl_y:nxr_ya,pdims(2)) :: f_out
1006	REAL, DIMENSION(nys:nyna,1:nza,0:nxa) :: work
1007
1008	!
1009	!-- Carry out the FFT along x, where all data are present due to the
1010	!-- 1d-decomposition along y. Resort the data in a way that y becomes
1011	!-- the first index.
1012	CALL cpu_log( log_point_s(4), 'fft_x', 'start' )
1013
1014	IF ( host(1:3) == 'nec' ) THEN
1015	!
1016	!-- Code for vector processors
1017	!$OMP PARALLEL PRIVATE ( i, j, k )
1018	!$OMP DO
1019	DO i = 0, nx
1020
1021	DO j = nys, nyn
1022	DO k = 1, nz
1023	work_fftx(i,k,j) = f_in(k,j,i)
1024	ENDDO
1025	ENDDO
1026
1027	ENDDO
1028
1029	!$OMP DO
1030	DO j = nys, nyn
1031
1032	CALL fft_x_m( work_fftx(:,:,j), 'forward' )
1033
1034	DO k = 1, nz
1035	DO i = 0, nx
1036	work(j,k,i) = work_fftx(i,k,j)
1037	ENDDO
1038	ENDDO
1039
1040	ENDDO
1041	!$OMP END PARALLEL
1042
1043	ELSE
1044
1045	!
1046	!-- Cache optimized code (there might be still a potential for better
1047	!-- optimization).
1048	!$OMP PARALLEL PRIVATE (i,j,k,work_fftx)
1049	!$OMP DO
1050	DO i = 0, nx
1051
1052	DO j = nys, nyn
1053	DO k = 1, nz
1054	work_fftx(i,k,j) = f_in(k,j,i)
1055	ENDDO
1056	ENDDO
1057
1058	ENDDO
1059
1060	!$OMP DO
1061	DO j = nys, nyn
1062	DO k = 1, nz
1063
1064	CALL fft_x( work_fftx(0:nx,k,j), 'forward' )
1065
1066	DO i = 0, nx
1067	work(j,k,i) = work_fftx(i,k,j)
1068	ENDDO
1069	ENDDO
1070
1071	ENDDO
1072	!$OMP END PARALLEL
1073
1074	ENDIF
1075	CALL cpu_log( log_point_s(4), 'fft_x', 'pause' )
1076
1077	!
1078	!-- Transpose array
1079	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
1080	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
1081	CALL MPI_ALLTOALL( work(nys,1,0), sendrecvcount_xy, MPI_REAL, &
1082	f_out(1,1,nxl_y,1), sendrecvcount_xy, MPI_REAL, &
1083	comm1dy, ierr )
1084	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
1085
1086	END SUBROUTINE fftx_tr_xy
1087
1088
1089	SUBROUTINE tr_yx_fftx( f_in, work, f_out )
1090
1091	!------------------------------------------------------------------------------!
1092	! Transposition y --> x with a subsequent backward Fourier transformation for
1093	! a 1d-decomposition along x
1094	!------------------------------------------------------------------------------!
1095
1096	USE control_parameters
1097	USE cpulog
1098	USE indices
1099	USE interfaces
1100	USE pegrid
1101	USE transpose_indices
1102
1103	IMPLICIT NONE
1104
1105	INTEGER :: i, j, k
1106
1107	REAL, DIMENSION(0:nx,1:nz,nys:nyn) :: work_fftx
1108	REAL, DIMENSION(nny,1:nza,nxl_y:nxr_ya,pdims(2)) :: f_in
1109	REAL, DIMENSION(1:nza,nys:nyna,0:nxa) :: f_out
1110	REAL, DIMENSION(nys:nyna,1:nza,0:nxa) :: work
1111
1112	!
1113	!-- Transpose array
1114	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
1115	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
1116	CALL MPI_ALLTOALL( f_in(1,1,nxl_y,1), sendrecvcount_xy, MPI_REAL, &
1117	work(nys,1,0), sendrecvcount_xy, MPI_REAL, &
1118	comm1dy, ierr )
1119	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
1120
1121	!
1122	!-- Carry out the FFT along x, where all data are present due to the
1123	!-- 1d-decomposition along y. Resort the data in a way that y becomes
1124	!-- the first index.
1125	CALL cpu_log( log_point_s(4), 'fft_x', 'continue' )
1126
1127	IF ( host(1:3) == 'nec' ) THEN
1128	!
1129	!-- Code optimized for vector processors
1130	!$OMP PARALLEL PRIVATE ( i, j, k )
1131	!$OMP DO
1132	DO j = nys, nyn
1133
1134	DO k = 1, nz
1135	DO i = 0, nx
1136	work_fftx(i,k,j) = work(j,k,i)
1137	ENDDO
1138	ENDDO
1139
1140	CALL fft_x_m( work_fftx(:,:,j), 'backward' )
1141
1142	ENDDO
1143
1144	!$OMP DO
1145	DO i = 0, nx
1146	DO j = nys, nyn
1147	DO k = 1, nz
1148	f_out(k,j,i) = work_fftx(i,k,j)
1149	ENDDO
1150	ENDDO
1151	ENDDO
1152	!$OMP END PARALLEL
1153
1154	ELSE
1155
1156	!
1157	!-- Cache optimized code (there might be still a potential for better
1158	!-- optimization).
1159	!$OMP PARALLEL PRIVATE (i,j,k,work_fftx)
1160	!$OMP DO
1161	DO j = nys, nyn
1162	DO k = 1, nz
1163
1164	DO i = 0, nx
1165	work_fftx(i,k,j) = work(j,k,i)
1166	ENDDO
1167
1168	CALL fft_x( work_fftx(0:nx,k,j), 'backward' )
1169
1170	ENDDO
1171	ENDDO
1172
1173	!$OMP DO
1174	DO i = 0, nx
1175	DO j = nys, nyn
1176	DO k = 1, nz
1177	f_out(k,j,i) = work_fftx(i,k,j)
1178	ENDDO
1179	ENDDO
1180	ENDDO
1181	!$OMP END PARALLEL
1182
1183	ENDIF
1184	CALL cpu_log( log_point_s(4), 'fft_x', 'stop' )
1185
1186	END SUBROUTINE tr_yx_fftx
1187
1188
1189	SUBROUTINE ffty_tri_ffty( ar )
1190
1191	!------------------------------------------------------------------------------!
1192	! FFT along y, solution of the tridiagonal system and backward FFT for
1193	! a 1d-decomposition along y
1194	!
1195	! WARNING: this subroutine may still not work for hybrid parallelization
1196	! with OpenMP (for possible necessary changes see the original
1197	! routine poisfft_hybrid, developed by Klaus Ketelsen, May 2002)
1198	!------------------------------------------------------------------------------!
1199
1200	USE control_parameters
1201	USE cpulog
1202	USE grid_variables
1203	USE indices
1204	USE interfaces
1205	USE pegrid
1206	USE transpose_indices
1207
1208	IMPLICIT NONE
1209
1210	INTEGER :: i, j, k, m, n, omp_get_thread_num, tn
1211
1212	REAL, DIMENSION(0:ny) :: work_ffty
1213	REAL, DIMENSION(0:ny,1:nz) :: work_triy
1214	REAL, DIMENSION(nny,1:nza,nxl_y:nxr_ya,pdims(2)) :: ar
1215	REAL, DIMENSION(:,:,:,:), ALLOCATABLE :: tri
1216
1217
1218	CALL cpu_log( log_point_s(39), 'fft_y + tridia', 'start' )
1219
1220	ALLOCATE( tri(5,0:ny,0:nz-1,0:threads_per_task-1) )
1221
1222	tn = 0 ! Default thread number in case of one thread
1223	!$OMP PARALLEL PRIVATE ( i, j, k, m, n, tn, work_ffty, work_triy )
1224	!$OMP DO
1225	DO i = nxl_y, nxr_y
1226
1227	!$ tn = omp_get_thread_num()
1228
1229	IF ( host(1:3) == 'nec' ) THEN
1230	!
1231	!-- Code optimized for vector processors
1232	DO k = 1, nz
1233
1234	m = 0
1235	DO n = 1, pdims(2)
1236	DO j = 1, nny_pe( n-1 ) ! WARN: pcoord(j) should be used!!
1237	work_triy(m,k) = ar(j,k,i,n)
1238	m = m + 1
1239	ENDDO
1240	ENDDO
1241
1242	ENDDO
1243
1244	CALL fft_y_m( work_triy, ny, 'forward' )
1245
1246	ELSE
1247	!
1248	!-- Cache optimized code
1249	DO k = 1, nz
1250
1251	m = 0
1252	DO n = 1, pdims(2)
1253	DO j = 1, nny_pe( n-1 ) ! WARN: pcoord(j) should be used!!
1254	work_ffty(m) = ar(j,k,i,n)
1255	m = m + 1
1256	ENDDO
1257	ENDDO
1258
1259	CALL fft_y( work_ffty, 'forward' )
1260
1261	DO j = 0, ny
1262	work_triy(j,k) = work_ffty(j)
1263	ENDDO
1264
1265	ENDDO
1266
1267	ENDIF
1268
1269	!
1270	!-- Solve the linear equation system
1271	CALL tridia_1dd( ddy2, ddx2, ny, nx, i, work_triy, tri(:,:,:,tn) )
1272
1273	IF ( host(1:3) == 'nec' ) THEN
1274	!
1275	!-- Code optimized for vector processors
1276	CALL fft_y_m( work_triy, ny, 'backward' )
1277
1278	DO k = 1, nz
1279
1280	m = 0
1281	DO n = 1, pdims(2)
1282	DO j = 1, nny_pe( n-1 ) ! WARN: pcoord(j) should be used!!
1283	ar(j,k,i,n) = work_triy(m,k)
1284	m = m + 1
1285	ENDDO
1286	ENDDO
1287
1288	ENDDO
1289
1290	ELSE
1291	!
1292	!-- Cache optimized code
1293	DO k = 1, nz
1294
1295	DO j = 0, ny
1296	work_ffty(j) = work_triy(j,k)
1297	ENDDO
1298
1299	CALL fft_y( work_ffty, 'backward' )
1300
1301	m = 0
1302	DO n = 1, pdims(2)
1303	DO j = 1, nny_pe( n-1 ) ! WARN: pcoord(j) should be used!!
1304	ar(j,k,i,n) = work_ffty(m)
1305	m = m + 1
1306	ENDDO
1307	ENDDO
1308
1309	ENDDO
1310
1311	ENDIF
1312
1313	ENDDO
1314	!$OMP END PARALLEL
1315
1316	DEALLOCATE( tri )
1317
1318	CALL cpu_log( log_point_s(39), 'fft_y + tridia', 'stop' )
1319
1320	END SUBROUTINE ffty_tri_ffty
1321
1322
1323	SUBROUTINE tridia_1dd( ddx2, ddy2, nx, ny, j, ar, tri )
1324
1325	!------------------------------------------------------------------------------!
1326	! Solves the linear system of equations for a 1d-decomposition along x (see
1327	! tridia)
1328	!
1329	! Attention: when using the intel compiler, array tri must be passed as an
1330	! argument to the contained subroutines. Otherwise addres faults
1331	! will occur.
1332	! On NEC, tri should not be passed (except for routine substi_1dd)
1333	! because this causes very bad performance.
1334	!------------------------------------------------------------------------------!
1335
1336	USE arrays_3d
1337	USE control_parameters
1338
1339	USE pegrid
1340
1341	IMPLICIT NONE
1342
1343	INTEGER :: i, j, k, nnyh, nx, ny, omp_get_thread_num, tn
1344
1345	REAL :: ddx2, ddy2
1346
1347	REAL, DIMENSION(0:nx,1:nz) :: ar
1348	REAL, DIMENSION(0:nx,0:nz-1) :: ar1
1349	REAL, DIMENSION(5,0:nx,0:nz-1) :: tri
1350
1351
1352	nnyh = ( ny + 1 ) / 2
1353
1354	!
1355	!-- Define constant elements of the tridiagonal matrix.
1356	!-- The compiler on SX6 does loop exchange. If 0:nx is a high power of 2,
1357	!-- the exchanged loops create bank conflicts. The following directive
1358	!-- prohibits loop exchange and the loops perform much better.
1359	! tn = omp_get_thread_num()
1360	! WRITE( 120+tn, * ) '+++ id=',myid,' nx=',nx,' thread=', omp_get_thread_num()
1361	! CALL local_flush( 120+tn )
1362	!CDIR NOLOOPCHG
1363	DO k = 0, nz-1
1364	DO i = 0,nx
1365	tri(2,i,k) = ddzu(k+1) * ddzw(k+1)
1366	tri(3,i,k) = ddzu(k+2) * ddzw(k+1)
1367	ENDDO
1368	ENDDO
1369	! WRITE( 120+tn, * ) '+++ id=',myid,' end of first tridia loop thread=', omp_get_thread_num()
1370	! CALL local_flush( 120+tn )
1371
1372	IF ( j <= nnyh ) THEN
1373	#if defined( __lc )
1374	CALL maketri_1dd( j, tri )
1375	#else
1376	CALL maketri_1dd( j )
1377	#endif
1378	ELSE
1379	#if defined( __lc )
1380	CALL maketri_1dd( ny+1-j, tri )
1381	#else
1382	CALL maketri_1dd( ny+1-j )
1383	#endif
1384	ENDIF
1385	#if defined( __lc )
1386	CALL split_1dd( tri )
1387	#else
1388	CALL split_1dd
1389	#endif
1390	CALL substi_1dd( ar, tri )
1391
1392	CONTAINS
1393
1394	#if defined( __lc )
1395	SUBROUTINE maketri_1dd( j, tri )
1396	#else
1397	SUBROUTINE maketri_1dd( j )
1398	#endif
1399
1400	!------------------------------------------------------------------------------!
1401	! computes the i- and j-dependent component of the matrix
1402	!------------------------------------------------------------------------------!
1403
1404	USE constants
1405
1406	IMPLICIT NONE
1407
1408	INTEGER :: i, j, k, nnxh
1409	REAL :: a, c
1410
1411	REAL, DIMENSION(0:nx) :: l
1412
1413	#if defined( __lc )
1414	REAL, DIMENSION(5,0:nx,0:nz-1) :: tri
1415	#endif
1416
1417
1418	nnxh = ( nx + 1 ) / 2
1419	!
1420	!-- Provide the tridiagonal matrix for solution of the Poisson equation in
1421	!-- Fourier space. The coefficients are computed following the method of
1422	!-- Schmidt et al. (DFVLR-Mitteilung 84-15), which departs from Stephan
1423	!-- Siano's original version by discretizing the Poisson equation,
1424	!-- before it is Fourier-transformed
1425	DO i = 0, nx
1426	IF ( i >= 0 .AND. i <= nnxh ) THEN
1427	l(i) = 2.0 * ( 1.0 - COS( ( 2.0 * pi * i ) / &
1428	FLOAT( nx+1 ) ) ) * ddx2 + &
1429	2.0 * ( 1.0 - COS( ( 2.0 * pi * j ) / &
1430	FLOAT( ny+1 ) ) ) * ddy2
1431	ELSE
1432	l(i) = 2.0 * ( 1.0 - COS( ( 2.0 * pi * ( nx+1-i ) ) / &
1433	FLOAT( nx+1 ) ) ) * ddx2 + &
1434	2.0 * ( 1.0 - COS( ( 2.0 * pi * j ) / &
1435	FLOAT( ny+1 ) ) ) * ddy2
1436	ENDIF
1437	ENDDO
1438
1439	DO k = 0, nz-1
1440	DO i = 0, nx
1441	a = -1.0 * ddzu(k+2) * ddzw(k+1)
1442	c = -1.0 * ddzu(k+1) * ddzw(k+1)
1443	tri(1,i,k) = a + c - l(i)
1444	ENDDO
1445	ENDDO
1446	IF ( ibc_p_b == 1 .OR. ibc_p_b == 2 ) THEN
1447	DO i = 0, nx
1448	tri(1,i,0) = tri(1,i,0) + tri(2,i,0)
1449	ENDDO
1450	ENDIF
1451	IF ( ibc_p_t == 1 ) THEN
1452	DO i = 0, nx
1453	tri(1,i,nz-1) = tri(1,i,nz-1) + tri(3,i,nz-1)
1454	ENDDO
1455	ENDIF
1456
1457	END SUBROUTINE maketri_1dd
1458
1459
1460	#if defined( __lc )
1461	SUBROUTINE split_1dd( tri )
1462	#else
1463	SUBROUTINE split_1dd
1464	#endif
1465
1466	!------------------------------------------------------------------------------!
1467	! Splitting of the tridiagonal matrix (Thomas algorithm)
1468	!------------------------------------------------------------------------------!
1469
1470	IMPLICIT NONE
1471
1472	INTEGER :: i, k
1473
1474	#if defined( __lc )
1475	REAL, DIMENSION(5,0:nx,0:nz-1) :: tri
1476	#endif
1477
1478
1479	!
1480	!-- Splitting
1481	DO i = 0, nx
1482	tri(4,i,0) = tri(1,i,0)
1483	ENDDO
1484	DO k = 1, nz-1
1485	DO i = 0, nx
1486	tri(5,i,k) = tri(2,i,k) / tri(4,i,k-1)
1487	tri(4,i,k) = tri(1,i,k) - tri(3,i,k-1) * tri(5,i,k)
1488	ENDDO
1489	ENDDO
1490
1491	END SUBROUTINE split_1dd
1492
1493
1494	SUBROUTINE substi_1dd( ar, tri )
1495
1496	!------------------------------------------------------------------------------!
1497	! Substitution (Forward and Backward) (Thomas algorithm)
1498	!------------------------------------------------------------------------------!
1499
1500	IMPLICIT NONE
1501
1502	INTEGER :: i, k
1503
1504	REAL, DIMENSION(0:nx,nz) :: ar
1505	REAL, DIMENSION(0:nx,0:nz-1) :: ar1
1506	REAL, DIMENSION(5,0:nx,0:nz-1) :: tri
1507
1508	!
1509	!-- Forward substitution
1510	DO i = 0, nx
1511	ar1(i,0) = ar(i,1)
1512	ENDDO
1513	DO k = 1, nz-1
1514	DO i = 0, nx
1515	ar1(i,k) = ar(i,k+1) - tri(5,i,k) * ar1(i,k-1)
1516	ENDDO
1517	ENDDO
1518
1519	!
1520	!-- Backward substitution
1521	DO i = 0, nx
1522	ar(i,nz) = ar1(i,nz-1) / tri(4,i,nz-1)
1523	ENDDO
1524	DO k = nz-2, 0, -1
1525	DO i = 0, nx
1526	ar(i,k+1) = ( ar1(i,k) - tri(3,i,k) * ar(i,k+2) ) &
1527	/ tri(4,i,k)
1528	ENDDO
1529	ENDDO
1530
1531	!
1532	!-- Indices i=0, j=0 correspond to horizontally averaged pressure.
1533	!-- The respective values of ar should be zero at all k-levels if
1534	!-- acceleration of horizontally averaged vertical velocity is zero.
1535	IF ( ibc_p_b == 1 .AND. ibc_p_t == 1 ) THEN
1536	IF ( j == 0 ) THEN
1537	DO k = 1, nz
1538	ar(0,k) = 0.0
1539	ENDDO
1540	ENDIF
1541	ENDIF
1542
1543	END SUBROUTINE substi_1dd
1544
1545	END SUBROUTINE tridia_1dd
1546
1547	#endif
1548
1549	END MODULE poisfft_mod

Note: See TracBrowser for help on using the repository browser.

Download in other formats:

| Impressum | ©Leibniz Universität Hannover |