Home

Context Navigation

source: palm/trunk/SOURCE/poisfft.f90 @ 1108

Last change on this file since 1108 was 1107, checked in by raasch, 12 years ago
last commit documented
Property svn:keywords set to `Id`
File size: 45.2 KB

Line
1	MODULE poisfft_mod
2
3	!--------------------------------------------------------------------------------!
4	! This file is part of PALM.
5	!
6	! PALM is free software: you can redistribute it and/or modify it under the terms
7	! of the GNU General Public License as published by the Free Software Foundation,
8	! either version 3 of the License, or (at your option) any later version.
9	!
10	! PALM is distributed in the hope that it will be useful, but WITHOUT ANY
11	! WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
12	! A PARTICULAR PURPOSE. See the GNU General Public License for more details.
13	!
14	! You should have received a copy of the GNU General Public License along with
15	! PALM. If not, see <http://www.gnu.org/licenses/>.
16	!
17	! Copyright 1997-2012 Leibniz University Hannover
18	!--------------------------------------------------------------------------------!
19	!
20	! Current revisions:
21	! -----------------
22	!
23	!
24	! Former revisions:
25	! -----------------
26	! $Id: poisfft.f90 1107 2013-03-04 06:23:14Z raasch $
27	!
28	! 1106 2013-03-04 05:31:38Z raasch
29	! routines fftx, ffty, fftxp, fftyp removed, calls replaced by fft_x, fft_y,
30	! in the 1D-decomposition routines fft_x, ffty are replaced by fft_x_1d,
31	! fft_y_1d
32	!
33	! 1103 2013-02-20 02:15:53Z raasch
34	! tri, ar, and ar1 arguments in tridia-routines (2d) are removed because they
35	! sometimes cause segmentation faults with intel 12.1 compiler
36	!
37	! 1092 2013-02-02 11:24:22Z raasch
38	! unused variables removed
39	!
40	! 1036 2012-10-22 13:43:42Z raasch
41	! code put under GPL (PALM 3.9)
42	!
43	! 2012-09-21 07:03:55Z raasch
44	! FLOAT type conversion replaced by REAL
45	!
46	! 1003 2012-09-14 14:35:53Z raasch
47	! indices nxa, nya, etc. replaced by nx, ny, etc.
48	!
49	! 940 2012-07-09 14:31:00Z raasch
50	! special handling of tri-array as an argument in tridia_1dd routines switched
51	! off because it caused segmentation faults with intel 12.1 compiler
52	!
53	! 877 2012-04-03 11:21:44Z suehring
54	! Bugfix: Avoid divisions by zero in case of using a 'neumann' bc for the
55	! pressure at the top of the model domain.
56	!
57	! 809 2012-01-30 13:32:58Z maronga
58	! Bugfix: replaced .AND. and .NOT. with && and ! in the preprocessor directives
59	!
60	! 807 2012-01-25 11:53:51Z maronga
61	! New cpp directive "__check" implemented which is used by check_namelist_files
62	! (most of the code is unneeded by check_namelist_files).
63	!
64	! 763 2011-10-06 09:32:09Z suehring
65	! Comment added concerning the last change.
66	!
67	! 761 2011-10-05 17:58:52Z suehring
68	! Bugfix: Avoid divisions by zero in case of using a 'neumann' bc for the
69	! pressure at the top of the model domain.
70	!
71	! 696 2011-03-18 07:03:49Z raasch
72	! work_fftx removed from PRIVATE clauses in fftx_tr_xy and tr_yx_fftx
73	!
74	! 683 2011-02-09 14:25:15Z raasch
75	! openMP parallelization for 2d-domain-decomposition
76	!
77	! 667 2010-12-23 12:06:00Z suehring/gryschka
78	! ddzu replaced by ddzu_pres due to changes in zu(0)
79	!
80	! 622 2010-12-10 08:08:13Z raasch
81	! optional barriers included in order to speed up collective operations
82	!
83	! 377 2009-09-04 11:09:00Z raasch
84	! __lcmuk changed to __lc to avoid problems with Intel compiler on sgi-ice
85	!
86	! 164 2008-05-15 08:46:15Z raasch
87	! Arguments removed from transpose routines
88	!
89	! 128 2007-10-26 13:11:14Z raasch
90	! Bugfix: wavenumber calculation for even nx in routines maketri
91	!
92	! 85 2007-05-11 09:35:14Z raasch
93	! Bugfix: work_fft*_vec removed from some PRIVATE-declarations
94	!
95	! 76 2007-03-29 00:58:32Z raasch
96	! Tridiagonal coefficients adjusted for Neumann boundary conditions both at
97	! the bottom and the top.
98	!
99	! RCS Log replace by Id keyword, revision history cleaned up
100	!
101	! Revision 1.24 2006/08/04 15:00:24 raasch
102	! Default setting of the thread number tn in case of not using OpenMP
103	!
104	! Revision 1.23 2006/02/23 12:48:38 raasch
105	! Additional compiler directive in routine tridia_1dd for preventing loop
106	! exchange on NEC-SX6
107	!
108	! Revision 1.20 2004/04/30 12:38:09 raasch
109	! Parts of former poisfft_hybrid moved to this subroutine,
110	! former subroutine changed to a module, renaming of FFT-subroutines and
111	! -module, FFTs completely substituted by calls of fft_x and fft_y,
112	! NAG fft used in the non-parallel case completely removed, l in maketri
113	! is now a 1d-array, variables passed by modules instead of using parameter
114	! lists, enlarged transposition arrays introduced
115	!
116	! Revision 1.1 1997/07/24 11:24:14 raasch
117	! Initial revision
118	!
119	!
120	! Description:
121	! ------------
122	! See below.
123	!------------------------------------------------------------------------------!
124
125	!--------------------------------------------------------------------------!
126	! poisfft !
127	! !
128	! Original version: Stephan Siano (pois3d) !
129	! !
130	! Institute of Meteorology and Climatology, University of Hannover !
131	! Germany !
132	! !
133	! Version as of July 23,1996 !
134	! !
135	! !
136	! Version for parallel computers: Siegfried Raasch !
137	! !
138	! Version as of July 03,1997 !
139	! !
140	! Solves the Poisson equation with a 2D spectral method !
141	! d^2 p / dx^2 + d^2 p / dy^2 + d^2 p / dz^2 = s !
142	! !
143	! Input: !
144	! real ar contains in the (nnx,nny,nnz) elements, !
145	! starting from the element (1,nys,nxl), the !
146	! values for s !
147	! real work Temporary array !
148	! !
149	! Output: !
150	! real ar contains the solution for p !
151	!--------------------------------------------------------------------------!
152
153	USE fft_xy
154	USE indices
155	USE transpose_indices
156
157	IMPLICIT NONE
158
159	PRIVATE
160
161	#if ! defined ( __check )
162	PUBLIC poisfft, poisfft_init
163
164	INTERFACE poisfft
165	MODULE PROCEDURE poisfft
166	END INTERFACE poisfft
167
168	INTERFACE poisfft_init
169	MODULE PROCEDURE poisfft_init
170	END INTERFACE poisfft_init
171	#else
172	PUBLIC poisfft_init
173
174	INTERFACE poisfft_init
175	MODULE PROCEDURE poisfft_init
176	END INTERFACE poisfft_init
177	#endif
178
179	CONTAINS
180
181	SUBROUTINE poisfft_init
182
183	CALL fft_init
184
185	END SUBROUTINE poisfft_init
186
187	#if ! defined ( __check )
188	SUBROUTINE poisfft( ar, work )
189
190	USE cpulog
191	USE interfaces
192	USE pegrid
193
194	IMPLICIT NONE
195
196	REAL, DIMENSION(1:nz,nys:nyn,nxl:nxr) :: ar, work
197
198
199	CALL cpu_log( log_point_s(3), 'poisfft', 'start' )
200
201	!
202	!-- Two-dimensional Fourier Transformation in x- and y-direction.
203	#if defined( __parallel )
204	IF ( pdims(2) == 1 ) THEN
205
206	!
207	!-- 1d-domain-decomposition along x:
208	!-- FFT along y and transposition y --> x
209	CALL ffty_tr_yx( ar, work, ar )
210
211	!
212	!-- FFT along x, solving the tridiagonal system and backward FFT
213	CALL fftx_tri_fftx( ar )
214
215	!
216	!-- Transposition x --> y and backward FFT along y
217	CALL tr_xy_ffty( ar, work, ar )
218
219	ELSEIF ( pdims(1) == 1 ) THEN
220
221	!
222	!-- 1d-domain-decomposition along y:
223	!-- FFT along x and transposition x --> y
224	CALL fftx_tr_xy( ar, work, ar )
225
226	!
227	!-- FFT along y, solving the tridiagonal system and backward FFT
228	CALL ffty_tri_ffty( ar )
229
230	!
231	!-- Transposition y --> x and backward FFT along x
232	CALL tr_yx_fftx( ar, work, ar )
233
234	ELSE
235
236	!
237	!-- 2d-domain-decomposition
238	!-- Transposition z --> x
239	CALL cpu_log( log_point_s(5), 'transpo forward', 'start' )
240	CALL transpose_zx( ar, work, ar )
241	CALL cpu_log( log_point_s(5), 'transpo forward', 'pause' )
242
243	CALL cpu_log( log_point_s(4), 'fft_x', 'start' )
244	CALL fft_x( ar, 'forward' )
245	CALL cpu_log( log_point_s(4), 'fft_x', 'pause' )
246
247	!
248	!-- Transposition x --> y
249	CALL cpu_log( log_point_s(5), 'transpo forward', 'continue' )
250	CALL transpose_xy( ar, work, ar )
251	CALL cpu_log( log_point_s(5), 'transpo forward', 'pause' )
252
253	CALL cpu_log( log_point_s(7), 'fft_y', 'start' )
254	CALL fft_y( ar, 'forward' )
255	CALL cpu_log( log_point_s(7), 'fft_y', 'pause' )
256
257	!
258	!-- Transposition y --> z
259	CALL cpu_log( log_point_s(5), 'transpo forward', 'continue' )
260	CALL transpose_yz( ar, work, ar )
261	CALL cpu_log( log_point_s(5), 'transpo forward', 'stop' )
262
263	!
264	!-- Solve the tridiagonal equation system along z
265	CALL cpu_log( log_point_s(6), 'tridia', 'start' )
266	CALL tridia( ar )
267	CALL cpu_log( log_point_s(6), 'tridia', 'stop' )
268
269	!
270	!-- Inverse Fourier Transformation
271	!-- Transposition z --> y
272	CALL cpu_log( log_point_s(8), 'transpo invers', 'start' )
273	CALL transpose_zy( ar, work, ar )
274	CALL cpu_log( log_point_s(8), 'transpo invers', 'pause' )
275
276	CALL cpu_log( log_point_s(7), 'fft_y', 'continue' )
277	CALL fft_y( ar, 'backward' )
278	CALL cpu_log( log_point_s(7), 'fft_y', 'stop' )
279
280	!
281	!-- Transposition y --> x
282	CALL cpu_log( log_point_s(8), 'transpo invers', 'continue' )
283	CALL transpose_yx( ar, work, ar )
284	CALL cpu_log( log_point_s(8), 'transpo invers', 'pause' )
285
286	CALL cpu_log( log_point_s(4), 'fft_x', 'continue' )
287	CALL fft_x( ar, 'backward' )
288	CALL cpu_log( log_point_s(4), 'fft_x', 'stop' )
289
290	!
291	!-- Transposition x --> z
292	CALL cpu_log( log_point_s(8), 'transpo invers', 'continue' )
293	CALL transpose_xz( ar, work, ar )
294	CALL cpu_log( log_point_s(8), 'transpo invers', 'stop' )
295
296	ENDIF
297
298	#else
299
300	!
301	!-- Two-dimensional Fourier Transformation along x- and y-direction.
302	CALL cpu_log( log_point_s(5), 'transpo forward', 'start' )
303	!$acc data copyin( ar, work )
304	CALL transpose_zx( ar, work, ar )
305	!$acc update host( ar )
306	CALL cpu_log( log_point_s(5), 'transpo forward', 'pause' )
307
308
309	CALL cpu_log( log_point_s(4), 'fft_x', 'start' )
310	CALL fft_x( ar, 'forward' )
311	CALL cpu_log( log_point_s(4), 'fft_x', 'pause' )
312
313	CALL cpu_log( log_point_s(5), 'transpo forward', 'continue' )
314	CALL transpose_xy( ar, work, ar )
315	CALL cpu_log( log_point_s(5), 'transpo forward', 'pause' )
316
317	CALL cpu_log( log_point_s(7), 'fft_y', 'start' )
318	CALL fft_y( ar, 'forward' )
319	CALL cpu_log( log_point_s(7), 'fft_y', 'pause' )
320
321	!
322	!-- Solve the tridiagonal equation system along z
323	CALL cpu_log( log_point_s(5), 'transpo forward', 'continue' )
324	CALL transpose_yz( ar, work, ar )
325	CALL cpu_log( log_point_s(5), 'transpo forward', 'stop' )
326
327	CALL cpu_log( log_point_s(6), 'tridia', 'start' )
328	CALL tridia( ar )
329	CALL cpu_log( log_point_s(6), 'tridia', 'stop' )
330
331	CALL cpu_log( log_point_s(8), 'transpo invers', 'start' )
332	CALL transpose_zy( ar, work, ar )
333	CALL cpu_log( log_point_s(8), 'transpo invers', 'pause' )
334
335	!
336	!-- Inverse Fourier Transformation.
337	CALL cpu_log( log_point_s(7), 'fft_y', 'continue' )
338	CALL fft_y( ar, 'backward' )
339	CALL cpu_log( log_point_s(7), 'fft_y', 'stop' )
340
341	CALL cpu_log( log_point_s(8), 'transpo invers', 'continue' )
342	CALL transpose_yx( ar, work, ar )
343	CALL cpu_log( log_point_s(8), 'transpo invers', 'pause' )
344
345	CALL cpu_log( log_point_s(4), 'fft_x', 'continue' )
346	CALL fft_x( ar, 'backward' )
347	CALL cpu_log( log_point_s(4), 'fft_x', 'stop' )
348
349	CALL cpu_log( log_point_s(8), 'transpo invers', 'continue' )
350	CALL transpose_xz( ar, work, ar )
351	CALL cpu_log( log_point_s(8), 'transpo invers', 'stop' )
352
353	!$acc end data
354
355	#endif
356
357	CALL cpu_log( log_point_s(3), 'poisfft', 'stop' )
358
359	END SUBROUTINE poisfft
360
361
362
363	SUBROUTINE tridia( ar )
364
365	!------------------------------------------------------------------------------!
366	! solves the linear system of equations:
367	!
368	! -(4 pi^2(i^2/(dx^2nnx^2)+j^2/(dy^2nny^2))+
369	! 1/(dzu(k)dzw(k))+1/(dzu(k-1)dzw(k)))*p(i,j,k)+
370	! 1/(dzu(k)dzw(k))p(i,j,k+1)+1/(dzu(k-1)dzw(k))p(i,j,k-1)=d(i,j,k)
371	!
372	! by using the Thomas algorithm
373	!------------------------------------------------------------------------------!
374
375	USE arrays_3d
376
377	IMPLICIT NONE
378
379	INTEGER :: i, j, k, nnyh
380
381	REAL, DIMENSION(nxl_z:nxr_z,0:nz-1) :: ar1
382	REAL, DIMENSION(5,nxl_z:nxr_z,0:nz-1) :: tri
383
384	REAL :: ar(nxl_z:nxr_z,nys_z:nyn_z,1:nz)
385
386
387	nnyh = (ny+1) / 2
388
389	!
390	!-- Define constant elements of the tridiagonal matrix.
391	!$OMP PARALLEL PRIVATE ( k, i )
392	!$OMP DO
393	DO k = 0, nz-1
394	DO i = nxl_z, nxr_z
395	tri(2,i,k) = ddzu_pres(k+1) * ddzw(k+1)
396	tri(3,i,k) = ddzu_pres(k+2) * ddzw(k+1)
397	ENDDO
398	ENDDO
399	!$OMP END PARALLEL
400
401	#if defined( __parallel )
402	!
403	!-- Repeat for all y-levels.
404	!$OMP PARALLEL FIRSTPRIVATE( tri ) PRIVATE ( ar1, j )
405	!$OMP DO
406	DO j = nys_z, nyn_z
407	IF ( j <= nnyh ) THEN
408	CALL maketri( j )
409	ELSE
410	CALL maketri( ny+1-j )
411	ENDIF
412	CALL split
413	CALL substi( j )
414	ENDDO
415	!$OMP END PARALLEL
416	#else
417	!
418	!-- First y-level.
419	CALL maketri( nys_z )
420	CALL split
421	CALL substi( 0 )
422
423	!
424	!-- Further y-levels.
425	DO j = 1, nnyh - 1
426	CALL maketri( j )
427	CALL split
428	CALL substi( j )
429	CALL substi( ny+1-j )
430	ENDDO
431	CALL maketri( nnyh )
432	CALL split
433	CALL substi( nnyh+nys )
434	#endif
435
436	CONTAINS
437
438	SUBROUTINE maketri( j )
439
440	!------------------------------------------------------------------------------!
441	! Computes the i- and j-dependent component of the matrix
442	!------------------------------------------------------------------------------!
443
444	USE arrays_3d
445	USE constants
446	USE control_parameters
447	USE grid_variables
448
449	IMPLICIT NONE
450
451	INTEGER :: i, j, k, nnxh
452	REAL :: a, c
453	REAL :: ll(nxl_z:nxr_z)
454
455
456	nnxh = ( nx + 1 ) / 2
457
458	!
459	!-- Provide the tridiagonal matrix for solution of the Poisson equation in
460	!-- Fourier space. The coefficients are computed following the method of
461	!-- Schmidt et al. (DFVLR-Mitteilung 84-15), which departs from Stephan
462	!-- Siano's original version by discretizing the Poisson equation,
463	!-- before it is Fourier-transformed
464	#if defined( __parallel )
465	DO i = nxl_z, nxr_z
466	IF ( i >= 0 .AND. i <= nnxh ) THEN
467	ll(i) = 2.0 * ( 1.0 - COS( ( 2.0 * pi * i ) / &
468	REAL( nx+1 ) ) ) / ( dx * dx ) + &
469	2.0 * ( 1.0 - COS( ( 2.0 * pi * j ) / &
470	REAL( ny+1 ) ) ) / ( dy * dy )
471	ELSE
472	ll(i) = 2.0 * ( 1.0 - COS( ( 2.0 * pi * ( nx+1-i ) ) / &
473	REAL( nx+1 ) ) ) / ( dx * dx ) + &
474	2.0 * ( 1.0 - COS( ( 2.0 * pi * j ) / &
475	REAL( ny+1 ) ) ) / ( dy * dy )
476	ENDIF
477	DO k = 0,nz-1
478	a = -1.0 * ddzu_pres(k+2) * ddzw(k+1)
479	c = -1.0 * ddzu_pres(k+1) * ddzw(k+1)
480	tri(1,i,k) = a + c - ll(i)
481	ENDDO
482	ENDDO
483	#else
484	DO i = 0, nnxh
485	ll(i) = 2.0 * ( 1.0 - COS( ( 2.0 * pi * i ) / REAL( nx+1 ) ) ) / &
486	( dx * dx ) + &
487	2.0 * ( 1.0 - COS( ( 2.0 * pi * j ) / REAL( ny+1 ) ) ) / &
488	( dy * dy )
489	DO k = 0, nz-1
490	a = -1.0 * ddzu_pres(k+2) * ddzw(k+1)
491	c = -1.0 * ddzu_pres(k+1) * ddzw(k+1)
492	tri(1,i,k) = a + c - ll(i)
493	IF ( i >= 1 .and. i < nnxh ) THEN
494	tri(1,nx+1-i,k) = tri(1,i,k)
495	ENDIF
496	ENDDO
497	ENDDO
498	#endif
499	IF ( ibc_p_b == 1 .OR. ibc_p_b == 2 ) THEN
500	DO i = nxl_z, nxr_z
501	tri(1,i,0) = tri(1,i,0) + tri(2,i,0)
502	ENDDO
503	ENDIF
504	IF ( ibc_p_t == 1 ) THEN
505	DO i = nxl_z, nxr_z
506	tri(1,i,nz-1) = tri(1,i,nz-1) + tri(3,i,nz-1)
507	ENDDO
508	ENDIF
509
510	END SUBROUTINE maketri
511
512
513	SUBROUTINE substi( j )
514
515	!------------------------------------------------------------------------------!
516	! Substitution (Forward and Backward) (Thomas algorithm)
517	!------------------------------------------------------------------------------!
518
519	USE control_parameters
520
521	IMPLICIT NONE
522
523	INTEGER :: i, j, k
524
525	!
526	!-- Forward substitution.
527	DO i = nxl_z, nxr_z
528	ar1(i,0) = ar(i,j,1)
529	ENDDO
530	DO k = 1, nz - 1
531	DO i = nxl_z, nxr_z
532	ar1(i,k) = ar(i,j,k+1) - tri(5,i,k) * ar1(i,k-1)
533	ENDDO
534	ENDDO
535
536	!
537	!-- Backward substitution
538	!-- Note, the 1.0E-20 in the denominator is due to avoid divisions
539	!-- by zero appearing if the pressure bc is set to neumann at the top of
540	!-- the model domain.
541	DO i = nxl_z, nxr_z
542	ar(i,j,nz) = ar1(i,nz-1) / ( tri(4,i,nz-1) + 1.0E-20 )
543	ENDDO
544	DO k = nz-2, 0, -1
545	DO i = nxl_z, nxr_z
546	ar(i,j,k+1) = ( ar1(i,k) - tri(3,i,k) * ar(i,j,k+2) ) &
547	/ tri(4,i,k)
548	ENDDO
549	ENDDO
550
551	!
552	!-- Indices i=0, j=0 correspond to horizontally averaged pressure.
553	!-- The respective values of ar should be zero at all k-levels if
554	!-- acceleration of horizontally averaged vertical velocity is zero.
555	IF ( ibc_p_b == 1 .AND. ibc_p_t == 1 ) THEN
556	IF ( j == 0 .AND. nxl_z == 0 ) THEN
557	DO k = 1, nz
558	ar(nxl_z,j,k) = 0.0
559	ENDDO
560	ENDIF
561	ENDIF
562
563	END SUBROUTINE substi
564
565
566	SUBROUTINE split
567
568	!------------------------------------------------------------------------------!
569	! Splitting of the tridiagonal matrix (Thomas algorithm)
570	!------------------------------------------------------------------------------!
571
572	IMPLICIT NONE
573
574	INTEGER :: i, k
575
576	!
577	!-- Splitting.
578	DO i = nxl_z, nxr_z
579	tri(4,i,0) = tri(1,i,0)
580	ENDDO
581	DO k = 1, nz-1
582	DO i = nxl_z, nxr_z
583	tri(5,i,k) = tri(2,i,k) / tri(4,i,k-1)
584	tri(4,i,k) = tri(1,i,k) - tri(3,i,k-1) * tri(5,i,k)
585	ENDDO
586	ENDDO
587
588	END SUBROUTINE split
589
590	END SUBROUTINE tridia
591
592
593	#if defined( __parallel )
594	SUBROUTINE ffty_tr_yx( f_in, work, f_out )
595
596	!------------------------------------------------------------------------------!
597	! Fourier-transformation along y with subsequent transposition y --> x for
598	! a 1d-decomposition along x
599	!
600	! ATTENTION: The performance of this routine is much faster on the NEC-SX6,
601	! if the first index of work_ffty_vec is odd. Otherwise
602	! memory bank conflicts may occur (especially if the index is a
603	! multiple of 128). That's why work_ffty_vec is dimensioned as
604	! 0:ny+1.
605	! Of course, this will not work if users are using an odd number
606	! of gridpoints along y.
607	!------------------------------------------------------------------------------!
608
609	USE control_parameters
610	USE cpulog
611	USE indices
612	USE interfaces
613	USE pegrid
614	USE transpose_indices
615
616	IMPLICIT NONE
617
618	INTEGER :: i, iend, iouter, ir, j, k
619	INTEGER, PARAMETER :: stridex = 4
620
621	REAL, DIMENSION(0:ny,stridex) :: work_ffty
622	#if defined( __nec )
623	REAL, DIMENSION(0:ny+1,1:nz,nxl:nxr) :: work_ffty_vec
624	#endif
625	REAL, DIMENSION(1:nz,0:ny,nxl:nxr) :: f_in
626	REAL, DIMENSION(nnx,1:nz,nys_x:nyn_x,pdims(1)) :: f_out
627	REAL, DIMENSION(nxl:nxr,1:nz,0:ny) :: work
628
629	!
630	!-- Carry out the FFT along y, where all data are present due to the
631	!-- 1d-decomposition along x. Resort the data in a way that x becomes
632	!-- the first index.
633	CALL cpu_log( log_point_s(7), 'fft_y_1d', 'start' )
634
635	IF ( host(1:3) == 'nec' ) THEN
636	#if defined( __nec )
637	!
638	!-- Code optimized for vector processors
639	!$OMP PARALLEL PRIVATE ( i, j, k )
640	!$OMP DO
641	DO i = nxl, nxr
642
643	DO j = 0, ny
644	DO k = 1, nz
645	work_ffty_vec(j,k,i) = f_in(k,j,i)
646	ENDDO
647	ENDDO
648
649	CALL fft_y_m( work_ffty_vec(:,:,i), ny+1, 'forward' )
650
651	ENDDO
652
653	!$OMP DO
654	DO k = 1, nz
655	DO j = 0, ny
656	DO i = nxl, nxr
657	work(i,k,j) = work_ffty_vec(j,k,i)
658	ENDDO
659	ENDDO
660	ENDDO
661	!$OMP END PARALLEL
662	#endif
663
664	ELSE
665
666	!
667	!-- Cache optimized code.
668	!-- The i-(x-)direction is split into a strided outer loop and an inner
669	!-- loop for better cache performance
670	!$OMP PARALLEL PRIVATE (i,iend,iouter,ir,j,k,work_ffty)
671	!$OMP DO
672	DO iouter = nxl, nxr, stridex
673
674	iend = MIN( iouter+stridex-1, nxr ) ! Upper bound for inner i loop
675
676	DO k = 1, nz
677
678	DO i = iouter, iend
679
680	ir = i-iouter+1 ! counter within a stride
681	DO j = 0, ny
682	work_ffty(j,ir) = f_in(k,j,i)
683	ENDDO
684	!
685	!-- FFT along y
686	CALL fft_y_1d( work_ffty(:,ir), 'forward' )
687
688	ENDDO
689
690	!
691	!-- Resort
692	DO j = 0, ny
693	DO i = iouter, iend
694	work(i,k,j) = work_ffty(j,i-iouter+1)
695	ENDDO
696	ENDDO
697
698	ENDDO
699
700	ENDDO
701	!$OMP END PARALLEL
702
703	ENDIF
704	CALL cpu_log( log_point_s(7), 'fft_y_1d', 'pause' )
705
706	!
707	!-- Transpose array
708	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
709	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
710	CALL MPI_ALLTOALL( work(nxl,1,0), sendrecvcount_xy, MPI_REAL, &
711	f_out(1,1,nys_x,1), sendrecvcount_xy, MPI_REAL, &
712	comm1dx, ierr )
713	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
714
715	END SUBROUTINE ffty_tr_yx
716
717
718	SUBROUTINE tr_xy_ffty( f_in, work, f_out )
719
720	!------------------------------------------------------------------------------!
721	! Transposition x --> y with a subsequent backward Fourier transformation for
722	! a 1d-decomposition along x
723	!------------------------------------------------------------------------------!
724
725	USE control_parameters
726	USE cpulog
727	USE indices
728	USE interfaces
729	USE pegrid
730	USE transpose_indices
731
732	IMPLICIT NONE
733
734	INTEGER :: i, iend, iouter, ir, j, k
735	INTEGER, PARAMETER :: stridex = 4
736
737	REAL, DIMENSION(0:ny,stridex) :: work_ffty
738	#if defined( __nec )
739	REAL, DIMENSION(0:ny+1,1:nz,nxl:nxr) :: work_ffty_vec
740	#endif
741	REAL, DIMENSION(nnx,1:nz,nys_x:nyn_x,pdims(1)) :: f_in
742	REAL, DIMENSION(1:nz,0:ny,nxl:nxr) :: f_out
743	REAL, DIMENSION(nxl:nxr,1:nz,0:ny) :: work
744
745	!
746	!-- Transpose array
747	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
748	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
749	CALL MPI_ALLTOALL( f_in(1,1,nys_x,1), sendrecvcount_xy, MPI_REAL, &
750	work(nxl,1,0), sendrecvcount_xy, MPI_REAL, &
751	comm1dx, ierr )
752	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
753
754	!
755	!-- Resort the data in a way that y becomes the first index and carry out the
756	!-- backward fft along y.
757	CALL cpu_log( log_point_s(7), 'fft_y_1d', 'continue' )
758
759	IF ( host(1:3) == 'nec' ) THEN
760	#if defined( __nec )
761	!
762	!-- Code optimized for vector processors
763	!$OMP PARALLEL PRIVATE ( i, j, k )
764	!$OMP DO
765	DO k = 1, nz
766	DO j = 0, ny
767	DO i = nxl, nxr
768	work_ffty_vec(j,k,i) = work(i,k,j)
769	ENDDO
770	ENDDO
771	ENDDO
772
773	!$OMP DO
774	DO i = nxl, nxr
775
776	CALL fft_y_m( work_ffty_vec(:,:,i), ny+1, 'backward' )
777
778	DO j = 0, ny
779	DO k = 1, nz
780	f_out(k,j,i) = work_ffty_vec(j,k,i)
781	ENDDO
782	ENDDO
783
784	ENDDO
785	!$OMP END PARALLEL
786	#endif
787
788	ELSE
789
790	!
791	!-- Cache optimized code.
792	!-- The i-(x-)direction is split into a strided outer loop and an inner
793	!-- loop for better cache performance
794	!$OMP PARALLEL PRIVATE ( i, iend, iouter, ir, j, k, work_ffty )
795	!$OMP DO
796	DO iouter = nxl, nxr, stridex
797
798	iend = MIN( iouter+stridex-1, nxr ) ! Upper bound for inner i loop
799
800	DO k = 1, nz
801	!
802	!-- Resort
803	DO j = 0, ny
804	DO i = iouter, iend
805	work_ffty(j,i-iouter+1) = work(i,k,j)
806	ENDDO
807	ENDDO
808
809	DO i = iouter, iend
810
811	!
812	!-- FFT along y
813	ir = i-iouter+1 ! counter within a stride
814	CALL fft_y_1d( work_ffty(:,ir), 'backward' )
815
816	DO j = 0, ny
817	f_out(k,j,i) = work_ffty(j,ir)
818	ENDDO
819	ENDDO
820
821	ENDDO
822
823	ENDDO
824	!$OMP END PARALLEL
825
826	ENDIF
827
828	CALL cpu_log( log_point_s(7), 'fft_y_1d', 'stop' )
829
830	END SUBROUTINE tr_xy_ffty
831
832
833	SUBROUTINE fftx_tri_fftx( ar )
834
835	!------------------------------------------------------------------------------!
836	! FFT along x, solution of the tridiagonal system and backward FFT for
837	! a 1d-decomposition along x
838	!
839	! WARNING: this subroutine may still not work for hybrid parallelization
840	! with OpenMP (for possible necessary changes see the original
841	! routine poisfft_hybrid, developed by Klaus Ketelsen, May 2002)
842	!------------------------------------------------------------------------------!
843
844	USE control_parameters
845	USE cpulog
846	USE grid_variables
847	USE indices
848	USE interfaces
849	USE pegrid
850	USE transpose_indices
851
852	IMPLICIT NONE
853
854	INTEGER :: i, j, k, m, n, omp_get_thread_num, tn
855
856	REAL, DIMENSION(0:nx) :: work_fftx
857	REAL, DIMENSION(0:nx,1:nz) :: work_trix
858	REAL, DIMENSION(nnx,1:nz,nys_x:nyn_x,pdims(1)) :: ar
859	REAL, DIMENSION(:,:,:,:), ALLOCATABLE :: tri
860
861
862	CALL cpu_log( log_point_s(33), 'fft_x_1d + tridia', 'start' )
863
864	ALLOCATE( tri(5,0:nx,0:nz-1,0:threads_per_task-1) )
865
866	tn = 0 ! Default thread number in case of one thread
867	!$OMP PARALLEL DO PRIVATE ( i, j, k, m, n, tn, work_fftx, work_trix )
868	DO j = nys_x, nyn_x
869
870	!$ tn = omp_get_thread_num()
871
872	IF ( host(1:3) == 'nec' ) THEN
873	!
874	!-- Code optimized for vector processors
875	DO k = 1, nz
876
877	m = 0
878	DO n = 1, pdims(1)
879	DO i = 1, nnx
880	work_trix(m,k) = ar(i,k,j,n)
881	m = m + 1
882	ENDDO
883	ENDDO
884
885	ENDDO
886
887	CALL fft_x_m( work_trix, 'forward' )
888
889	ELSE
890	!
891	!-- Cache optimized code
892	DO k = 1, nz
893
894	m = 0
895	DO n = 1, pdims(1)
896	DO i = 1, nnx
897	work_fftx(m) = ar(i,k,j,n)
898	m = m + 1
899	ENDDO
900	ENDDO
901
902	CALL fft_x_1d( work_fftx, 'forward' )
903
904	DO i = 0, nx
905	work_trix(i,k) = work_fftx(i)
906	ENDDO
907
908	ENDDO
909
910	ENDIF
911
912	!
913	!-- Solve the linear equation system
914	CALL tridia_1dd( ddx2, ddy2, nx, ny, j, work_trix, tri(:,:,:,tn) )
915
916	IF ( host(1:3) == 'nec' ) THEN
917	!
918	!-- Code optimized for vector processors
919	CALL fft_x_m( work_trix, 'backward' )
920
921	DO k = 1, nz
922
923	m = 0
924	DO n = 1, pdims(1)
925	DO i = 1, nnx
926	ar(i,k,j,n) = work_trix(m,k)
927	m = m + 1
928	ENDDO
929	ENDDO
930
931	ENDDO
932
933	ELSE
934	!
935	!-- Cache optimized code
936	DO k = 1, nz
937
938	DO i = 0, nx
939	work_fftx(i) = work_trix(i,k)
940	ENDDO
941
942	CALL fft_x_1d( work_fftx, 'backward' )
943
944	m = 0
945	DO n = 1, pdims(1)
946	DO i = 1, nnx
947	ar(i,k,j,n) = work_fftx(m)
948	m = m + 1
949	ENDDO
950	ENDDO
951
952	ENDDO
953
954	ENDIF
955
956	ENDDO
957
958	DEALLOCATE( tri )
959
960	CALL cpu_log( log_point_s(33), 'fft_x_1d + tridia', 'stop' )
961
962	END SUBROUTINE fftx_tri_fftx
963
964
965	SUBROUTINE fftx_tr_xy( f_in, work, f_out )
966
967	!------------------------------------------------------------------------------!
968	! Fourier-transformation along x with subsequent transposition x --> y for
969	! a 1d-decomposition along y
970	!
971	! ATTENTION: The NEC-branch of this routine may significantly profit from
972	! further optimizations. So far, performance is much worse than
973	! for routine ffty_tr_yx (more than three times slower).
974	!------------------------------------------------------------------------------!
975
976	USE control_parameters
977	USE cpulog
978	USE indices
979	USE interfaces
980	USE pegrid
981	USE transpose_indices
982
983	IMPLICIT NONE
984
985	INTEGER :: i, j, k
986
987	REAL, DIMENSION(0:nx,1:nz,nys:nyn) :: work_fftx
988	REAL, DIMENSION(1:nz,nys:nyn,0:nx) :: f_in
989	REAL, DIMENSION(nny,1:nz,nxl_y:nxr_y,pdims(2)) :: f_out
990	REAL, DIMENSION(nys:nyn,1:nz,0:nx) :: work
991
992	!
993	!-- Carry out the FFT along x, where all data are present due to the
994	!-- 1d-decomposition along y. Resort the data in a way that y becomes
995	!-- the first index.
996	CALL cpu_log( log_point_s(4), 'fft_x_1d', 'start' )
997
998	IF ( host(1:3) == 'nec' ) THEN
999	!
1000	!-- Code for vector processors
1001	!$OMP PARALLEL PRIVATE ( i, j, k )
1002	!$OMP DO
1003	DO i = 0, nx
1004
1005	DO j = nys, nyn
1006	DO k = 1, nz
1007	work_fftx(i,k,j) = f_in(k,j,i)
1008	ENDDO
1009	ENDDO
1010
1011	ENDDO
1012
1013	!$OMP DO
1014	DO j = nys, nyn
1015
1016	CALL fft_x_m( work_fftx(:,:,j), 'forward' )
1017
1018	DO k = 1, nz
1019	DO i = 0, nx
1020	work(j,k,i) = work_fftx(i,k,j)
1021	ENDDO
1022	ENDDO
1023
1024	ENDDO
1025	!$OMP END PARALLEL
1026
1027	ELSE
1028
1029	!
1030	!-- Cache optimized code (there might be still a potential for better
1031	!-- optimization).
1032	!$OMP PARALLEL PRIVATE (i,j,k)
1033	!$OMP DO
1034	DO i = 0, nx
1035
1036	DO j = nys, nyn
1037	DO k = 1, nz
1038	work_fftx(i,k,j) = f_in(k,j,i)
1039	ENDDO
1040	ENDDO
1041
1042	ENDDO
1043
1044	!$OMP DO
1045	DO j = nys, nyn
1046	DO k = 1, nz
1047
1048	CALL fft_x_1d( work_fftx(0:nx,k,j), 'forward' )
1049
1050	DO i = 0, nx
1051	work(j,k,i) = work_fftx(i,k,j)
1052	ENDDO
1053	ENDDO
1054
1055	ENDDO
1056	!$OMP END PARALLEL
1057
1058	ENDIF
1059	CALL cpu_log( log_point_s(4), 'fft_x_1d', 'pause' )
1060
1061	!
1062	!-- Transpose array
1063	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
1064	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
1065	CALL MPI_ALLTOALL( work(nys,1,0), sendrecvcount_xy, MPI_REAL, &
1066	f_out(1,1,nxl_y,1), sendrecvcount_xy, MPI_REAL, &
1067	comm1dy, ierr )
1068	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
1069
1070	END SUBROUTINE fftx_tr_xy
1071
1072
1073	SUBROUTINE tr_yx_fftx( f_in, work, f_out )
1074
1075	!------------------------------------------------------------------------------!
1076	! Transposition y --> x with a subsequent backward Fourier transformation for
1077	! a 1d-decomposition along x
1078	!------------------------------------------------------------------------------!
1079
1080	USE control_parameters
1081	USE cpulog
1082	USE indices
1083	USE interfaces
1084	USE pegrid
1085	USE transpose_indices
1086
1087	IMPLICIT NONE
1088
1089	INTEGER :: i, j, k
1090
1091	REAL, DIMENSION(0:nx,1:nz,nys:nyn) :: work_fftx
1092	REAL, DIMENSION(nny,1:nz,nxl_y:nxr_y,pdims(2)) :: f_in
1093	REAL, DIMENSION(1:nz,nys:nyn,0:nx) :: f_out
1094	REAL, DIMENSION(nys:nyn,1:nz,0:nx) :: work
1095
1096	!
1097	!-- Transpose array
1098	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
1099	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
1100	CALL MPI_ALLTOALL( f_in(1,1,nxl_y,1), sendrecvcount_xy, MPI_REAL, &
1101	work(nys,1,0), sendrecvcount_xy, MPI_REAL, &
1102	comm1dy, ierr )
1103	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
1104
1105	!
1106	!-- Carry out the FFT along x, where all data are present due to the
1107	!-- 1d-decomposition along y. Resort the data in a way that y becomes
1108	!-- the first index.
1109	CALL cpu_log( log_point_s(4), 'fft_x_1d', 'continue' )
1110
1111	IF ( host(1:3) == 'nec' ) THEN
1112	!
1113	!-- Code optimized for vector processors
1114	!$OMP PARALLEL PRIVATE ( i, j, k )
1115	!$OMP DO
1116	DO j = nys, nyn
1117
1118	DO k = 1, nz
1119	DO i = 0, nx
1120	work_fftx(i,k,j) = work(j,k,i)
1121	ENDDO
1122	ENDDO
1123
1124	CALL fft_x_m( work_fftx(:,:,j), 'backward' )
1125
1126	ENDDO
1127
1128	!$OMP DO
1129	DO i = 0, nx
1130	DO j = nys, nyn
1131	DO k = 1, nz
1132	f_out(k,j,i) = work_fftx(i,k,j)
1133	ENDDO
1134	ENDDO
1135	ENDDO
1136	!$OMP END PARALLEL
1137
1138	ELSE
1139
1140	!
1141	!-- Cache optimized code (there might be still a potential for better
1142	!-- optimization).
1143	!$OMP PARALLEL PRIVATE (i,j,k)
1144	!$OMP DO
1145	DO j = nys, nyn
1146	DO k = 1, nz
1147
1148	DO i = 0, nx
1149	work_fftx(i,k,j) = work(j,k,i)
1150	ENDDO
1151
1152	CALL fft_x_1d( work_fftx(0:nx,k,j), 'backward' )
1153
1154	ENDDO
1155	ENDDO
1156
1157	!$OMP DO
1158	DO i = 0, nx
1159	DO j = nys, nyn
1160	DO k = 1, nz
1161	f_out(k,j,i) = work_fftx(i,k,j)
1162	ENDDO
1163	ENDDO
1164	ENDDO
1165	!$OMP END PARALLEL
1166
1167	ENDIF
1168	CALL cpu_log( log_point_s(4), 'fft_x_1d', 'stop' )
1169
1170	END SUBROUTINE tr_yx_fftx
1171
1172
1173	SUBROUTINE ffty_tri_ffty( ar )
1174
1175	!------------------------------------------------------------------------------!
1176	! FFT along y, solution of the tridiagonal system and backward FFT for
1177	! a 1d-decomposition along y
1178	!
1179	! WARNING: this subroutine may still not work for hybrid parallelization
1180	! with OpenMP (for possible necessary changes see the original
1181	! routine poisfft_hybrid, developed by Klaus Ketelsen, May 2002)
1182	!------------------------------------------------------------------------------!
1183
1184	USE control_parameters
1185	USE cpulog
1186	USE grid_variables
1187	USE indices
1188	USE interfaces
1189	USE pegrid
1190	USE transpose_indices
1191
1192	IMPLICIT NONE
1193
1194	INTEGER :: i, j, k, m, n, omp_get_thread_num, tn
1195
1196	REAL, DIMENSION(0:ny) :: work_ffty
1197	REAL, DIMENSION(0:ny,1:nz) :: work_triy
1198	REAL, DIMENSION(nny,1:nz,nxl_y:nxr_y,pdims(2)) :: ar
1199	REAL, DIMENSION(:,:,:,:), ALLOCATABLE :: tri
1200
1201
1202	CALL cpu_log( log_point_s(39), 'fft_y_1d + tridia', 'start' )
1203
1204	ALLOCATE( tri(5,0:ny,0:nz-1,0:threads_per_task-1) )
1205
1206	tn = 0 ! Default thread number in case of one thread
1207	!$OMP PARALLEL DO PRIVATE ( i, j, k, m, n, tn, work_ffty, work_triy )
1208	DO i = nxl_y, nxr_y
1209
1210	!$ tn = omp_get_thread_num()
1211
1212	IF ( host(1:3) == 'nec' ) THEN
1213	!
1214	!-- Code optimized for vector processors
1215	DO k = 1, nz
1216
1217	m = 0
1218	DO n = 1, pdims(2)
1219	DO j = 1, nny
1220	work_triy(m,k) = ar(j,k,i,n)
1221	m = m + 1
1222	ENDDO
1223	ENDDO
1224
1225	ENDDO
1226
1227	CALL fft_y_m( work_triy, ny, 'forward' )
1228
1229	ELSE
1230	!
1231	!-- Cache optimized code
1232	DO k = 1, nz
1233
1234	m = 0
1235	DO n = 1, pdims(2)
1236	DO j = 1, nny
1237	work_ffty(m) = ar(j,k,i,n)
1238	m = m + 1
1239	ENDDO
1240	ENDDO
1241
1242	CALL fft_y_1d( work_ffty, 'forward' )
1243
1244	DO j = 0, ny
1245	work_triy(j,k) = work_ffty(j)
1246	ENDDO
1247
1248	ENDDO
1249
1250	ENDIF
1251
1252	!
1253	!-- Solve the linear equation system
1254	CALL tridia_1dd( ddy2, ddx2, ny, nx, i, work_triy, tri(:,:,:,tn) )
1255
1256	IF ( host(1:3) == 'nec' ) THEN
1257	!
1258	!-- Code optimized for vector processors
1259	CALL fft_y_m( work_triy, ny, 'backward' )
1260
1261	DO k = 1, nz
1262
1263	m = 0
1264	DO n = 1, pdims(2)
1265	DO j = 1, nny
1266	ar(j,k,i,n) = work_triy(m,k)
1267	m = m + 1
1268	ENDDO
1269	ENDDO
1270
1271	ENDDO
1272
1273	ELSE
1274	!
1275	!-- Cache optimized code
1276	DO k = 1, nz
1277
1278	DO j = 0, ny
1279	work_ffty(j) = work_triy(j,k)
1280	ENDDO
1281
1282	CALL fft_y_1d( work_ffty, 'backward' )
1283
1284	m = 0
1285	DO n = 1, pdims(2)
1286	DO j = 1, nny
1287	ar(j,k,i,n) = work_ffty(m)
1288	m = m + 1
1289	ENDDO
1290	ENDDO
1291
1292	ENDDO
1293
1294	ENDIF
1295
1296	ENDDO
1297
1298	DEALLOCATE( tri )
1299
1300	CALL cpu_log( log_point_s(39), 'fft_y_1d + tridia', 'stop' )
1301
1302	END SUBROUTINE ffty_tri_ffty
1303
1304
1305	SUBROUTINE tridia_1dd( ddx2, ddy2, nx, ny, j, ar, tri )
1306
1307	!------------------------------------------------------------------------------!
1308	! Solves the linear system of equations for a 1d-decomposition along x (see
1309	! tridia)
1310	!
1311	! Attention: when using the intel compilers older than 12.0, array tri must
1312	! be passed as an argument to the contained subroutines. Otherwise
1313	! addres faults will occur. This feature can be activated with
1314	! cpp-switch __intel11
1315	! On NEC, tri should not be passed (except for routine substi_1dd)
1316	! because this causes very bad performance.
1317	!------------------------------------------------------------------------------!
1318
1319	USE arrays_3d
1320	USE control_parameters
1321
1322	USE pegrid
1323
1324	IMPLICIT NONE
1325
1326	INTEGER :: i, j, k, nnyh, nx, ny, omp_get_thread_num, tn
1327
1328	REAL :: ddx2, ddy2
1329
1330	REAL, DIMENSION(0:nx,1:nz) :: ar
1331	REAL, DIMENSION(5,0:nx,0:nz-1) :: tri
1332
1333
1334	nnyh = ( ny + 1 ) / 2
1335
1336	!
1337	!-- Define constant elements of the tridiagonal matrix.
1338	!-- The compiler on SX6 does loop exchange. If 0:nx is a high power of 2,
1339	!-- the exchanged loops create bank conflicts. The following directive
1340	!-- prohibits loop exchange and the loops perform much better.
1341	! tn = omp_get_thread_num()
1342	! WRITE( 120+tn, * ) '+++ id=',myid,' nx=',nx,' thread=', omp_get_thread_num()
1343	! CALL local_flush( 120+tn )
1344	!CDIR NOLOOPCHG
1345	DO k = 0, nz-1
1346	DO i = 0,nx
1347	tri(2,i,k) = ddzu_pres(k+1) * ddzw(k+1)
1348	tri(3,i,k) = ddzu_pres(k+2) * ddzw(k+1)
1349	ENDDO
1350	ENDDO
1351	! WRITE( 120+tn, * ) '+++ id=',myid,' end of first tridia loop thread=', omp_get_thread_num()
1352	! CALL local_flush( 120+tn )
1353
1354	IF ( j <= nnyh ) THEN
1355	#if defined( __intel11 )
1356	CALL maketri_1dd( j, tri )
1357	#else
1358	CALL maketri_1dd( j )
1359	#endif
1360	ELSE
1361	#if defined( __intel11 )
1362	CALL maketri_1dd( ny+1-j, tri )
1363	#else
1364	CALL maketri_1dd( ny+1-j )
1365	#endif
1366	ENDIF
1367	#if defined( __intel11 )
1368	CALL split_1dd( tri )
1369	#else
1370	CALL split_1dd
1371	#endif
1372	CALL substi_1dd( ar, tri )
1373
1374	CONTAINS
1375
1376	#if defined( __intel11 )
1377	SUBROUTINE maketri_1dd( j, tri )
1378	#else
1379	SUBROUTINE maketri_1dd( j )
1380	#endif
1381
1382	!------------------------------------------------------------------------------!
1383	! computes the i- and j-dependent component of the matrix
1384	!------------------------------------------------------------------------------!
1385
1386	USE constants
1387
1388	IMPLICIT NONE
1389
1390	INTEGER :: i, j, k, nnxh
1391	REAL :: a, c
1392
1393	REAL, DIMENSION(0:nx) :: l
1394
1395	#if defined( __intel11 )
1396	REAL, DIMENSION(5,0:nx,0:nz-1) :: tri
1397	#endif
1398
1399
1400	nnxh = ( nx + 1 ) / 2
1401	!
1402	!-- Provide the tridiagonal matrix for solution of the Poisson equation in
1403	!-- Fourier space. The coefficients are computed following the method of
1404	!-- Schmidt et al. (DFVLR-Mitteilung 84-15), which departs from Stephan
1405	!-- Siano's original version by discretizing the Poisson equation,
1406	!-- before it is Fourier-transformed
1407	DO i = 0, nx
1408	IF ( i >= 0 .AND. i <= nnxh ) THEN
1409	l(i) = 2.0 * ( 1.0 - COS( ( 2.0 * pi * i ) / &
1410	REAL( nx+1 ) ) ) * ddx2 + &
1411	2.0 * ( 1.0 - COS( ( 2.0 * pi * j ) / &
1412	REAL( ny+1 ) ) ) * ddy2
1413	ELSE
1414	l(i) = 2.0 * ( 1.0 - COS( ( 2.0 * pi * ( nx+1-i ) ) / &
1415	REAL( nx+1 ) ) ) * ddx2 + &
1416	2.0 * ( 1.0 - COS( ( 2.0 * pi * j ) / &
1417	REAL( ny+1 ) ) ) * ddy2
1418	ENDIF
1419	ENDDO
1420
1421	DO k = 0, nz-1
1422	DO i = 0, nx
1423	a = -1.0 * ddzu_pres(k+2) * ddzw(k+1)
1424	c = -1.0 * ddzu_pres(k+1) * ddzw(k+1)
1425	tri(1,i,k) = a + c - l(i)
1426	ENDDO
1427	ENDDO
1428	IF ( ibc_p_b == 1 .OR. ibc_p_b == 2 ) THEN
1429	DO i = 0, nx
1430	tri(1,i,0) = tri(1,i,0) + tri(2,i,0)
1431	ENDDO
1432	ENDIF
1433	IF ( ibc_p_t == 1 ) THEN
1434	DO i = 0, nx
1435	tri(1,i,nz-1) = tri(1,i,nz-1) + tri(3,i,nz-1)
1436	ENDDO
1437	ENDIF
1438
1439	END SUBROUTINE maketri_1dd
1440
1441
1442	#if defined( __intel11 )
1443	SUBROUTINE split_1dd( tri )
1444	#else
1445	SUBROUTINE split_1dd
1446	#endif
1447
1448	!------------------------------------------------------------------------------!
1449	! Splitting of the tridiagonal matrix (Thomas algorithm)
1450	!------------------------------------------------------------------------------!
1451
1452	IMPLICIT NONE
1453
1454	INTEGER :: i, k
1455
1456	#if defined( __intel11 )
1457	REAL, DIMENSION(5,0:nx,0:nz-1) :: tri
1458	#endif
1459
1460
1461	!
1462	!-- Splitting
1463	DO i = 0, nx
1464	tri(4,i,0) = tri(1,i,0)
1465	ENDDO
1466	DO k = 1, nz-1
1467	DO i = 0, nx
1468	tri(5,i,k) = tri(2,i,k) / tri(4,i,k-1)
1469	tri(4,i,k) = tri(1,i,k) - tri(3,i,k-1) * tri(5,i,k)
1470	ENDDO
1471	ENDDO
1472
1473	END SUBROUTINE split_1dd
1474
1475
1476	SUBROUTINE substi_1dd( ar, tri )
1477
1478	!------------------------------------------------------------------------------!
1479	! Substitution (Forward and Backward) (Thomas algorithm)
1480	!------------------------------------------------------------------------------!
1481
1482	IMPLICIT NONE
1483
1484	INTEGER :: i, k
1485
1486	REAL, DIMENSION(0:nx,nz) :: ar
1487	REAL, DIMENSION(0:nx,0:nz-1) :: ar1
1488	REAL, DIMENSION(5,0:nx,0:nz-1) :: tri
1489
1490	!
1491	!-- Forward substitution
1492	DO i = 0, nx
1493	ar1(i,0) = ar(i,1)
1494	ENDDO
1495	DO k = 1, nz-1
1496	DO i = 0, nx
1497	ar1(i,k) = ar(i,k+1) - tri(5,i,k) * ar1(i,k-1)
1498	ENDDO
1499	ENDDO
1500
1501	!
1502	!-- Backward substitution
1503	!-- Note, the add of 1.0E-20 in the denominator is due to avoid divisions
1504	!-- by zero appearing if the pressure bc is set to neumann at the top of
1505	!-- the model domain.
1506	DO i = 0, nx
1507	ar(i,nz) = ar1(i,nz-1) / ( tri(4,i,nz-1) + 1.0E-20 )
1508	ENDDO
1509	DO k = nz-2, 0, -1
1510	DO i = 0, nx
1511	ar(i,k+1) = ( ar1(i,k) - tri(3,i,k) * ar(i,k+2) ) &
1512	/ tri(4,i,k)
1513	ENDDO
1514	ENDDO
1515
1516	!
1517	!-- Indices i=0, j=0 correspond to horizontally averaged pressure.
1518	!-- The respective values of ar should be zero at all k-levels if
1519	!-- acceleration of horizontally averaged vertical velocity is zero.
1520	IF ( ibc_p_b == 1 .AND. ibc_p_t == 1 ) THEN
1521	IF ( j == 0 ) THEN
1522	DO k = 1, nz
1523	ar(0,k) = 0.0
1524	ENDDO
1525	ENDIF
1526	ENDIF
1527
1528	END SUBROUTINE substi_1dd
1529
1530	END SUBROUTINE tridia_1dd
1531
1532	#endif
1533	#endif
1534	END MODULE poisfft_mod

Note: See TracBrowser for help on using the repository browser.

Download in other formats:

| Impressum | ©Leibniz Universität Hannover |