Home

Context Navigation

source: palm/trunk/SOURCE/poisfft.f90 @ 1092

Last change on this file since 1092 was 1092, checked in by raasch, 11 years ago
unused variables remove from several routines
Property svn:keywords set to `Id`
File size: 47.7 KB

Line
1	MODULE poisfft_mod
2
3	!--------------------------------------------------------------------------------!
4	! This file is part of PALM.
5	!
6	! PALM is free software: you can redistribute it and/or modify it under the terms
7	! of the GNU General Public License as published by the Free Software Foundation,
8	! either version 3 of the License, or (at your option) any later version.
9	!
10	! PALM is distributed in the hope that it will be useful, but WITHOUT ANY
11	! WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
12	! A PARTICULAR PURPOSE. See the GNU General Public License for more details.
13	!
14	! You should have received a copy of the GNU General Public License along with
15	! PALM. If not, see <http://www.gnu.org/licenses/>.
16	!
17	! Copyright 1997-2012 Leibniz University Hannover
18	!--------------------------------------------------------------------------------!
19	!
20	! Current revisions:
21	! -----------------
22	! unused variables removed
23	!
24	! Former revisions:
25	! -----------------
26	! $Id: poisfft.f90 1092 2013-02-02 11:24:22Z raasch $
27	!
28	! 1036 2012-10-22 13:43:42Z raasch
29	! code put under GPL (PALM 3.9)
30	!
31	! 2012-09-21 07:03:55Z raasch
32	! FLOAT type conversion replaced by REAL
33	!
34	! 1003 2012-09-14 14:35:53Z raasch
35	! indices nxa, nya, etc. replaced by nx, ny, etc.
36	!
37	! 940 2012-07-09 14:31:00Z raasch
38	! special handling of tri-array as an argument in tridia_1dd routines switched
39	! off because it caused segmentation faults with intel 12.1 compiler
40	!
41	! 877 2012-04-03 11:21:44Z suehring
42	! Bugfix: Avoid divisions by zero in case of using a 'neumann' bc for the
43	! pressure at the top of the model domain.
44	!
45	! 809 2012-01-30 13:32:58Z maronga
46	! Bugfix: replaced .AND. and .NOT. with && and ! in the preprocessor directives
47	!
48	! 807 2012-01-25 11:53:51Z maronga
49	! New cpp directive "__check" implemented which is used by check_namelist_files
50	! (most of the code is unneeded by check_namelist_files).
51	!
52	! 763 2011-10-06 09:32:09Z suehring
53	! Comment added concerning the last change.
54	!
55	! 761 2011-10-05 17:58:52Z suehring
56	! Bugfix: Avoid divisions by zero in case of using a 'neumann' bc for the
57	! pressure at the top of the model domain.
58	!
59	! 696 2011-03-18 07:03:49Z raasch
60	! work_fftx removed from PRIVATE clauses in fftx_tr_xy and tr_yx_fftx
61	!
62	! 683 2011-02-09 14:25:15Z raasch
63	! openMP parallelization for 2d-domain-decomposition
64	!
65	! 667 2010-12-23 12:06:00Z suehring/gryschka
66	! ddzu replaced by ddzu_pres due to changes in zu(0)
67	!
68	! 622 2010-12-10 08:08:13Z raasch
69	! optional barriers included in order to speed up collective operations
70	!
71	! 377 2009-09-04 11:09:00Z raasch
72	! __lcmuk changed to __lc to avoid problems with Intel compiler on sgi-ice
73	!
74	! 164 2008-05-15 08:46:15Z raasch
75	! Arguments removed from transpose routines
76	!
77	! 128 2007-10-26 13:11:14Z raasch
78	! Bugfix: wavenumber calculation for even nx in routines maketri
79	!
80	! 85 2007-05-11 09:35:14Z raasch
81	! Bugfix: work_fft*_vec removed from some PRIVATE-declarations
82	!
83	! 76 2007-03-29 00:58:32Z raasch
84	! Tridiagonal coefficients adjusted for Neumann boundary conditions both at
85	! the bottom and the top.
86	!
87	! RCS Log replace by Id keyword, revision history cleaned up
88	!
89	! Revision 1.24 2006/08/04 15:00:24 raasch
90	! Default setting of the thread number tn in case of not using OpenMP
91	!
92	! Revision 1.23 2006/02/23 12:48:38 raasch
93	! Additional compiler directive in routine tridia_1dd for preventing loop
94	! exchange on NEC-SX6
95	!
96	! Revision 1.20 2004/04/30 12:38:09 raasch
97	! Parts of former poisfft_hybrid moved to this subroutine,
98	! former subroutine changed to a module, renaming of FFT-subroutines and
99	! -module, FFTs completely substituted by calls of fft_x and fft_y,
100	! NAG fft used in the non-parallel case completely removed, l in maketri
101	! is now a 1d-array, variables passed by modules instead of using parameter
102	! lists, enlarged transposition arrays introduced
103	!
104	! Revision 1.1 1997/07/24 11:24:14 raasch
105	! Initial revision
106	!
107	!
108	! Description:
109	! ------------
110	! See below.
111	!------------------------------------------------------------------------------!
112
113	!--------------------------------------------------------------------------!
114	! poisfft !
115	! !
116	! Original version: Stephan Siano (pois3d) !
117	! !
118	! Institute of Meteorology and Climatology, University of Hannover !
119	! Germany !
120	! !
121	! Version as of July 23,1996 !
122	! !
123	! !
124	! Version for parallel computers: Siegfried Raasch !
125	! !
126	! Version as of July 03,1997 !
127	! !
128	! Solves the Poisson equation with a 2D spectral method !
129	! d^2 p / dx^2 + d^2 p / dy^2 + d^2 p / dz^2 = s !
130	! !
131	! Input: !
132	! real ar contains in the (nnx,nny,nnz) elements, !
133	! starting from the element (1,nys,nxl), the !
134	! values for s !
135	! real work Temporary array !
136	! !
137	! Output: !
138	! real ar contains the solution for p !
139	!--------------------------------------------------------------------------!
140
141	USE fft_xy
142	USE indices
143	USE transpose_indices
144
145	IMPLICIT NONE
146
147	PRIVATE
148
149	#if ! defined ( __check )
150	PUBLIC poisfft, poisfft_init
151
152	INTERFACE poisfft
153	MODULE PROCEDURE poisfft
154	END INTERFACE poisfft
155
156	INTERFACE poisfft_init
157	MODULE PROCEDURE poisfft_init
158	END INTERFACE poisfft_init
159	#else
160	PUBLIC poisfft_init
161
162	INTERFACE poisfft_init
163	MODULE PROCEDURE poisfft_init
164	END INTERFACE poisfft_init
165	#endif
166
167	CONTAINS
168
169	SUBROUTINE poisfft_init
170
171	CALL fft_init
172
173	END SUBROUTINE poisfft_init
174
175	#if ! defined ( __check )
176	SUBROUTINE poisfft( ar, work )
177
178	USE cpulog
179	USE interfaces
180	USE pegrid
181
182	IMPLICIT NONE
183
184	REAL, DIMENSION(1:nz,nys:nyn,nxl:nxr) :: ar, work
185
186
187	CALL cpu_log( log_point_s(3), 'poisfft', 'start' )
188
189	!
190	!-- Two-dimensional Fourier Transformation in x- and y-direction.
191	#if defined( __parallel )
192	IF ( pdims(2) == 1 ) THEN
193
194	!
195	!-- 1d-domain-decomposition along x:
196	!-- FFT along y and transposition y --> x
197	CALL ffty_tr_yx( ar, work, ar )
198
199	!
200	!-- FFT along x, solving the tridiagonal system and backward FFT
201	CALL fftx_tri_fftx( ar )
202
203	!
204	!-- Transposition x --> y and backward FFT along y
205	CALL tr_xy_ffty( ar, work, ar )
206
207	ELSEIF ( pdims(1) == 1 ) THEN
208
209	!
210	!-- 1d-domain-decomposition along y:
211	!-- FFT along x and transposition x --> y
212	CALL fftx_tr_xy( ar, work, ar )
213
214	!
215	!-- FFT along y, solving the tridiagonal system and backward FFT
216	CALL ffty_tri_ffty( ar )
217
218	!
219	!-- Transposition y --> x and backward FFT along x
220	CALL tr_yx_fftx( ar, work, ar )
221
222	ELSE
223
224	!
225	!-- 2d-domain-decomposition
226	!-- Transposition z --> x
227	CALL cpu_log( log_point_s(5), 'transpo forward', 'start' )
228	CALL transpose_zx( ar, work, ar )
229	CALL cpu_log( log_point_s(5), 'transpo forward', 'pause' )
230
231	CALL cpu_log( log_point_s(4), 'fft_x', 'start' )
232	CALL fftxp( ar, 'forward' )
233	CALL cpu_log( log_point_s(4), 'fft_x', 'pause' )
234
235	!
236	!-- Transposition x --> y
237	CALL cpu_log( log_point_s(5), 'transpo forward', 'continue' )
238	CALL transpose_xy( ar, work, ar )
239	CALL cpu_log( log_point_s(5), 'transpo forward', 'pause' )
240
241	CALL cpu_log( log_point_s(7), 'fft_y', 'start' )
242	CALL fftyp( ar, 'forward' )
243	CALL cpu_log( log_point_s(7), 'fft_y', 'pause' )
244
245	!
246	!-- Transposition y --> z
247	CALL cpu_log( log_point_s(5), 'transpo forward', 'continue' )
248	CALL transpose_yz( ar, work, ar )
249	CALL cpu_log( log_point_s(5), 'transpo forward', 'stop' )
250
251	!
252	!-- Solve the Poisson equation in z-direction in cartesian space.
253	CALL cpu_log( log_point_s(6), 'tridia', 'start' )
254	CALL tridia( ar )
255	CALL cpu_log( log_point_s(6), 'tridia', 'stop' )
256
257	!
258	!-- Inverse Fourier Transformation
259	!-- Transposition z --> y
260	CALL cpu_log( log_point_s(8), 'transpo invers', 'start' )
261	CALL transpose_zy( ar, work, ar )
262	CALL cpu_log( log_point_s(8), 'transpo invers', 'pause' )
263
264	CALL cpu_log( log_point_s(7), 'fft_y', 'continue' )
265	CALL fftyp( ar, 'backward' )
266	CALL cpu_log( log_point_s(7), 'fft_y', 'stop' )
267
268	!
269	!-- Transposition y --> x
270	CALL cpu_log( log_point_s(8), 'transpo invers', 'continue' )
271	CALL transpose_yx( ar, work, ar )
272	CALL cpu_log( log_point_s(8), 'transpo invers', 'pause' )
273
274	CALL cpu_log( log_point_s(4), 'fft_x', 'continue' )
275	CALL fftxp( ar, 'backward' )
276	CALL cpu_log( log_point_s(4), 'fft_x', 'stop' )
277
278	!
279	!-- Transposition x --> z
280	CALL cpu_log( log_point_s(8), 'transpo invers', 'continue' )
281	CALL transpose_xz( ar, work, ar )
282	CALL cpu_log( log_point_s(8), 'transpo invers', 'stop' )
283
284	ENDIF
285
286	#else
287
288	!
289	!-- Two-dimensional Fourier Transformation along x- and y-direction.
290	CALL cpu_log( log_point_s(4), 'fft_x', 'start' )
291	CALL fftx( ar, 'forward' )
292	CALL cpu_log( log_point_s(4), 'fft_x', 'pause' )
293	CALL cpu_log( log_point_s(7), 'fft_y', 'start' )
294	CALL ffty( ar, 'forward' )
295	CALL cpu_log( log_point_s(7), 'fft_y', 'pause' )
296
297	!
298	!-- Solve the Poisson equation in z-direction in cartesian space.
299	CALL cpu_log( log_point_s(6), 'tridia', 'start' )
300	CALL tridia( ar )
301	CALL cpu_log( log_point_s(6), 'tridia', 'stop' )
302
303	!
304	!-- Inverse Fourier Transformation.
305	CALL cpu_log( log_point_s(7), 'fft_y', 'continue' )
306	CALL ffty( ar, 'backward' )
307	CALL cpu_log( log_point_s(7), 'fft_y', 'stop' )
308	CALL cpu_log( log_point_s(4), 'fft_x', 'continue' )
309	CALL fftx( ar, 'backward' )
310	CALL cpu_log( log_point_s(4), 'fft_x', 'stop' )
311
312	#endif
313
314	CALL cpu_log( log_point_s(3), 'poisfft', 'stop' )
315
316	END SUBROUTINE poisfft
317
318
319
320	SUBROUTINE tridia( ar )
321
322	!------------------------------------------------------------------------------!
323	! solves the linear system of equations:
324	!
325	! -(4 pi^2(i^2/(dx^2nnx^2)+j^2/(dy^2nny^2))+
326	! 1/(dzu(k)dzw(k))+1/(dzu(k-1)dzw(k)))*p(i,j,k)+
327	! 1/(dzu(k)dzw(k))p(i,j,k+1)+1/(dzu(k-1)dzw(k))p(i,j,k-1)=d(i,j,k)
328	!
329	! by using the Thomas algorithm
330	!------------------------------------------------------------------------------!
331
332	USE arrays_3d
333
334	IMPLICIT NONE
335
336	INTEGER :: i, j, k, nnyh
337
338	REAL, DIMENSION(nxl_z:nxr_z,0:nz-1) :: ar1
339	REAL, DIMENSION(5,nxl_z:nxr_z,0:nz-1) :: tri
340
341	#if defined( __parallel )
342	REAL :: ar(nxl_z:nxr_z,nys_z:nyn_z,1:nz)
343	#else
344	REAL :: ar(1:nz,nys_z:nyn_z,nxl_z:nxr_z)
345	#endif
346
347
348	nnyh = (ny+1) / 2
349
350	!
351	!-- Define constant elements of the tridiagonal matrix.
352	!$OMP PARALLEL PRIVATE ( k, i )
353	!$OMP DO
354	DO k = 0, nz-1
355	DO i = nxl_z, nxr_z
356	tri(2,i,k) = ddzu_pres(k+1) * ddzw(k+1)
357	tri(3,i,k) = ddzu_pres(k+2) * ddzw(k+1)
358	ENDDO
359	ENDDO
360	!$OMP END PARALLEL
361
362	#if defined( __parallel )
363	!
364	!-- Repeat for all y-levels.
365	!$OMP PARALLEL FIRSTPRIVATE( tri ) PRIVATE ( ar1, j )
366	!$OMP DO
367	DO j = nys_z, nyn_z
368	IF ( j <= nnyh ) THEN
369	CALL maketri( tri, j )
370	ELSE
371	CALL maketri( tri, ny+1-j )
372	ENDIF
373	CALL split( tri )
374	CALL substi( ar, ar1, tri, j )
375	ENDDO
376	!$OMP END PARALLEL
377	#else
378	!
379	!-- First y-level.
380	CALL maketri( tri, nys_z )
381	CALL split( tri )
382	CALL substi( ar, ar1, tri, 0 )
383
384	!
385	!-- Further y-levels.
386	DO j = 1, nnyh - 1
387	CALL maketri( tri, j )
388	CALL split( tri )
389	CALL substi( ar, ar1, tri, j )
390	CALL substi( ar, ar1, tri, ny+1-j )
391	ENDDO
392	CALL maketri( tri, nnyh )
393	CALL split( tri )
394	CALL substi( ar, ar1, tri, nnyh+nys )
395	#endif
396
397	CONTAINS
398
399	SUBROUTINE maketri( tri, j )
400
401	!------------------------------------------------------------------------------!
402	! Computes the i- and j-dependent component of the matrix
403	!------------------------------------------------------------------------------!
404
405	USE arrays_3d
406	USE constants
407	USE control_parameters
408	USE grid_variables
409
410	IMPLICIT NONE
411
412	INTEGER :: i, j, k, nnxh
413	REAL :: a, c
414	REAL :: ll(nxl_z:nxr_z)
415	REAL :: tri(5,nxl_z:nxr_z,0:nz-1)
416
417
418	nnxh = ( nx + 1 ) / 2
419
420	!
421	!-- Provide the tridiagonal matrix for solution of the Poisson equation in
422	!-- Fourier space. The coefficients are computed following the method of
423	!-- Schmidt et al. (DFVLR-Mitteilung 84-15), which departs from Stephan
424	!-- Siano's original version by discretizing the Poisson equation,
425	!-- before it is Fourier-transformed
426	#if defined( __parallel )
427	DO i = nxl_z, nxr_z
428	IF ( i >= 0 .AND. i <= nnxh ) THEN
429	ll(i) = 2.0 * ( 1.0 - COS( ( 2.0 * pi * i ) / &
430	REAL( nx+1 ) ) ) / ( dx * dx ) + &
431	2.0 * ( 1.0 - COS( ( 2.0 * pi * j ) / &
432	REAL( ny+1 ) ) ) / ( dy * dy )
433	ELSE
434	ll(i) = 2.0 * ( 1.0 - COS( ( 2.0 * pi * ( nx+1-i ) ) / &
435	REAL( nx+1 ) ) ) / ( dx * dx ) + &
436	2.0 * ( 1.0 - COS( ( 2.0 * pi * j ) / &
437	REAL( ny+1 ) ) ) / ( dy * dy )
438	ENDIF
439	DO k = 0,nz-1
440	a = -1.0 * ddzu_pres(k+2) * ddzw(k+1)
441	c = -1.0 * ddzu_pres(k+1) * ddzw(k+1)
442	tri(1,i,k) = a + c - ll(i)
443	ENDDO
444	ENDDO
445	#else
446	DO i = 0, nnxh
447	ll(i) = 2.0 * ( 1.0 - COS( ( 2.0 * pi * i ) / REAL( nx+1 ) ) ) / &
448	( dx * dx ) + &
449	2.0 * ( 1.0 - COS( ( 2.0 * pi * j ) / REAL( ny+1 ) ) ) / &
450	( dy * dy )
451	DO k = 0, nz-1
452	a = -1.0 * ddzu_pres(k+2) * ddzw(k+1)
453	c = -1.0 * ddzu_pres(k+1) * ddzw(k+1)
454	tri(1,i,k) = a + c - ll(i)
455	IF ( i >= 1 .and. i < nnxh ) THEN
456	tri(1,nx+1-i,k) = tri(1,i,k)
457	ENDIF
458	ENDDO
459	ENDDO
460	#endif
461	IF ( ibc_p_b == 1 .OR. ibc_p_b == 2 ) THEN
462	DO i = nxl_z, nxr_z
463	tri(1,i,0) = tri(1,i,0) + tri(2,i,0)
464	ENDDO
465	ENDIF
466	IF ( ibc_p_t == 1 ) THEN
467	DO i = nxl_z, nxr_z
468	tri(1,i,nz-1) = tri(1,i,nz-1) + tri(3,i,nz-1)
469	ENDDO
470	ENDIF
471
472	END SUBROUTINE maketri
473
474
475	SUBROUTINE substi( ar, ar1, tri, j )
476
477	!------------------------------------------------------------------------------!
478	! Substitution (Forward and Backward) (Thomas algorithm)
479	!------------------------------------------------------------------------------!
480
481	USE control_parameters
482
483	IMPLICIT NONE
484
485	INTEGER :: i, j, k
486	REAL :: ar1(nxl_z:nxr_z,0:nz-1)
487	REAL :: tri(5,nxl_z:nxr_z,0:nz-1)
488	#if defined( __parallel )
489	REAL :: ar(nxl_z:nxr_z,nys_z:nyn_z,1:nz)
490	#else
491	REAL :: ar(1:nz,nys_z:nyn_z,nxl_z:nxr_z)
492	#endif
493
494	!
495	!-- Forward substitution.
496	DO i = nxl_z, nxr_z
497	#if defined( __parallel )
498	ar1(i,0) = ar(i,j,1)
499	#else
500	ar1(i,0) = ar(1,j,i)
501	#endif
502	ENDDO
503	DO k = 1, nz - 1
504	DO i = nxl_z, nxr_z
505	#if defined( __parallel )
506	ar1(i,k) = ar(i,j,k+1) - tri(5,i,k) * ar1(i,k-1)
507	#else
508	ar1(i,k) = ar(k+1,j,i) - tri(5,i,k) * ar1(i,k-1)
509	#endif
510	ENDDO
511	ENDDO
512
513	!
514	!-- Backward substitution
515	!-- Note, the 1.0E-20 in the denominator is due to avoid divisions
516	!-- by zero appearing if the pressure bc is set to neumann at the top of
517	!-- the model domain.
518	DO i = nxl_z, nxr_z
519	#if defined( __parallel )
520	ar(i,j,nz) = ar1(i,nz-1) / ( tri(4,i,nz-1) + 1.0E-20 )
521	#else
522	ar(nz,j,i) = ar1(i,nz-1) / ( tri(4,i,nz-1) + 1.0E-20 )
523	#endif
524	ENDDO
525	DO k = nz-2, 0, -1
526	DO i = nxl_z, nxr_z
527	#if defined( __parallel )
528	ar(i,j,k+1) = ( ar1(i,k) - tri(3,i,k) * ar(i,j,k+2) ) &
529	/ tri(4,i,k)
530	#else
531	ar(k+1,j,i) = ( ar1(i,k) - tri(3,i,k) * ar(k+2,j,i) ) &
532	/ tri(4,i,k)
533	#endif
534	ENDDO
535	ENDDO
536
537	!
538	!-- Indices i=0, j=0 correspond to horizontally averaged pressure.
539	!-- The respective values of ar should be zero at all k-levels if
540	!-- acceleration of horizontally averaged vertical velocity is zero.
541	IF ( ibc_p_b == 1 .AND. ibc_p_t == 1 ) THEN
542	IF ( j == 0 .AND. nxl_z == 0 ) THEN
543	#if defined( __parallel )
544	DO k = 1, nz
545	ar(nxl_z,j,k) = 0.0
546	ENDDO
547	#else
548	DO k = 1, nz
549	ar(k,j,nxl_z) = 0.0
550	ENDDO
551	#endif
552	ENDIF
553	ENDIF
554
555	END SUBROUTINE substi
556
557
558	SUBROUTINE split( tri )
559
560	!------------------------------------------------------------------------------!
561	! Splitting of the tridiagonal matrix (Thomas algorithm)
562	!------------------------------------------------------------------------------!
563
564	IMPLICIT NONE
565
566	INTEGER :: i, k
567	REAL :: tri(5,nxl_z:nxr_z,0:nz-1)
568
569	!
570	!-- Splitting.
571	DO i = nxl_z, nxr_z
572	tri(4,i,0) = tri(1,i,0)
573	ENDDO
574	DO k = 1, nz-1
575	DO i = nxl_z, nxr_z
576	tri(5,i,k) = tri(2,i,k) / tri(4,i,k-1)
577	tri(4,i,k) = tri(1,i,k) - tri(3,i,k-1) * tri(5,i,k)
578	ENDDO
579	ENDDO
580
581	END SUBROUTINE split
582
583	END SUBROUTINE tridia
584
585
586	#if defined( __parallel )
587	SUBROUTINE fftxp( ar, direction )
588
589	!------------------------------------------------------------------------------!
590	! Fourier-transformation along x-direction Parallelized version
591	!------------------------------------------------------------------------------!
592
593	IMPLICIT NONE
594
595	CHARACTER (LEN=*) :: direction
596	INTEGER :: j, k
597	REAL :: ar(0:nx,nys_x:nyn_x,nzb_x:nzt_x)
598
599	!
600	!-- Performing the fft with one of the methods implemented
601	!$OMP PARALLEL PRIVATE ( j, k )
602	!$OMP DO
603	DO k = nzb_x, nzt_x
604	DO j = nys_x, nyn_x
605	CALL fft_x( ar(0:nx,j,k), direction )
606	ENDDO
607	ENDDO
608	!$OMP END PARALLEL
609
610	END SUBROUTINE fftxp
611
612	#else
613	SUBROUTINE fftx( ar, direction )
614
615	!------------------------------------------------------------------------------!
616	! Fourier-transformation along x-direction Non parallel version
617	!------------------------------------------------------------------------------!
618
619	IMPLICIT NONE
620
621	CHARACTER (LEN=*) :: direction
622	INTEGER :: i, j, k
623	REAL :: ar(1:nz,0:ny,0:nx)
624
625	!
626	!-- Performing the fft with one of the methods implemented
627	!$OMP PARALLEL PRIVATE ( j, k )
628	!$OMP DO
629	DO k = 1, nz
630	DO j = 0, ny
631	CALL fft_x( ar(k,j,0:nx), direction )
632	ENDDO
633	ENDDO
634	!$OMP END PARALLEL
635
636	END SUBROUTINE fftx
637	#endif
638
639
640	#if defined( __parallel )
641	SUBROUTINE fftyp( ar, direction )
642
643	!------------------------------------------------------------------------------!
644	! Fourier-transformation along y-direction Parallelized version
645	!------------------------------------------------------------------------------!
646
647	IMPLICIT NONE
648
649	CHARACTER (LEN=*) :: direction
650	INTEGER :: i, k
651	REAL :: ar(0:ny,nxl_y:nxr_y,nzb_y:nzt_y)
652
653	!
654	!-- Performing the fft with one of the methods implemented
655	!$OMP PARALLEL PRIVATE ( i, k )
656	!$OMP DO
657	DO k = nzb_y, nzt_y
658	DO i = nxl_y, nxr_y
659	CALL fft_y( ar(0:ny,i,k), direction )
660	ENDDO
661	ENDDO
662	!$OMP END PARALLEL
663
664	END SUBROUTINE fftyp
665
666	#else
667	SUBROUTINE ffty( ar, direction )
668
669	!------------------------------------------------------------------------------!
670	! Fourier-transformation along y-direction Non parallel version
671	!------------------------------------------------------------------------------!
672
673	IMPLICIT NONE
674
675	CHARACTER (LEN=*) :: direction
676	INTEGER :: i, k
677	REAL :: ar(1:nz,0:ny,0:nx)
678
679	!
680	!-- Performing the fft with one of the methods implemented
681	!$OMP PARALLEL PRIVATE ( i, k )
682	!$OMP DO
683	DO k = 1, nz
684	DO i = 0, nx
685	CALL fft_y( ar(k,0:ny,i), direction )
686	ENDDO
687	ENDDO
688	!$OMP END PARALLEL
689
690	END SUBROUTINE ffty
691	#endif
692
693	#if defined( __parallel )
694	SUBROUTINE ffty_tr_yx( f_in, work, f_out )
695
696	!------------------------------------------------------------------------------!
697	! Fourier-transformation along y with subsequent transposition y --> x for
698	! a 1d-decomposition along x
699	!
700	! ATTENTION: The performance of this routine is much faster on the NEC-SX6,
701	! if the first index of work_ffty_vec is odd. Otherwise
702	! memory bank conflicts may occur (especially if the index is a
703	! multiple of 128). That's why work_ffty_vec is dimensioned as
704	! 0:ny+1.
705	! Of course, this will not work if users are using an odd number
706	! of gridpoints along y.
707	!------------------------------------------------------------------------------!
708
709	USE control_parameters
710	USE cpulog
711	USE indices
712	USE interfaces
713	USE pegrid
714	USE transpose_indices
715
716	IMPLICIT NONE
717
718	INTEGER :: i, iend, iouter, ir, j, k
719	INTEGER, PARAMETER :: stridex = 4
720
721	REAL, DIMENSION(0:ny,stridex) :: work_ffty
722	#if defined( __nec )
723	REAL, DIMENSION(0:ny+1,1:nz,nxl:nxr) :: work_ffty_vec
724	#endif
725	REAL, DIMENSION(1:nz,0:ny,nxl:nxr) :: f_in
726	REAL, DIMENSION(nnx,1:nz,nys_x:nyn_x,pdims(1)) :: f_out
727	REAL, DIMENSION(nxl:nxr,1:nz,0:ny) :: work
728
729	!
730	!-- Carry out the FFT along y, where all data are present due to the
731	!-- 1d-decomposition along x. Resort the data in a way that x becomes
732	!-- the first index.
733	CALL cpu_log( log_point_s(7), 'fft_y', 'start' )
734
735	IF ( host(1:3) == 'nec' ) THEN
736	#if defined( __nec )
737	!
738	!-- Code optimized for vector processors
739	!$OMP PARALLEL PRIVATE ( i, j, k )
740	!$OMP DO
741	DO i = nxl, nxr
742
743	DO j = 0, ny
744	DO k = 1, nz
745	work_ffty_vec(j,k,i) = f_in(k,j,i)
746	ENDDO
747	ENDDO
748
749	CALL fft_y_m( work_ffty_vec(:,:,i), ny+1, 'forward' )
750
751	ENDDO
752
753	!$OMP DO
754	DO k = 1, nz
755	DO j = 0, ny
756	DO i = nxl, nxr
757	work(i,k,j) = work_ffty_vec(j,k,i)
758	ENDDO
759	ENDDO
760	ENDDO
761	!$OMP END PARALLEL
762	#endif
763
764	ELSE
765
766	!
767	!-- Cache optimized code.
768	!-- The i-(x-)direction is split into a strided outer loop and an inner
769	!-- loop for better cache performance
770	!$OMP PARALLEL PRIVATE (i,iend,iouter,ir,j,k,work_ffty)
771	!$OMP DO
772	DO iouter = nxl, nxr, stridex
773
774	iend = MIN( iouter+stridex-1, nxr ) ! Upper bound for inner i loop
775
776	DO k = 1, nz
777
778	DO i = iouter, iend
779
780	ir = i-iouter+1 ! counter within a stride
781	DO j = 0, ny
782	work_ffty(j,ir) = f_in(k,j,i)
783	ENDDO
784	!
785	!-- FFT along y
786	CALL fft_y( work_ffty(:,ir), 'forward' )
787
788	ENDDO
789
790	!
791	!-- Resort
792	DO j = 0, ny
793	DO i = iouter, iend
794	work(i,k,j) = work_ffty(j,i-iouter+1)
795	ENDDO
796	ENDDO
797
798	ENDDO
799
800	ENDDO
801	!$OMP END PARALLEL
802
803	ENDIF
804	CALL cpu_log( log_point_s(7), 'fft_y', 'pause' )
805
806	!
807	!-- Transpose array
808	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
809	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
810	CALL MPI_ALLTOALL( work(nxl,1,0), sendrecvcount_xy, MPI_REAL, &
811	f_out(1,1,nys_x,1), sendrecvcount_xy, MPI_REAL, &
812	comm1dx, ierr )
813	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
814
815	END SUBROUTINE ffty_tr_yx
816
817
818	SUBROUTINE tr_xy_ffty( f_in, work, f_out )
819
820	!------------------------------------------------------------------------------!
821	! Transposition x --> y with a subsequent backward Fourier transformation for
822	! a 1d-decomposition along x
823	!------------------------------------------------------------------------------!
824
825	USE control_parameters
826	USE cpulog
827	USE indices
828	USE interfaces
829	USE pegrid
830	USE transpose_indices
831
832	IMPLICIT NONE
833
834	INTEGER :: i, iend, iouter, ir, j, k
835	INTEGER, PARAMETER :: stridex = 4
836
837	REAL, DIMENSION(0:ny,stridex) :: work_ffty
838	#if defined( __nec )
839	REAL, DIMENSION(0:ny+1,1:nz,nxl:nxr) :: work_ffty_vec
840	#endif
841	REAL, DIMENSION(nnx,1:nz,nys_x:nyn_x,pdims(1)) :: f_in
842	REAL, DIMENSION(1:nz,0:ny,nxl:nxr) :: f_out
843	REAL, DIMENSION(nxl:nxr,1:nz,0:ny) :: work
844
845	!
846	!-- Transpose array
847	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
848	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
849	CALL MPI_ALLTOALL( f_in(1,1,nys_x,1), sendrecvcount_xy, MPI_REAL, &
850	work(nxl,1,0), sendrecvcount_xy, MPI_REAL, &
851	comm1dx, ierr )
852	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
853
854	!
855	!-- Resort the data in a way that y becomes the first index and carry out the
856	!-- backward fft along y.
857	CALL cpu_log( log_point_s(7), 'fft_y', 'continue' )
858
859	IF ( host(1:3) == 'nec' ) THEN
860	#if defined( __nec )
861	!
862	!-- Code optimized for vector processors
863	!$OMP PARALLEL PRIVATE ( i, j, k )
864	!$OMP DO
865	DO k = 1, nz
866	DO j = 0, ny
867	DO i = nxl, nxr
868	work_ffty_vec(j,k,i) = work(i,k,j)
869	ENDDO
870	ENDDO
871	ENDDO
872
873	!$OMP DO
874	DO i = nxl, nxr
875
876	CALL fft_y_m( work_ffty_vec(:,:,i), ny+1, 'backward' )
877
878	DO j = 0, ny
879	DO k = 1, nz
880	f_out(k,j,i) = work_ffty_vec(j,k,i)
881	ENDDO
882	ENDDO
883
884	ENDDO
885	!$OMP END PARALLEL
886	#endif
887
888	ELSE
889
890	!
891	!-- Cache optimized code.
892	!-- The i-(x-)direction is split into a strided outer loop and an inner
893	!-- loop for better cache performance
894	!$OMP PARALLEL PRIVATE ( i, iend, iouter, ir, j, k, work_ffty )
895	!$OMP DO
896	DO iouter = nxl, nxr, stridex
897
898	iend = MIN( iouter+stridex-1, nxr ) ! Upper bound for inner i loop
899
900	DO k = 1, nz
901	!
902	!-- Resort
903	DO j = 0, ny
904	DO i = iouter, iend
905	work_ffty(j,i-iouter+1) = work(i,k,j)
906	ENDDO
907	ENDDO
908
909	DO i = iouter, iend
910
911	!
912	!-- FFT along y
913	ir = i-iouter+1 ! counter within a stride
914	CALL fft_y( work_ffty(:,ir), 'backward' )
915
916	DO j = 0, ny
917	f_out(k,j,i) = work_ffty(j,ir)
918	ENDDO
919	ENDDO
920
921	ENDDO
922
923	ENDDO
924	!$OMP END PARALLEL
925
926	ENDIF
927
928	CALL cpu_log( log_point_s(7), 'fft_y', 'stop' )
929
930	END SUBROUTINE tr_xy_ffty
931
932
933	SUBROUTINE fftx_tri_fftx( ar )
934
935	!------------------------------------------------------------------------------!
936	! FFT along x, solution of the tridiagonal system and backward FFT for
937	! a 1d-decomposition along x
938	!
939	! WARNING: this subroutine may still not work for hybrid parallelization
940	! with OpenMP (for possible necessary changes see the original
941	! routine poisfft_hybrid, developed by Klaus Ketelsen, May 2002)
942	!------------------------------------------------------------------------------!
943
944	USE control_parameters
945	USE cpulog
946	USE grid_variables
947	USE indices
948	USE interfaces
949	USE pegrid
950	USE transpose_indices
951
952	IMPLICIT NONE
953
954	INTEGER :: i, j, k, m, n, omp_get_thread_num, tn
955
956	REAL, DIMENSION(0:nx) :: work_fftx
957	REAL, DIMENSION(0:nx,1:nz) :: work_trix
958	REAL, DIMENSION(nnx,1:nz,nys_x:nyn_x,pdims(1)) :: ar
959	REAL, DIMENSION(:,:,:,:), ALLOCATABLE :: tri
960
961
962	CALL cpu_log( log_point_s(33), 'fft_x + tridia', 'start' )
963
964	ALLOCATE( tri(5,0:nx,0:nz-1,0:threads_per_task-1) )
965
966	tn = 0 ! Default thread number in case of one thread
967	!$OMP PARALLEL DO PRIVATE ( i, j, k, m, n, tn, work_fftx, work_trix )
968	DO j = nys_x, nyn_x
969
970	!$ tn = omp_get_thread_num()
971
972	IF ( host(1:3) == 'nec' ) THEN
973	!
974	!-- Code optimized for vector processors
975	DO k = 1, nz
976
977	m = 0
978	DO n = 1, pdims(1)
979	DO i = 1, nnx
980	work_trix(m,k) = ar(i,k,j,n)
981	m = m + 1
982	ENDDO
983	ENDDO
984
985	ENDDO
986
987	CALL fft_x_m( work_trix, 'forward' )
988
989	ELSE
990	!
991	!-- Cache optimized code
992	DO k = 1, nz
993
994	m = 0
995	DO n = 1, pdims(1)
996	DO i = 1, nnx
997	work_fftx(m) = ar(i,k,j,n)
998	m = m + 1
999	ENDDO
1000	ENDDO
1001
1002	CALL fft_x( work_fftx, 'forward' )
1003
1004	DO i = 0, nx
1005	work_trix(i,k) = work_fftx(i)
1006	ENDDO
1007
1008	ENDDO
1009
1010	ENDIF
1011
1012	!
1013	!-- Solve the linear equation system
1014	CALL tridia_1dd( ddx2, ddy2, nx, ny, j, work_trix, tri(:,:,:,tn) )
1015
1016	IF ( host(1:3) == 'nec' ) THEN
1017	!
1018	!-- Code optimized for vector processors
1019	CALL fft_x_m( work_trix, 'backward' )
1020
1021	DO k = 1, nz
1022
1023	m = 0
1024	DO n = 1, pdims(1)
1025	DO i = 1, nnx
1026	ar(i,k,j,n) = work_trix(m,k)
1027	m = m + 1
1028	ENDDO
1029	ENDDO
1030
1031	ENDDO
1032
1033	ELSE
1034	!
1035	!-- Cache optimized code
1036	DO k = 1, nz
1037
1038	DO i = 0, nx
1039	work_fftx(i) = work_trix(i,k)
1040	ENDDO
1041
1042	CALL fft_x( work_fftx, 'backward' )
1043
1044	m = 0
1045	DO n = 1, pdims(1)
1046	DO i = 1, nnx
1047	ar(i,k,j,n) = work_fftx(m)
1048	m = m + 1
1049	ENDDO
1050	ENDDO
1051
1052	ENDDO
1053
1054	ENDIF
1055
1056	ENDDO
1057
1058	DEALLOCATE( tri )
1059
1060	CALL cpu_log( log_point_s(33), 'fft_x + tridia', 'stop' )
1061
1062	END SUBROUTINE fftx_tri_fftx
1063
1064
1065	SUBROUTINE fftx_tr_xy( f_in, work, f_out )
1066
1067	!------------------------------------------------------------------------------!
1068	! Fourier-transformation along x with subsequent transposition x --> y for
1069	! a 1d-decomposition along y
1070	!
1071	! ATTENTION: The NEC-branch of this routine may significantly profit from
1072	! further optimizations. So far, performance is much worse than
1073	! for routine ffty_tr_yx (more than three times slower).
1074	!------------------------------------------------------------------------------!
1075
1076	USE control_parameters
1077	USE cpulog
1078	USE indices
1079	USE interfaces
1080	USE pegrid
1081	USE transpose_indices
1082
1083	IMPLICIT NONE
1084
1085	INTEGER :: i, j, k
1086
1087	REAL, DIMENSION(0:nx,1:nz,nys:nyn) :: work_fftx
1088	REAL, DIMENSION(1:nz,nys:nyn,0:nx) :: f_in
1089	REAL, DIMENSION(nny,1:nz,nxl_y:nxr_y,pdims(2)) :: f_out
1090	REAL, DIMENSION(nys:nyn,1:nz,0:nx) :: work
1091
1092	!
1093	!-- Carry out the FFT along x, where all data are present due to the
1094	!-- 1d-decomposition along y. Resort the data in a way that y becomes
1095	!-- the first index.
1096	CALL cpu_log( log_point_s(4), 'fft_x', 'start' )
1097
1098	IF ( host(1:3) == 'nec' ) THEN
1099	!
1100	!-- Code for vector processors
1101	!$OMP PARALLEL PRIVATE ( i, j, k )
1102	!$OMP DO
1103	DO i = 0, nx
1104
1105	DO j = nys, nyn
1106	DO k = 1, nz
1107	work_fftx(i,k,j) = f_in(k,j,i)
1108	ENDDO
1109	ENDDO
1110
1111	ENDDO
1112
1113	!$OMP DO
1114	DO j = nys, nyn
1115
1116	CALL fft_x_m( work_fftx(:,:,j), 'forward' )
1117
1118	DO k = 1, nz
1119	DO i = 0, nx
1120	work(j,k,i) = work_fftx(i,k,j)
1121	ENDDO
1122	ENDDO
1123
1124	ENDDO
1125	!$OMP END PARALLEL
1126
1127	ELSE
1128
1129	!
1130	!-- Cache optimized code (there might be still a potential for better
1131	!-- optimization).
1132	!$OMP PARALLEL PRIVATE (i,j,k)
1133	!$OMP DO
1134	DO i = 0, nx
1135
1136	DO j = nys, nyn
1137	DO k = 1, nz
1138	work_fftx(i,k,j) = f_in(k,j,i)
1139	ENDDO
1140	ENDDO
1141
1142	ENDDO
1143
1144	!$OMP DO
1145	DO j = nys, nyn
1146	DO k = 1, nz
1147
1148	CALL fft_x( work_fftx(0:nx,k,j), 'forward' )
1149
1150	DO i = 0, nx
1151	work(j,k,i) = work_fftx(i,k,j)
1152	ENDDO
1153	ENDDO
1154
1155	ENDDO
1156	!$OMP END PARALLEL
1157
1158	ENDIF
1159	CALL cpu_log( log_point_s(4), 'fft_x', 'pause' )
1160
1161	!
1162	!-- Transpose array
1163	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
1164	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
1165	CALL MPI_ALLTOALL( work(nys,1,0), sendrecvcount_xy, MPI_REAL, &
1166	f_out(1,1,nxl_y,1), sendrecvcount_xy, MPI_REAL, &
1167	comm1dy, ierr )
1168	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
1169
1170	END SUBROUTINE fftx_tr_xy
1171
1172
1173	SUBROUTINE tr_yx_fftx( f_in, work, f_out )
1174
1175	!------------------------------------------------------------------------------!
1176	! Transposition y --> x with a subsequent backward Fourier transformation for
1177	! a 1d-decomposition along x
1178	!------------------------------------------------------------------------------!
1179
1180	USE control_parameters
1181	USE cpulog
1182	USE indices
1183	USE interfaces
1184	USE pegrid
1185	USE transpose_indices
1186
1187	IMPLICIT NONE
1188
1189	INTEGER :: i, j, k
1190
1191	REAL, DIMENSION(0:nx,1:nz,nys:nyn) :: work_fftx
1192	REAL, DIMENSION(nny,1:nz,nxl_y:nxr_y,pdims(2)) :: f_in
1193	REAL, DIMENSION(1:nz,nys:nyn,0:nx) :: f_out
1194	REAL, DIMENSION(nys:nyn,1:nz,0:nx) :: work
1195
1196	!
1197	!-- Transpose array
1198	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
1199	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
1200	CALL MPI_ALLTOALL( f_in(1,1,nxl_y,1), sendrecvcount_xy, MPI_REAL, &
1201	work(nys,1,0), sendrecvcount_xy, MPI_REAL, &
1202	comm1dy, ierr )
1203	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
1204
1205	!
1206	!-- Carry out the FFT along x, where all data are present due to the
1207	!-- 1d-decomposition along y. Resort the data in a way that y becomes
1208	!-- the first index.
1209	CALL cpu_log( log_point_s(4), 'fft_x', 'continue' )
1210
1211	IF ( host(1:3) == 'nec' ) THEN
1212	!
1213	!-- Code optimized for vector processors
1214	!$OMP PARALLEL PRIVATE ( i, j, k )
1215	!$OMP DO
1216	DO j = nys, nyn
1217
1218	DO k = 1, nz
1219	DO i = 0, nx
1220	work_fftx(i,k,j) = work(j,k,i)
1221	ENDDO
1222	ENDDO
1223
1224	CALL fft_x_m( work_fftx(:,:,j), 'backward' )
1225
1226	ENDDO
1227
1228	!$OMP DO
1229	DO i = 0, nx
1230	DO j = nys, nyn
1231	DO k = 1, nz
1232	f_out(k,j,i) = work_fftx(i,k,j)
1233	ENDDO
1234	ENDDO
1235	ENDDO
1236	!$OMP END PARALLEL
1237
1238	ELSE
1239
1240	!
1241	!-- Cache optimized code (there might be still a potential for better
1242	!-- optimization).
1243	!$OMP PARALLEL PRIVATE (i,j,k)
1244	!$OMP DO
1245	DO j = nys, nyn
1246	DO k = 1, nz
1247
1248	DO i = 0, nx
1249	work_fftx(i,k,j) = work(j,k,i)
1250	ENDDO
1251
1252	CALL fft_x( work_fftx(0:nx,k,j), 'backward' )
1253
1254	ENDDO
1255	ENDDO
1256
1257	!$OMP DO
1258	DO i = 0, nx
1259	DO j = nys, nyn
1260	DO k = 1, nz
1261	f_out(k,j,i) = work_fftx(i,k,j)
1262	ENDDO
1263	ENDDO
1264	ENDDO
1265	!$OMP END PARALLEL
1266
1267	ENDIF
1268	CALL cpu_log( log_point_s(4), 'fft_x', 'stop' )
1269
1270	END SUBROUTINE tr_yx_fftx
1271
1272
1273	SUBROUTINE ffty_tri_ffty( ar )
1274
1275	!------------------------------------------------------------------------------!
1276	! FFT along y, solution of the tridiagonal system and backward FFT for
1277	! a 1d-decomposition along y
1278	!
1279	! WARNING: this subroutine may still not work for hybrid parallelization
1280	! with OpenMP (for possible necessary changes see the original
1281	! routine poisfft_hybrid, developed by Klaus Ketelsen, May 2002)
1282	!------------------------------------------------------------------------------!
1283
1284	USE control_parameters
1285	USE cpulog
1286	USE grid_variables
1287	USE indices
1288	USE interfaces
1289	USE pegrid
1290	USE transpose_indices
1291
1292	IMPLICIT NONE
1293
1294	INTEGER :: i, j, k, m, n, omp_get_thread_num, tn
1295
1296	REAL, DIMENSION(0:ny) :: work_ffty
1297	REAL, DIMENSION(0:ny,1:nz) :: work_triy
1298	REAL, DIMENSION(nny,1:nz,nxl_y:nxr_y,pdims(2)) :: ar
1299	REAL, DIMENSION(:,:,:,:), ALLOCATABLE :: tri
1300
1301
1302	CALL cpu_log( log_point_s(39), 'fft_y + tridia', 'start' )
1303
1304	ALLOCATE( tri(5,0:ny,0:nz-1,0:threads_per_task-1) )
1305
1306	tn = 0 ! Default thread number in case of one thread
1307	!$OMP PARALLEL DO PRIVATE ( i, j, k, m, n, tn, work_ffty, work_triy )
1308	DO i = nxl_y, nxr_y
1309
1310	!$ tn = omp_get_thread_num()
1311
1312	IF ( host(1:3) == 'nec' ) THEN
1313	!
1314	!-- Code optimized for vector processors
1315	DO k = 1, nz
1316
1317	m = 0
1318	DO n = 1, pdims(2)
1319	DO j = 1, nny
1320	work_triy(m,k) = ar(j,k,i,n)
1321	m = m + 1
1322	ENDDO
1323	ENDDO
1324
1325	ENDDO
1326
1327	CALL fft_y_m( work_triy, ny, 'forward' )
1328
1329	ELSE
1330	!
1331	!-- Cache optimized code
1332	DO k = 1, nz
1333
1334	m = 0
1335	DO n = 1, pdims(2)
1336	DO j = 1, nny
1337	work_ffty(m) = ar(j,k,i,n)
1338	m = m + 1
1339	ENDDO
1340	ENDDO
1341
1342	CALL fft_y( work_ffty, 'forward' )
1343
1344	DO j = 0, ny
1345	work_triy(j,k) = work_ffty(j)
1346	ENDDO
1347
1348	ENDDO
1349
1350	ENDIF
1351
1352	!
1353	!-- Solve the linear equation system
1354	CALL tridia_1dd( ddy2, ddx2, ny, nx, i, work_triy, tri(:,:,:,tn) )
1355
1356	IF ( host(1:3) == 'nec' ) THEN
1357	!
1358	!-- Code optimized for vector processors
1359	CALL fft_y_m( work_triy, ny, 'backward' )
1360
1361	DO k = 1, nz
1362
1363	m = 0
1364	DO n = 1, pdims(2)
1365	DO j = 1, nny
1366	ar(j,k,i,n) = work_triy(m,k)
1367	m = m + 1
1368	ENDDO
1369	ENDDO
1370
1371	ENDDO
1372
1373	ELSE
1374	!
1375	!-- Cache optimized code
1376	DO k = 1, nz
1377
1378	DO j = 0, ny
1379	work_ffty(j) = work_triy(j,k)
1380	ENDDO
1381
1382	CALL fft_y( work_ffty, 'backward' )
1383
1384	m = 0
1385	DO n = 1, pdims(2)
1386	DO j = 1, nny
1387	ar(j,k,i,n) = work_ffty(m)
1388	m = m + 1
1389	ENDDO
1390	ENDDO
1391
1392	ENDDO
1393
1394	ENDIF
1395
1396	ENDDO
1397
1398	DEALLOCATE( tri )
1399
1400	CALL cpu_log( log_point_s(39), 'fft_y + tridia', 'stop' )
1401
1402	END SUBROUTINE ffty_tri_ffty
1403
1404
1405	SUBROUTINE tridia_1dd( ddx2, ddy2, nx, ny, j, ar, tri )
1406
1407	!------------------------------------------------------------------------------!
1408	! Solves the linear system of equations for a 1d-decomposition along x (see
1409	! tridia)
1410	!
1411	! Attention: when using the intel compilers older than 12.0, array tri must
1412	! be passed as an argument to the contained subroutines. Otherwise
1413	! addres faults will occur. This feature can be activated with
1414	! cpp-switch __intel11
1415	! On NEC, tri should not be passed (except for routine substi_1dd)
1416	! because this causes very bad performance.
1417	!------------------------------------------------------------------------------!
1418
1419	USE arrays_3d
1420	USE control_parameters
1421
1422	USE pegrid
1423
1424	IMPLICIT NONE
1425
1426	INTEGER :: i, j, k, nnyh, nx, ny, omp_get_thread_num, tn
1427
1428	REAL :: ddx2, ddy2
1429
1430	REAL, DIMENSION(0:nx,1:nz) :: ar
1431	REAL, DIMENSION(5,0:nx,0:nz-1) :: tri
1432
1433
1434	nnyh = ( ny + 1 ) / 2
1435
1436	!
1437	!-- Define constant elements of the tridiagonal matrix.
1438	!-- The compiler on SX6 does loop exchange. If 0:nx is a high power of 2,
1439	!-- the exchanged loops create bank conflicts. The following directive
1440	!-- prohibits loop exchange and the loops perform much better.
1441	! tn = omp_get_thread_num()
1442	! WRITE( 120+tn, * ) '+++ id=',myid,' nx=',nx,' thread=', omp_get_thread_num()
1443	! CALL local_flush( 120+tn )
1444	!CDIR NOLOOPCHG
1445	DO k = 0, nz-1
1446	DO i = 0,nx
1447	tri(2,i,k) = ddzu_pres(k+1) * ddzw(k+1)
1448	tri(3,i,k) = ddzu_pres(k+2) * ddzw(k+1)
1449	ENDDO
1450	ENDDO
1451	! WRITE( 120+tn, * ) '+++ id=',myid,' end of first tridia loop thread=', omp_get_thread_num()
1452	! CALL local_flush( 120+tn )
1453
1454	IF ( j <= nnyh ) THEN
1455	#if defined( __intel11 )
1456	CALL maketri_1dd( j, tri )
1457	#else
1458	CALL maketri_1dd( j )
1459	#endif
1460	ELSE
1461	#if defined( __intel11 )
1462	CALL maketri_1dd( ny+1-j, tri )
1463	#else
1464	CALL maketri_1dd( ny+1-j )
1465	#endif
1466	ENDIF
1467	#if defined( __intel11 )
1468	CALL split_1dd( tri )
1469	#else
1470	CALL split_1dd
1471	#endif
1472	CALL substi_1dd( ar, tri )
1473
1474	CONTAINS
1475
1476	#if defined( __intel11 )
1477	SUBROUTINE maketri_1dd( j, tri )
1478	#else
1479	SUBROUTINE maketri_1dd( j )
1480	#endif
1481
1482	!------------------------------------------------------------------------------!
1483	! computes the i- and j-dependent component of the matrix
1484	!------------------------------------------------------------------------------!
1485
1486	USE constants
1487
1488	IMPLICIT NONE
1489
1490	INTEGER :: i, j, k, nnxh
1491	REAL :: a, c
1492
1493	REAL, DIMENSION(0:nx) :: l
1494
1495	#if defined( __intel11 )
1496	REAL, DIMENSION(5,0:nx,0:nz-1) :: tri
1497	#endif
1498
1499
1500	nnxh = ( nx + 1 ) / 2
1501	!
1502	!-- Provide the tridiagonal matrix for solution of the Poisson equation in
1503	!-- Fourier space. The coefficients are computed following the method of
1504	!-- Schmidt et al. (DFVLR-Mitteilung 84-15), which departs from Stephan
1505	!-- Siano's original version by discretizing the Poisson equation,
1506	!-- before it is Fourier-transformed
1507	DO i = 0, nx
1508	IF ( i >= 0 .AND. i <= nnxh ) THEN
1509	l(i) = 2.0 * ( 1.0 - COS( ( 2.0 * pi * i ) / &
1510	REAL( nx+1 ) ) ) * ddx2 + &
1511	2.0 * ( 1.0 - COS( ( 2.0 * pi * j ) / &
1512	REAL( ny+1 ) ) ) * ddy2
1513	ELSE
1514	l(i) = 2.0 * ( 1.0 - COS( ( 2.0 * pi * ( nx+1-i ) ) / &
1515	REAL( nx+1 ) ) ) * ddx2 + &
1516	2.0 * ( 1.0 - COS( ( 2.0 * pi * j ) / &
1517	REAL( ny+1 ) ) ) * ddy2
1518	ENDIF
1519	ENDDO
1520
1521	DO k = 0, nz-1
1522	DO i = 0, nx
1523	a = -1.0 * ddzu_pres(k+2) * ddzw(k+1)
1524	c = -1.0 * ddzu_pres(k+1) * ddzw(k+1)
1525	tri(1,i,k) = a + c - l(i)
1526	ENDDO
1527	ENDDO
1528	IF ( ibc_p_b == 1 .OR. ibc_p_b == 2 ) THEN
1529	DO i = 0, nx
1530	tri(1,i,0) = tri(1,i,0) + tri(2,i,0)
1531	ENDDO
1532	ENDIF
1533	IF ( ibc_p_t == 1 ) THEN
1534	DO i = 0, nx
1535	tri(1,i,nz-1) = tri(1,i,nz-1) + tri(3,i,nz-1)
1536	ENDDO
1537	ENDIF
1538
1539	END SUBROUTINE maketri_1dd
1540
1541
1542	#if defined( __intel11 )
1543	SUBROUTINE split_1dd( tri )
1544	#else
1545	SUBROUTINE split_1dd
1546	#endif
1547
1548	!------------------------------------------------------------------------------!
1549	! Splitting of the tridiagonal matrix (Thomas algorithm)
1550	!------------------------------------------------------------------------------!
1551
1552	IMPLICIT NONE
1553
1554	INTEGER :: i, k
1555
1556	#if defined( __intel11 )
1557	REAL, DIMENSION(5,0:nx,0:nz-1) :: tri
1558	#endif
1559
1560
1561	!
1562	!-- Splitting
1563	DO i = 0, nx
1564	tri(4,i,0) = tri(1,i,0)
1565	ENDDO
1566	DO k = 1, nz-1
1567	DO i = 0, nx
1568	tri(5,i,k) = tri(2,i,k) / tri(4,i,k-1)
1569	tri(4,i,k) = tri(1,i,k) - tri(3,i,k-1) * tri(5,i,k)
1570	ENDDO
1571	ENDDO
1572
1573	END SUBROUTINE split_1dd
1574
1575
1576	SUBROUTINE substi_1dd( ar, tri )
1577
1578	!------------------------------------------------------------------------------!
1579	! Substitution (Forward and Backward) (Thomas algorithm)
1580	!------------------------------------------------------------------------------!
1581
1582	IMPLICIT NONE
1583
1584	INTEGER :: i, k
1585
1586	REAL, DIMENSION(0:nx,nz) :: ar
1587	REAL, DIMENSION(0:nx,0:nz-1) :: ar1
1588	REAL, DIMENSION(5,0:nx,0:nz-1) :: tri
1589
1590	!
1591	!-- Forward substitution
1592	DO i = 0, nx
1593	ar1(i,0) = ar(i,1)
1594	ENDDO
1595	DO k = 1, nz-1
1596	DO i = 0, nx
1597	ar1(i,k) = ar(i,k+1) - tri(5,i,k) * ar1(i,k-1)
1598	ENDDO
1599	ENDDO
1600
1601	!
1602	!-- Backward substitution
1603	!-- Note, the add of 1.0E-20 in the denominator is due to avoid divisions
1604	!-- by zero appearing if the pressure bc is set to neumann at the top of
1605	!-- the model domain.
1606	DO i = 0, nx
1607	ar(i,nz) = ar1(i,nz-1) / ( tri(4,i,nz-1) + 1.0E-20 )
1608	ENDDO
1609	DO k = nz-2, 0, -1
1610	DO i = 0, nx
1611	ar(i,k+1) = ( ar1(i,k) - tri(3,i,k) * ar(i,k+2) ) &
1612	/ tri(4,i,k)
1613	ENDDO
1614	ENDDO
1615
1616	!
1617	!-- Indices i=0, j=0 correspond to horizontally averaged pressure.
1618	!-- The respective values of ar should be zero at all k-levels if
1619	!-- acceleration of horizontally averaged vertical velocity is zero.
1620	IF ( ibc_p_b == 1 .AND. ibc_p_t == 1 ) THEN
1621	IF ( j == 0 ) THEN
1622	DO k = 1, nz
1623	ar(0,k) = 0.0
1624	ENDDO
1625	ENDIF
1626	ENDIF
1627
1628	END SUBROUTINE substi_1dd
1629
1630	END SUBROUTINE tridia_1dd
1631
1632	#endif
1633	#endif
1634	END MODULE poisfft_mod

Note: See TracBrowser for help on using the repository browser.

Download in other formats:

| Impressum | ©Leibniz Universität Hannover |