Home

Context Navigation

source: palm/trunk/SOURCE/poisfft.f90 @ 1036

Last change on this file since 1036 was 1036, checked in by raasch, 11 years ago
code has been put under the GNU General Public License (v3)
Property svn:keywords set to `Id`
File size: 47.6 KB

Line
1	MODULE poisfft_mod
2
3	!--------------------------------------------------------------------------------!
4	! This file is part of PALM.
5	!
6	! PALM is free software: you can redistribute it and/or modify it under the terms
7	! of the GNU General Public License as published by the Free Software Foundation,
8	! either version 3 of the License, or (at your option) any later version.
9	!
10	! PALM is distributed in the hope that it will be useful, but WITHOUT ANY
11	! WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
12	! A PARTICULAR PURPOSE. See the GNU General Public License for more details.
13	!
14	! You should have received a copy of the GNU General Public License along with
15	! PALM. If not, see <http://www.gnu.org/licenses/>.
16	!
17	! Copyright 1997-2012 Leibniz University Hannover
18	!--------------------------------------------------------------------------------!
19	!
20	! Current revisions:
21	! -----------------
22	!
23	!
24	! Former revisions:
25	! -----------------
26	! $Id: poisfft.f90 1036 2012-10-22 13:43:42Z raasch $
27	!
28	! 2012-09-21 07:03:55Z raasch
29	! FLOAT type conversion replaced by REAL
30	!
31	! 1003 2012-09-14 14:35:53Z raasch
32	! indices nxa, nya, etc. replaced by nx, ny, etc.
33	!
34	! 940 2012-07-09 14:31:00Z raasch
35	! special handling of tri-array as an argument in tridia_1dd routines switched
36	! off because it caused segmentation faults with intel 12.1 compiler
37	!
38	! 877 2012-04-03 11:21:44Z suehring
39	! Bugfix: Avoid divisions by zero in case of using a 'neumann' bc for the
40	! pressure at the top of the model domain.
41	!
42	! 809 2012-01-30 13:32:58Z maronga
43	! Bugfix: replaced .AND. and .NOT. with && and ! in the preprocessor directives
44	!
45	! 807 2012-01-25 11:53:51Z maronga
46	! New cpp directive "__check" implemented which is used by check_namelist_files
47	! (most of the code is unneeded by check_namelist_files).
48	!
49	! 763 2011-10-06 09:32:09Z suehring
50	! Comment added concerning the last change.
51	!
52	! 761 2011-10-05 17:58:52Z suehring
53	! Bugfix: Avoid divisions by zero in case of using a 'neumann' bc for the
54	! pressure at the top of the model domain.
55	!
56	! 696 2011-03-18 07:03:49Z raasch
57	! work_fftx removed from PRIVATE clauses in fftx_tr_xy and tr_yx_fftx
58	!
59	! 683 2011-02-09 14:25:15Z raasch
60	! openMP parallelization for 2d-domain-decomposition
61	!
62	! 667 2010-12-23 12:06:00Z suehring/gryschka
63	! ddzu replaced by ddzu_pres due to changes in zu(0)
64	!
65	! 622 2010-12-10 08:08:13Z raasch
66	! optional barriers included in order to speed up collective operations
67	!
68	! 377 2009-09-04 11:09:00Z raasch
69	! __lcmuk changed to __lc to avoid problems with Intel compiler on sgi-ice
70	!
71	! 164 2008-05-15 08:46:15Z raasch
72	! Arguments removed from transpose routines
73	!
74	! 128 2007-10-26 13:11:14Z raasch
75	! Bugfix: wavenumber calculation for even nx in routines maketri
76	!
77	! 85 2007-05-11 09:35:14Z raasch
78	! Bugfix: work_fft*_vec removed from some PRIVATE-declarations
79	!
80	! 76 2007-03-29 00:58:32Z raasch
81	! Tridiagonal coefficients adjusted for Neumann boundary conditions both at
82	! the bottom and the top.
83	!
84	! RCS Log replace by Id keyword, revision history cleaned up
85	!
86	! Revision 1.24 2006/08/04 15:00:24 raasch
87	! Default setting of the thread number tn in case of not using OpenMP
88	!
89	! Revision 1.23 2006/02/23 12:48:38 raasch
90	! Additional compiler directive in routine tridia_1dd for preventing loop
91	! exchange on NEC-SX6
92	!
93	! Revision 1.20 2004/04/30 12:38:09 raasch
94	! Parts of former poisfft_hybrid moved to this subroutine,
95	! former subroutine changed to a module, renaming of FFT-subroutines and
96	! -module, FFTs completely substituted by calls of fft_x and fft_y,
97	! NAG fft used in the non-parallel case completely removed, l in maketri
98	! is now a 1d-array, variables passed by modules instead of using parameter
99	! lists, enlarged transposition arrays introduced
100	!
101	! Revision 1.1 1997/07/24 11:24:14 raasch
102	! Initial revision
103	!
104	!
105	! Description:
106	! ------------
107	! See below.
108	!------------------------------------------------------------------------------!
109
110	!--------------------------------------------------------------------------!
111	! poisfft !
112	! !
113	! Original version: Stephan Siano (pois3d) !
114	! !
115	! Institute of Meteorology and Climatology, University of Hannover !
116	! Germany !
117	! !
118	! Version as of July 23,1996 !
119	! !
120	! !
121	! Version for parallel computers: Siegfried Raasch !
122	! !
123	! Version as of July 03,1997 !
124	! !
125	! Solves the Poisson equation with a 2D spectral method !
126	! d^2 p / dx^2 + d^2 p / dy^2 + d^2 p / dz^2 = s !
127	! !
128	! Input: !
129	! real ar contains in the (nnx,nny,nnz) elements, !
130	! starting from the element (1,nys,nxl), the !
131	! values for s !
132	! real work Temporary array !
133	! !
134	! Output: !
135	! real ar contains the solution for p !
136	!--------------------------------------------------------------------------!
137
138	USE fft_xy
139	USE indices
140	USE transpose_indices
141
142	IMPLICIT NONE
143
144	PRIVATE
145
146	#if ! defined ( __check )
147	PUBLIC poisfft, poisfft_init
148
149	INTERFACE poisfft
150	MODULE PROCEDURE poisfft
151	END INTERFACE poisfft
152
153	INTERFACE poisfft_init
154	MODULE PROCEDURE poisfft_init
155	END INTERFACE poisfft_init
156	#else
157	PUBLIC poisfft_init
158
159	INTERFACE poisfft_init
160	MODULE PROCEDURE poisfft_init
161	END INTERFACE poisfft_init
162	#endif
163
164	CONTAINS
165
166	SUBROUTINE poisfft_init
167
168	CALL fft_init
169
170	END SUBROUTINE poisfft_init
171
172	#if ! defined ( __check )
173	SUBROUTINE poisfft( ar, work )
174
175	USE cpulog
176	USE interfaces
177	USE pegrid
178
179	IMPLICIT NONE
180
181	REAL, DIMENSION(1:nz,nys:nyn,nxl:nxr) :: ar, work
182
183
184	CALL cpu_log( log_point_s(3), 'poisfft', 'start' )
185
186	!
187	!-- Two-dimensional Fourier Transformation in x- and y-direction.
188	#if defined( __parallel )
189	IF ( pdims(2) == 1 ) THEN
190
191	!
192	!-- 1d-domain-decomposition along x:
193	!-- FFT along y and transposition y --> x
194	CALL ffty_tr_yx( ar, work, ar )
195
196	!
197	!-- FFT along x, solving the tridiagonal system and backward FFT
198	CALL fftx_tri_fftx( ar )
199
200	!
201	!-- Transposition x --> y and backward FFT along y
202	CALL tr_xy_ffty( ar, work, ar )
203
204	ELSEIF ( pdims(1) == 1 ) THEN
205
206	!
207	!-- 1d-domain-decomposition along y:
208	!-- FFT along x and transposition x --> y
209	CALL fftx_tr_xy( ar, work, ar )
210
211	!
212	!-- FFT along y, solving the tridiagonal system and backward FFT
213	CALL ffty_tri_ffty( ar )
214
215	!
216	!-- Transposition y --> x and backward FFT along x
217	CALL tr_yx_fftx( ar, work, ar )
218
219	ELSE
220
221	!
222	!-- 2d-domain-decomposition
223	!-- Transposition z --> x
224	CALL cpu_log( log_point_s(5), 'transpo forward', 'start' )
225	CALL transpose_zx( ar, work, ar )
226	CALL cpu_log( log_point_s(5), 'transpo forward', 'pause' )
227
228	CALL cpu_log( log_point_s(4), 'fft_x', 'start' )
229	CALL fftxp( ar, 'forward' )
230	CALL cpu_log( log_point_s(4), 'fft_x', 'pause' )
231
232	!
233	!-- Transposition x --> y
234	CALL cpu_log( log_point_s(5), 'transpo forward', 'continue' )
235	CALL transpose_xy( ar, work, ar )
236	CALL cpu_log( log_point_s(5), 'transpo forward', 'pause' )
237
238	CALL cpu_log( log_point_s(7), 'fft_y', 'start' )
239	CALL fftyp( ar, 'forward' )
240	CALL cpu_log( log_point_s(7), 'fft_y', 'pause' )
241
242	!
243	!-- Transposition y --> z
244	CALL cpu_log( log_point_s(5), 'transpo forward', 'continue' )
245	CALL transpose_yz( ar, work, ar )
246	CALL cpu_log( log_point_s(5), 'transpo forward', 'stop' )
247
248	!
249	!-- Solve the Poisson equation in z-direction in cartesian space.
250	CALL cpu_log( log_point_s(6), 'tridia', 'start' )
251	CALL tridia( ar )
252	CALL cpu_log( log_point_s(6), 'tridia', 'stop' )
253
254	!
255	!-- Inverse Fourier Transformation
256	!-- Transposition z --> y
257	CALL cpu_log( log_point_s(8), 'transpo invers', 'start' )
258	CALL transpose_zy( ar, work, ar )
259	CALL cpu_log( log_point_s(8), 'transpo invers', 'pause' )
260
261	CALL cpu_log( log_point_s(7), 'fft_y', 'continue' )
262	CALL fftyp( ar, 'backward' )
263	CALL cpu_log( log_point_s(7), 'fft_y', 'stop' )
264
265	!
266	!-- Transposition y --> x
267	CALL cpu_log( log_point_s(8), 'transpo invers', 'continue' )
268	CALL transpose_yx( ar, work, ar )
269	CALL cpu_log( log_point_s(8), 'transpo invers', 'pause' )
270
271	CALL cpu_log( log_point_s(4), 'fft_x', 'continue' )
272	CALL fftxp( ar, 'backward' )
273	CALL cpu_log( log_point_s(4), 'fft_x', 'stop' )
274
275	!
276	!-- Transposition x --> z
277	CALL cpu_log( log_point_s(8), 'transpo invers', 'continue' )
278	CALL transpose_xz( ar, work, ar )
279	CALL cpu_log( log_point_s(8), 'transpo invers', 'stop' )
280
281	ENDIF
282
283	#else
284
285	!
286	!-- Two-dimensional Fourier Transformation along x- and y-direction.
287	CALL cpu_log( log_point_s(4), 'fft_x', 'start' )
288	CALL fftx( ar, 'forward' )
289	CALL cpu_log( log_point_s(4), 'fft_x', 'pause' )
290	CALL cpu_log( log_point_s(7), 'fft_y', 'start' )
291	CALL ffty( ar, 'forward' )
292	CALL cpu_log( log_point_s(7), 'fft_y', 'pause' )
293
294	!
295	!-- Solve the Poisson equation in z-direction in cartesian space.
296	CALL cpu_log( log_point_s(6), 'tridia', 'start' )
297	CALL tridia( ar )
298	CALL cpu_log( log_point_s(6), 'tridia', 'stop' )
299
300	!
301	!-- Inverse Fourier Transformation.
302	CALL cpu_log( log_point_s(7), 'fft_y', 'continue' )
303	CALL ffty( ar, 'backward' )
304	CALL cpu_log( log_point_s(7), 'fft_y', 'stop' )
305	CALL cpu_log( log_point_s(4), 'fft_x', 'continue' )
306	CALL fftx( ar, 'backward' )
307	CALL cpu_log( log_point_s(4), 'fft_x', 'stop' )
308
309	#endif
310
311	CALL cpu_log( log_point_s(3), 'poisfft', 'stop' )
312
313	END SUBROUTINE poisfft
314
315
316
317	SUBROUTINE tridia( ar )
318
319	!------------------------------------------------------------------------------!
320	! solves the linear system of equations:
321	!
322	! -(4 pi^2(i^2/(dx^2nnx^2)+j^2/(dy^2nny^2))+
323	! 1/(dzu(k)dzw(k))+1/(dzu(k-1)dzw(k)))*p(i,j,k)+
324	! 1/(dzu(k)dzw(k))p(i,j,k+1)+1/(dzu(k-1)dzw(k))p(i,j,k-1)=d(i,j,k)
325	!
326	! by using the Thomas algorithm
327	!------------------------------------------------------------------------------!
328
329	USE arrays_3d
330
331	IMPLICIT NONE
332
333	INTEGER :: i, j, k, nnyh
334
335	REAL, DIMENSION(nxl_z:nxr_z,0:nz-1) :: ar1
336	REAL, DIMENSION(5,nxl_z:nxr_z,0:nz-1) :: tri
337
338	#if defined( __parallel )
339	REAL :: ar(nxl_z:nxr_z,nys_z:nyn_z,1:nz)
340	#else
341	REAL :: ar(1:nz,nys_z:nyn_z,nxl_z:nxr_z)
342	#endif
343
344
345	nnyh = (ny+1) / 2
346
347	!
348	!-- Define constant elements of the tridiagonal matrix.
349	!$OMP PARALLEL PRIVATE ( k, i )
350	!$OMP DO
351	DO k = 0, nz-1
352	DO i = nxl_z, nxr_z
353	tri(2,i,k) = ddzu_pres(k+1) * ddzw(k+1)
354	tri(3,i,k) = ddzu_pres(k+2) * ddzw(k+1)
355	ENDDO
356	ENDDO
357	!$OMP END PARALLEL
358
359	#if defined( __parallel )
360	!
361	!-- Repeat for all y-levels.
362	!$OMP PARALLEL FIRSTPRIVATE( tri ) PRIVATE ( ar1, j )
363	!$OMP DO
364	DO j = nys_z, nyn_z
365	IF ( j <= nnyh ) THEN
366	CALL maketri( tri, j )
367	ELSE
368	CALL maketri( tri, ny+1-j )
369	ENDIF
370	CALL split( tri )
371	CALL substi( ar, ar1, tri, j )
372	ENDDO
373	!$OMP END PARALLEL
374	#else
375	!
376	!-- First y-level.
377	CALL maketri( tri, nys_z )
378	CALL split( tri )
379	CALL substi( ar, ar1, tri, 0 )
380
381	!
382	!-- Further y-levels.
383	DO j = 1, nnyh - 1
384	CALL maketri( tri, j )
385	CALL split( tri )
386	CALL substi( ar, ar1, tri, j )
387	CALL substi( ar, ar1, tri, ny+1-j )
388	ENDDO
389	CALL maketri( tri, nnyh )
390	CALL split( tri )
391	CALL substi( ar, ar1, tri, nnyh+nys )
392	#endif
393
394	CONTAINS
395
396	SUBROUTINE maketri( tri, j )
397
398	!------------------------------------------------------------------------------!
399	! Computes the i- and j-dependent component of the matrix
400	!------------------------------------------------------------------------------!
401
402	USE arrays_3d
403	USE constants
404	USE control_parameters
405	USE grid_variables
406
407	IMPLICIT NONE
408
409	INTEGER :: i, j, k, nnxh
410	REAL :: a, c
411	REAL :: ll(nxl_z:nxr_z)
412	REAL :: tri(5,nxl_z:nxr_z,0:nz-1)
413
414
415	nnxh = ( nx + 1 ) / 2
416
417	!
418	!-- Provide the tridiagonal matrix for solution of the Poisson equation in
419	!-- Fourier space. The coefficients are computed following the method of
420	!-- Schmidt et al. (DFVLR-Mitteilung 84-15), which departs from Stephan
421	!-- Siano's original version by discretizing the Poisson equation,
422	!-- before it is Fourier-transformed
423	#if defined( __parallel )
424	DO i = nxl_z, nxr_z
425	IF ( i >= 0 .AND. i <= nnxh ) THEN
426	ll(i) = 2.0 * ( 1.0 - COS( ( 2.0 * pi * i ) / &
427	REAL( nx+1 ) ) ) / ( dx * dx ) + &
428	2.0 * ( 1.0 - COS( ( 2.0 * pi * j ) / &
429	REAL( ny+1 ) ) ) / ( dy * dy )
430	ELSE
431	ll(i) = 2.0 * ( 1.0 - COS( ( 2.0 * pi * ( nx+1-i ) ) / &
432	REAL( nx+1 ) ) ) / ( dx * dx ) + &
433	2.0 * ( 1.0 - COS( ( 2.0 * pi * j ) / &
434	REAL( ny+1 ) ) ) / ( dy * dy )
435	ENDIF
436	DO k = 0,nz-1
437	a = -1.0 * ddzu_pres(k+2) * ddzw(k+1)
438	c = -1.0 * ddzu_pres(k+1) * ddzw(k+1)
439	tri(1,i,k) = a + c - ll(i)
440	ENDDO
441	ENDDO
442	#else
443	DO i = 0, nnxh
444	ll(i) = 2.0 * ( 1.0 - COS( ( 2.0 * pi * i ) / REAL( nx+1 ) ) ) / &
445	( dx * dx ) + &
446	2.0 * ( 1.0 - COS( ( 2.0 * pi * j ) / REAL( ny+1 ) ) ) / &
447	( dy * dy )
448	DO k = 0, nz-1
449	a = -1.0 * ddzu_pres(k+2) * ddzw(k+1)
450	c = -1.0 * ddzu_pres(k+1) * ddzw(k+1)
451	tri(1,i,k) = a + c - ll(i)
452	IF ( i >= 1 .and. i < nnxh ) THEN
453	tri(1,nx+1-i,k) = tri(1,i,k)
454	ENDIF
455	ENDDO
456	ENDDO
457	#endif
458	IF ( ibc_p_b == 1 .OR. ibc_p_b == 2 ) THEN
459	DO i = nxl_z, nxr_z
460	tri(1,i,0) = tri(1,i,0) + tri(2,i,0)
461	ENDDO
462	ENDIF
463	IF ( ibc_p_t == 1 ) THEN
464	DO i = nxl_z, nxr_z
465	tri(1,i,nz-1) = tri(1,i,nz-1) + tri(3,i,nz-1)
466	ENDDO
467	ENDIF
468
469	END SUBROUTINE maketri
470
471
472	SUBROUTINE substi( ar, ar1, tri, j )
473
474	!------------------------------------------------------------------------------!
475	! Substitution (Forward and Backward) (Thomas algorithm)
476	!------------------------------------------------------------------------------!
477
478	USE control_parameters
479
480	IMPLICIT NONE
481
482	INTEGER :: i, j, k
483	REAL :: ar1(nxl_z:nxr_z,0:nz-1)
484	REAL :: tri(5,nxl_z:nxr_z,0:nz-1)
485	#if defined( __parallel )
486	REAL :: ar(nxl_z:nxr_z,nys_z:nyn_z,1:nz)
487	#else
488	REAL :: ar(1:nz,nys_z:nyn_z,nxl_z:nxr_z)
489	#endif
490
491	!
492	!-- Forward substitution.
493	DO i = nxl_z, nxr_z
494	#if defined( __parallel )
495	ar1(i,0) = ar(i,j,1)
496	#else
497	ar1(i,0) = ar(1,j,i)
498	#endif
499	ENDDO
500	DO k = 1, nz - 1
501	DO i = nxl_z, nxr_z
502	#if defined( __parallel )
503	ar1(i,k) = ar(i,j,k+1) - tri(5,i,k) * ar1(i,k-1)
504	#else
505	ar1(i,k) = ar(k+1,j,i) - tri(5,i,k) * ar1(i,k-1)
506	#endif
507	ENDDO
508	ENDDO
509
510	!
511	!-- Backward substitution
512	!-- Note, the 1.0E-20 in the denominator is due to avoid divisions
513	!-- by zero appearing if the pressure bc is set to neumann at the top of
514	!-- the model domain.
515	DO i = nxl_z, nxr_z
516	#if defined( __parallel )
517	ar(i,j,nz) = ar1(i,nz-1) / ( tri(4,i,nz-1) + 1.0E-20 )
518	#else
519	ar(nz,j,i) = ar1(i,nz-1) / ( tri(4,i,nz-1) + 1.0E-20 )
520	#endif
521	ENDDO
522	DO k = nz-2, 0, -1
523	DO i = nxl_z, nxr_z
524	#if defined( __parallel )
525	ar(i,j,k+1) = ( ar1(i,k) - tri(3,i,k) * ar(i,j,k+2) ) &
526	/ tri(4,i,k)
527	#else
528	ar(k+1,j,i) = ( ar1(i,k) - tri(3,i,k) * ar(k+2,j,i) ) &
529	/ tri(4,i,k)
530	#endif
531	ENDDO
532	ENDDO
533
534	!
535	!-- Indices i=0, j=0 correspond to horizontally averaged pressure.
536	!-- The respective values of ar should be zero at all k-levels if
537	!-- acceleration of horizontally averaged vertical velocity is zero.
538	IF ( ibc_p_b == 1 .AND. ibc_p_t == 1 ) THEN
539	IF ( j == 0 .AND. nxl_z == 0 ) THEN
540	#if defined( __parallel )
541	DO k = 1, nz
542	ar(nxl_z,j,k) = 0.0
543	ENDDO
544	#else
545	DO k = 1, nz
546	ar(k,j,nxl_z) = 0.0
547	ENDDO
548	#endif
549	ENDIF
550	ENDIF
551
552	END SUBROUTINE substi
553
554
555	SUBROUTINE split( tri )
556
557	!------------------------------------------------------------------------------!
558	! Splitting of the tridiagonal matrix (Thomas algorithm)
559	!------------------------------------------------------------------------------!
560
561	IMPLICIT NONE
562
563	INTEGER :: i, k
564	REAL :: tri(5,nxl_z:nxr_z,0:nz-1)
565
566	!
567	!-- Splitting.
568	DO i = nxl_z, nxr_z
569	tri(4,i,0) = tri(1,i,0)
570	ENDDO
571	DO k = 1, nz-1
572	DO i = nxl_z, nxr_z
573	tri(5,i,k) = tri(2,i,k) / tri(4,i,k-1)
574	tri(4,i,k) = tri(1,i,k) - tri(3,i,k-1) * tri(5,i,k)
575	ENDDO
576	ENDDO
577
578	END SUBROUTINE split
579
580	END SUBROUTINE tridia
581
582
583	#if defined( __parallel )
584	SUBROUTINE fftxp( ar, direction )
585
586	!------------------------------------------------------------------------------!
587	! Fourier-transformation along x-direction Parallelized version
588	!------------------------------------------------------------------------------!
589
590	IMPLICIT NONE
591
592	CHARACTER (LEN=*) :: direction
593	INTEGER :: j, k
594	REAL :: ar(0:nx,nys_x:nyn_x,nzb_x:nzt_x)
595
596	!
597	!-- Performing the fft with one of the methods implemented
598	!$OMP PARALLEL PRIVATE ( j, k )
599	!$OMP DO
600	DO k = nzb_x, nzt_x
601	DO j = nys_x, nyn_x
602	CALL fft_x( ar(0:nx,j,k), direction )
603	ENDDO
604	ENDDO
605	!$OMP END PARALLEL
606
607	END SUBROUTINE fftxp
608
609	#else
610	SUBROUTINE fftx( ar, direction )
611
612	!------------------------------------------------------------------------------!
613	! Fourier-transformation along x-direction Non parallel version
614	!------------------------------------------------------------------------------!
615
616	IMPLICIT NONE
617
618	CHARACTER (LEN=*) :: direction
619	INTEGER :: i, j, k
620	REAL :: ar(1:nz,0:ny,0:nx)
621
622	!
623	!-- Performing the fft with one of the methods implemented
624	!$OMP PARALLEL PRIVATE ( j, k )
625	!$OMP DO
626	DO k = 1, nz
627	DO j = 0, ny
628	CALL fft_x( ar(k,j,0:nx), direction )
629	ENDDO
630	ENDDO
631	!$OMP END PARALLEL
632
633	END SUBROUTINE fftx
634	#endif
635
636
637	#if defined( __parallel )
638	SUBROUTINE fftyp( ar, direction )
639
640	!------------------------------------------------------------------------------!
641	! Fourier-transformation along y-direction Parallelized version
642	!------------------------------------------------------------------------------!
643
644	IMPLICIT NONE
645
646	CHARACTER (LEN=*) :: direction
647	INTEGER :: i, k
648	REAL :: ar(0:ny,nxl_y:nxr_y,nzb_y:nzt_y)
649
650	!
651	!-- Performing the fft with one of the methods implemented
652	!$OMP PARALLEL PRIVATE ( i, k )
653	!$OMP DO
654	DO k = nzb_y, nzt_y
655	DO i = nxl_y, nxr_y
656	CALL fft_y( ar(0:ny,i,k), direction )
657	ENDDO
658	ENDDO
659	!$OMP END PARALLEL
660
661	END SUBROUTINE fftyp
662
663	#else
664	SUBROUTINE ffty( ar, direction )
665
666	!------------------------------------------------------------------------------!
667	! Fourier-transformation along y-direction Non parallel version
668	!------------------------------------------------------------------------------!
669
670	IMPLICIT NONE
671
672	CHARACTER (LEN=*) :: direction
673	INTEGER :: i, k
674	REAL :: ar(1:nz,0:ny,0:nx)
675
676	!
677	!-- Performing the fft with one of the methods implemented
678	!$OMP PARALLEL PRIVATE ( i, k )
679	!$OMP DO
680	DO k = 1, nz
681	DO i = 0, nx
682	CALL fft_y( ar(k,0:ny,i), direction )
683	ENDDO
684	ENDDO
685	!$OMP END PARALLEL
686
687	END SUBROUTINE ffty
688	#endif
689
690	#if defined( __parallel )
691	SUBROUTINE ffty_tr_yx( f_in, work, f_out )
692
693	!------------------------------------------------------------------------------!
694	! Fourier-transformation along y with subsequent transposition y --> x for
695	! a 1d-decomposition along x
696	!
697	! ATTENTION: The performance of this routine is much faster on the NEC-SX6,
698	! if the first index of work_ffty_vec is odd. Otherwise
699	! memory bank conflicts may occur (especially if the index is a
700	! multiple of 128). That's why work_ffty_vec is dimensioned as
701	! 0:ny+1.
702	! Of course, this will not work if users are using an odd number
703	! of gridpoints along y.
704	!------------------------------------------------------------------------------!
705
706	USE control_parameters
707	USE cpulog
708	USE indices
709	USE interfaces
710	USE pegrid
711	USE transpose_indices
712
713	IMPLICIT NONE
714
715	INTEGER :: i, iend, iouter, ir, j, k
716	INTEGER, PARAMETER :: stridex = 4
717
718	REAL, DIMENSION(0:ny,stridex) :: work_ffty
719	#if defined( __nec )
720	REAL, DIMENSION(0:ny+1,1:nz,nxl:nxr) :: work_ffty_vec
721	#endif
722	REAL, DIMENSION(1:nz,0:ny,nxl:nxr) :: f_in
723	REAL, DIMENSION(nnx,1:nz,nys_x:nyn_x,pdims(1)) :: f_out
724	REAL, DIMENSION(nxl:nxr,1:nz,0:ny) :: work
725
726	!
727	!-- Carry out the FFT along y, where all data are present due to the
728	!-- 1d-decomposition along x. Resort the data in a way that x becomes
729	!-- the first index.
730	CALL cpu_log( log_point_s(7), 'fft_y', 'start' )
731
732	IF ( host(1:3) == 'nec' ) THEN
733	#if defined( __nec )
734	!
735	!-- Code optimized for vector processors
736	!$OMP PARALLEL PRIVATE ( i, j, k )
737	!$OMP DO
738	DO i = nxl, nxr
739
740	DO j = 0, ny
741	DO k = 1, nz
742	work_ffty_vec(j,k,i) = f_in(k,j,i)
743	ENDDO
744	ENDDO
745
746	CALL fft_y_m( work_ffty_vec(:,:,i), ny+1, 'forward' )
747
748	ENDDO
749
750	!$OMP DO
751	DO k = 1, nz
752	DO j = 0, ny
753	DO i = nxl, nxr
754	work(i,k,j) = work_ffty_vec(j,k,i)
755	ENDDO
756	ENDDO
757	ENDDO
758	!$OMP END PARALLEL
759	#endif
760
761	ELSE
762
763	!
764	!-- Cache optimized code.
765	!-- The i-(x-)direction is split into a strided outer loop and an inner
766	!-- loop for better cache performance
767	!$OMP PARALLEL PRIVATE (i,iend,iouter,ir,j,k,work_ffty)
768	!$OMP DO
769	DO iouter = nxl, nxr, stridex
770
771	iend = MIN( iouter+stridex-1, nxr ) ! Upper bound for inner i loop
772
773	DO k = 1, nz
774
775	DO i = iouter, iend
776
777	ir = i-iouter+1 ! counter within a stride
778	DO j = 0, ny
779	work_ffty(j,ir) = f_in(k,j,i)
780	ENDDO
781	!
782	!-- FFT along y
783	CALL fft_y( work_ffty(:,ir), 'forward' )
784
785	ENDDO
786
787	!
788	!-- Resort
789	DO j = 0, ny
790	DO i = iouter, iend
791	work(i,k,j) = work_ffty(j,i-iouter+1)
792	ENDDO
793	ENDDO
794
795	ENDDO
796
797	ENDDO
798	!$OMP END PARALLEL
799
800	ENDIF
801	CALL cpu_log( log_point_s(7), 'fft_y', 'pause' )
802
803	!
804	!-- Transpose array
805	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
806	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
807	CALL MPI_ALLTOALL( work(nxl,1,0), sendrecvcount_xy, MPI_REAL, &
808	f_out(1,1,nys_x,1), sendrecvcount_xy, MPI_REAL, &
809	comm1dx, ierr )
810	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
811
812	END SUBROUTINE ffty_tr_yx
813
814
815	SUBROUTINE tr_xy_ffty( f_in, work, f_out )
816
817	!------------------------------------------------------------------------------!
818	! Transposition x --> y with a subsequent backward Fourier transformation for
819	! a 1d-decomposition along x
820	!------------------------------------------------------------------------------!
821
822	USE control_parameters
823	USE cpulog
824	USE indices
825	USE interfaces
826	USE pegrid
827	USE transpose_indices
828
829	IMPLICIT NONE
830
831	INTEGER :: i, iend, iouter, ir, j, k
832	INTEGER, PARAMETER :: stridex = 4
833
834	REAL, DIMENSION(0:ny,stridex) :: work_ffty
835	#if defined( __nec )
836	REAL, DIMENSION(0:ny+1,1:nz,nxl:nxr) :: work_ffty_vec
837	#endif
838	REAL, DIMENSION(nnx,1:nz,nys_x:nyn_x,pdims(1)) :: f_in
839	REAL, DIMENSION(1:nz,0:ny,nxl:nxr) :: f_out
840	REAL, DIMENSION(nxl:nxr,1:nz,0:ny) :: work
841
842	!
843	!-- Transpose array
844	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
845	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
846	CALL MPI_ALLTOALL( f_in(1,1,nys_x,1), sendrecvcount_xy, MPI_REAL, &
847	work(nxl,1,0), sendrecvcount_xy, MPI_REAL, &
848	comm1dx, ierr )
849	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
850
851	!
852	!-- Resort the data in a way that y becomes the first index and carry out the
853	!-- backward fft along y.
854	CALL cpu_log( log_point_s(7), 'fft_y', 'continue' )
855
856	IF ( host(1:3) == 'nec' ) THEN
857	#if defined( __nec )
858	!
859	!-- Code optimized for vector processors
860	!$OMP PARALLEL PRIVATE ( i, j, k )
861	!$OMP DO
862	DO k = 1, nz
863	DO j = 0, ny
864	DO i = nxl, nxr
865	work_ffty_vec(j,k,i) = work(i,k,j)
866	ENDDO
867	ENDDO
868	ENDDO
869
870	!$OMP DO
871	DO i = nxl, nxr
872
873	CALL fft_y_m( work_ffty_vec(:,:,i), ny+1, 'backward' )
874
875	DO j = 0, ny
876	DO k = 1, nz
877	f_out(k,j,i) = work_ffty_vec(j,k,i)
878	ENDDO
879	ENDDO
880
881	ENDDO
882	!$OMP END PARALLEL
883	#endif
884
885	ELSE
886
887	!
888	!-- Cache optimized code.
889	!-- The i-(x-)direction is split into a strided outer loop and an inner
890	!-- loop for better cache performance
891	!$OMP PARALLEL PRIVATE ( i, iend, iouter, ir, j, k, work_ffty )
892	!$OMP DO
893	DO iouter = nxl, nxr, stridex
894
895	iend = MIN( iouter+stridex-1, nxr ) ! Upper bound for inner i loop
896
897	DO k = 1, nz
898	!
899	!-- Resort
900	DO j = 0, ny
901	DO i = iouter, iend
902	work_ffty(j,i-iouter+1) = work(i,k,j)
903	ENDDO
904	ENDDO
905
906	DO i = iouter, iend
907
908	!
909	!-- FFT along y
910	ir = i-iouter+1 ! counter within a stride
911	CALL fft_y( work_ffty(:,ir), 'backward' )
912
913	DO j = 0, ny
914	f_out(k,j,i) = work_ffty(j,ir)
915	ENDDO
916	ENDDO
917
918	ENDDO
919
920	ENDDO
921	!$OMP END PARALLEL
922
923	ENDIF
924
925	CALL cpu_log( log_point_s(7), 'fft_y', 'stop' )
926
927	END SUBROUTINE tr_xy_ffty
928
929
930	SUBROUTINE fftx_tri_fftx( ar )
931
932	!------------------------------------------------------------------------------!
933	! FFT along x, solution of the tridiagonal system and backward FFT for
934	! a 1d-decomposition along x
935	!
936	! WARNING: this subroutine may still not work for hybrid parallelization
937	! with OpenMP (for possible necessary changes see the original
938	! routine poisfft_hybrid, developed by Klaus Ketelsen, May 2002)
939	!------------------------------------------------------------------------------!
940
941	USE control_parameters
942	USE cpulog
943	USE grid_variables
944	USE indices
945	USE interfaces
946	USE pegrid
947	USE transpose_indices
948
949	IMPLICIT NONE
950
951	character(len=3) :: myth_char
952
953	INTEGER :: i, j, k, m, n, omp_get_thread_num, tn
954
955	REAL, DIMENSION(0:nx) :: work_fftx
956	REAL, DIMENSION(0:nx,1:nz) :: work_trix
957	REAL, DIMENSION(nnx,1:nz,nys_x:nyn_x,pdims(1)) :: ar
958	REAL, DIMENSION(:,:,:,:), ALLOCATABLE :: tri
959
960
961	CALL cpu_log( log_point_s(33), 'fft_x + tridia', 'start' )
962
963	ALLOCATE( tri(5,0:nx,0:nz-1,0:threads_per_task-1) )
964
965	tn = 0 ! Default thread number in case of one thread
966	!$OMP PARALLEL DO PRIVATE ( i, j, k, m, n, tn, work_fftx, work_trix )
967	DO j = nys_x, nyn_x
968
969	!$ tn = omp_get_thread_num()
970
971	IF ( host(1:3) == 'nec' ) THEN
972	!
973	!-- Code optimized for vector processors
974	DO k = 1, nz
975
976	m = 0
977	DO n = 1, pdims(1)
978	DO i = 1, nnx
979	work_trix(m,k) = ar(i,k,j,n)
980	m = m + 1
981	ENDDO
982	ENDDO
983
984	ENDDO
985
986	CALL fft_x_m( work_trix, 'forward' )
987
988	ELSE
989	!
990	!-- Cache optimized code
991	DO k = 1, nz
992
993	m = 0
994	DO n = 1, pdims(1)
995	DO i = 1, nnx
996	work_fftx(m) = ar(i,k,j,n)
997	m = m + 1
998	ENDDO
999	ENDDO
1000
1001	CALL fft_x( work_fftx, 'forward' )
1002
1003	DO i = 0, nx
1004	work_trix(i,k) = work_fftx(i)
1005	ENDDO
1006
1007	ENDDO
1008
1009	ENDIF
1010
1011	!
1012	!-- Solve the linear equation system
1013	CALL tridia_1dd( ddx2, ddy2, nx, ny, j, work_trix, tri(:,:,:,tn) )
1014
1015	IF ( host(1:3) == 'nec' ) THEN
1016	!
1017	!-- Code optimized for vector processors
1018	CALL fft_x_m( work_trix, 'backward' )
1019
1020	DO k = 1, nz
1021
1022	m = 0
1023	DO n = 1, pdims(1)
1024	DO i = 1, nnx
1025	ar(i,k,j,n) = work_trix(m,k)
1026	m = m + 1
1027	ENDDO
1028	ENDDO
1029
1030	ENDDO
1031
1032	ELSE
1033	!
1034	!-- Cache optimized code
1035	DO k = 1, nz
1036
1037	DO i = 0, nx
1038	work_fftx(i) = work_trix(i,k)
1039	ENDDO
1040
1041	CALL fft_x( work_fftx, 'backward' )
1042
1043	m = 0
1044	DO n = 1, pdims(1)
1045	DO i = 1, nnx
1046	ar(i,k,j,n) = work_fftx(m)
1047	m = m + 1
1048	ENDDO
1049	ENDDO
1050
1051	ENDDO
1052
1053	ENDIF
1054
1055	ENDDO
1056
1057	DEALLOCATE( tri )
1058
1059	CALL cpu_log( log_point_s(33), 'fft_x + tridia', 'stop' )
1060
1061	END SUBROUTINE fftx_tri_fftx
1062
1063
1064	SUBROUTINE fftx_tr_xy( f_in, work, f_out )
1065
1066	!------------------------------------------------------------------------------!
1067	! Fourier-transformation along x with subsequent transposition x --> y for
1068	! a 1d-decomposition along y
1069	!
1070	! ATTENTION: The NEC-branch of this routine may significantly profit from
1071	! further optimizations. So far, performance is much worse than
1072	! for routine ffty_tr_yx (more than three times slower).
1073	!------------------------------------------------------------------------------!
1074
1075	USE control_parameters
1076	USE cpulog
1077	USE indices
1078	USE interfaces
1079	USE pegrid
1080	USE transpose_indices
1081
1082	IMPLICIT NONE
1083
1084	INTEGER :: i, j, k
1085
1086	REAL, DIMENSION(0:nx,1:nz,nys:nyn) :: work_fftx
1087	REAL, DIMENSION(1:nz,nys:nyn,0:nx) :: f_in
1088	REAL, DIMENSION(nny,1:nz,nxl_y:nxr_y,pdims(2)) :: f_out
1089	REAL, DIMENSION(nys:nyn,1:nz,0:nx) :: work
1090
1091	!
1092	!-- Carry out the FFT along x, where all data are present due to the
1093	!-- 1d-decomposition along y. Resort the data in a way that y becomes
1094	!-- the first index.
1095	CALL cpu_log( log_point_s(4), 'fft_x', 'start' )
1096
1097	IF ( host(1:3) == 'nec' ) THEN
1098	!
1099	!-- Code for vector processors
1100	!$OMP PARALLEL PRIVATE ( i, j, k )
1101	!$OMP DO
1102	DO i = 0, nx
1103
1104	DO j = nys, nyn
1105	DO k = 1, nz
1106	work_fftx(i,k,j) = f_in(k,j,i)
1107	ENDDO
1108	ENDDO
1109
1110	ENDDO
1111
1112	!$OMP DO
1113	DO j = nys, nyn
1114
1115	CALL fft_x_m( work_fftx(:,:,j), 'forward' )
1116
1117	DO k = 1, nz
1118	DO i = 0, nx
1119	work(j,k,i) = work_fftx(i,k,j)
1120	ENDDO
1121	ENDDO
1122
1123	ENDDO
1124	!$OMP END PARALLEL
1125
1126	ELSE
1127
1128	!
1129	!-- Cache optimized code (there might be still a potential for better
1130	!-- optimization).
1131	!$OMP PARALLEL PRIVATE (i,j,k)
1132	!$OMP DO
1133	DO i = 0, nx
1134
1135	DO j = nys, nyn
1136	DO k = 1, nz
1137	work_fftx(i,k,j) = f_in(k,j,i)
1138	ENDDO
1139	ENDDO
1140
1141	ENDDO
1142
1143	!$OMP DO
1144	DO j = nys, nyn
1145	DO k = 1, nz
1146
1147	CALL fft_x( work_fftx(0:nx,k,j), 'forward' )
1148
1149	DO i = 0, nx
1150	work(j,k,i) = work_fftx(i,k,j)
1151	ENDDO
1152	ENDDO
1153
1154	ENDDO
1155	!$OMP END PARALLEL
1156
1157	ENDIF
1158	CALL cpu_log( log_point_s(4), 'fft_x', 'pause' )
1159
1160	!
1161	!-- Transpose array
1162	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
1163	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
1164	CALL MPI_ALLTOALL( work(nys,1,0), sendrecvcount_xy, MPI_REAL, &
1165	f_out(1,1,nxl_y,1), sendrecvcount_xy, MPI_REAL, &
1166	comm1dy, ierr )
1167	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
1168
1169	END SUBROUTINE fftx_tr_xy
1170
1171
1172	SUBROUTINE tr_yx_fftx( f_in, work, f_out )
1173
1174	!------------------------------------------------------------------------------!
1175	! Transposition y --> x with a subsequent backward Fourier transformation for
1176	! a 1d-decomposition along x
1177	!------------------------------------------------------------------------------!
1178
1179	USE control_parameters
1180	USE cpulog
1181	USE indices
1182	USE interfaces
1183	USE pegrid
1184	USE transpose_indices
1185
1186	IMPLICIT NONE
1187
1188	INTEGER :: i, j, k
1189
1190	REAL, DIMENSION(0:nx,1:nz,nys:nyn) :: work_fftx
1191	REAL, DIMENSION(nny,1:nz,nxl_y:nxr_y,pdims(2)) :: f_in
1192	REAL, DIMENSION(1:nz,nys:nyn,0:nx) :: f_out
1193	REAL, DIMENSION(nys:nyn,1:nz,0:nx) :: work
1194
1195	!
1196	!-- Transpose array
1197	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
1198	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
1199	CALL MPI_ALLTOALL( f_in(1,1,nxl_y,1), sendrecvcount_xy, MPI_REAL, &
1200	work(nys,1,0), sendrecvcount_xy, MPI_REAL, &
1201	comm1dy, ierr )
1202	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
1203
1204	!
1205	!-- Carry out the FFT along x, where all data are present due to the
1206	!-- 1d-decomposition along y. Resort the data in a way that y becomes
1207	!-- the first index.
1208	CALL cpu_log( log_point_s(4), 'fft_x', 'continue' )
1209
1210	IF ( host(1:3) == 'nec' ) THEN
1211	!
1212	!-- Code optimized for vector processors
1213	!$OMP PARALLEL PRIVATE ( i, j, k )
1214	!$OMP DO
1215	DO j = nys, nyn
1216
1217	DO k = 1, nz
1218	DO i = 0, nx
1219	work_fftx(i,k,j) = work(j,k,i)
1220	ENDDO
1221	ENDDO
1222
1223	CALL fft_x_m( work_fftx(:,:,j), 'backward' )
1224
1225	ENDDO
1226
1227	!$OMP DO
1228	DO i = 0, nx
1229	DO j = nys, nyn
1230	DO k = 1, nz
1231	f_out(k,j,i) = work_fftx(i,k,j)
1232	ENDDO
1233	ENDDO
1234	ENDDO
1235	!$OMP END PARALLEL
1236
1237	ELSE
1238
1239	!
1240	!-- Cache optimized code (there might be still a potential for better
1241	!-- optimization).
1242	!$OMP PARALLEL PRIVATE (i,j,k)
1243	!$OMP DO
1244	DO j = nys, nyn
1245	DO k = 1, nz
1246
1247	DO i = 0, nx
1248	work_fftx(i,k,j) = work(j,k,i)
1249	ENDDO
1250
1251	CALL fft_x( work_fftx(0:nx,k,j), 'backward' )
1252
1253	ENDDO
1254	ENDDO
1255
1256	!$OMP DO
1257	DO i = 0, nx
1258	DO j = nys, nyn
1259	DO k = 1, nz
1260	f_out(k,j,i) = work_fftx(i,k,j)
1261	ENDDO
1262	ENDDO
1263	ENDDO
1264	!$OMP END PARALLEL
1265
1266	ENDIF
1267	CALL cpu_log( log_point_s(4), 'fft_x', 'stop' )
1268
1269	END SUBROUTINE tr_yx_fftx
1270
1271
1272	SUBROUTINE ffty_tri_ffty( ar )
1273
1274	!------------------------------------------------------------------------------!
1275	! FFT along y, solution of the tridiagonal system and backward FFT for
1276	! a 1d-decomposition along y
1277	!
1278	! WARNING: this subroutine may still not work for hybrid parallelization
1279	! with OpenMP (for possible necessary changes see the original
1280	! routine poisfft_hybrid, developed by Klaus Ketelsen, May 2002)
1281	!------------------------------------------------------------------------------!
1282
1283	USE control_parameters
1284	USE cpulog
1285	USE grid_variables
1286	USE indices
1287	USE interfaces
1288	USE pegrid
1289	USE transpose_indices
1290
1291	IMPLICIT NONE
1292
1293	INTEGER :: i, j, k, m, n, omp_get_thread_num, tn
1294
1295	REAL, DIMENSION(0:ny) :: work_ffty
1296	REAL, DIMENSION(0:ny,1:nz) :: work_triy
1297	REAL, DIMENSION(nny,1:nz,nxl_y:nxr_y,pdims(2)) :: ar
1298	REAL, DIMENSION(:,:,:,:), ALLOCATABLE :: tri
1299
1300
1301	CALL cpu_log( log_point_s(39), 'fft_y + tridia', 'start' )
1302
1303	ALLOCATE( tri(5,0:ny,0:nz-1,0:threads_per_task-1) )
1304
1305	tn = 0 ! Default thread number in case of one thread
1306	!$OMP PARALLEL DO PRIVATE ( i, j, k, m, n, tn, work_ffty, work_triy )
1307	DO i = nxl_y, nxr_y
1308
1309	!$ tn = omp_get_thread_num()
1310
1311	IF ( host(1:3) == 'nec' ) THEN
1312	!
1313	!-- Code optimized for vector processors
1314	DO k = 1, nz
1315
1316	m = 0
1317	DO n = 1, pdims(2)
1318	DO j = 1, nny
1319	work_triy(m,k) = ar(j,k,i,n)
1320	m = m + 1
1321	ENDDO
1322	ENDDO
1323
1324	ENDDO
1325
1326	CALL fft_y_m( work_triy, ny, 'forward' )
1327
1328	ELSE
1329	!
1330	!-- Cache optimized code
1331	DO k = 1, nz
1332
1333	m = 0
1334	DO n = 1, pdims(2)
1335	DO j = 1, nny
1336	work_ffty(m) = ar(j,k,i,n)
1337	m = m + 1
1338	ENDDO
1339	ENDDO
1340
1341	CALL fft_y( work_ffty, 'forward' )
1342
1343	DO j = 0, ny
1344	work_triy(j,k) = work_ffty(j)
1345	ENDDO
1346
1347	ENDDO
1348
1349	ENDIF
1350
1351	!
1352	!-- Solve the linear equation system
1353	CALL tridia_1dd( ddy2, ddx2, ny, nx, i, work_triy, tri(:,:,:,tn) )
1354
1355	IF ( host(1:3) == 'nec' ) THEN
1356	!
1357	!-- Code optimized for vector processors
1358	CALL fft_y_m( work_triy, ny, 'backward' )
1359
1360	DO k = 1, nz
1361
1362	m = 0
1363	DO n = 1, pdims(2)
1364	DO j = 1, nny
1365	ar(j,k,i,n) = work_triy(m,k)
1366	m = m + 1
1367	ENDDO
1368	ENDDO
1369
1370	ENDDO
1371
1372	ELSE
1373	!
1374	!-- Cache optimized code
1375	DO k = 1, nz
1376
1377	DO j = 0, ny
1378	work_ffty(j) = work_triy(j,k)
1379	ENDDO
1380
1381	CALL fft_y( work_ffty, 'backward' )
1382
1383	m = 0
1384	DO n = 1, pdims(2)
1385	DO j = 1, nny
1386	ar(j,k,i,n) = work_ffty(m)
1387	m = m + 1
1388	ENDDO
1389	ENDDO
1390
1391	ENDDO
1392
1393	ENDIF
1394
1395	ENDDO
1396
1397	DEALLOCATE( tri )
1398
1399	CALL cpu_log( log_point_s(39), 'fft_y + tridia', 'stop' )
1400
1401	END SUBROUTINE ffty_tri_ffty
1402
1403
1404	SUBROUTINE tridia_1dd( ddx2, ddy2, nx, ny, j, ar, tri )
1405
1406	!------------------------------------------------------------------------------!
1407	! Solves the linear system of equations for a 1d-decomposition along x (see
1408	! tridia)
1409	!
1410	! Attention: when using the intel compilers older than 12.0, array tri must
1411	! be passed as an argument to the contained subroutines. Otherwise
1412	! addres faults will occur. This feature can be activated with
1413	! cpp-switch __intel11
1414	! On NEC, tri should not be passed (except for routine substi_1dd)
1415	! because this causes very bad performance.
1416	!------------------------------------------------------------------------------!
1417
1418	USE arrays_3d
1419	USE control_parameters
1420
1421	USE pegrid
1422
1423	IMPLICIT NONE
1424
1425	INTEGER :: i, j, k, nnyh, nx, ny, omp_get_thread_num, tn
1426
1427	REAL :: ddx2, ddy2
1428
1429	REAL, DIMENSION(0:nx,1:nz) :: ar
1430	REAL, DIMENSION(0:nx,0:nz-1) :: ar1
1431	REAL, DIMENSION(5,0:nx,0:nz-1) :: tri
1432
1433
1434	nnyh = ( ny + 1 ) / 2
1435
1436	!
1437	!-- Define constant elements of the tridiagonal matrix.
1438	!-- The compiler on SX6 does loop exchange. If 0:nx is a high power of 2,
1439	!-- the exchanged loops create bank conflicts. The following directive
1440	!-- prohibits loop exchange and the loops perform much better.
1441	! tn = omp_get_thread_num()
1442	! WRITE( 120+tn, * ) '+++ id=',myid,' nx=',nx,' thread=', omp_get_thread_num()
1443	! CALL local_flush( 120+tn )
1444	!CDIR NOLOOPCHG
1445	DO k = 0, nz-1
1446	DO i = 0,nx
1447	tri(2,i,k) = ddzu_pres(k+1) * ddzw(k+1)
1448	tri(3,i,k) = ddzu_pres(k+2) * ddzw(k+1)
1449	ENDDO
1450	ENDDO
1451	! WRITE( 120+tn, * ) '+++ id=',myid,' end of first tridia loop thread=', omp_get_thread_num()
1452	! CALL local_flush( 120+tn )
1453
1454	IF ( j <= nnyh ) THEN
1455	#if defined( __intel11 )
1456	CALL maketri_1dd( j, tri )
1457	#else
1458	CALL maketri_1dd( j )
1459	#endif
1460	ELSE
1461	#if defined( __intel11 )
1462	CALL maketri_1dd( ny+1-j, tri )
1463	#else
1464	CALL maketri_1dd( ny+1-j )
1465	#endif
1466	ENDIF
1467	#if defined( __intel11 )
1468	CALL split_1dd( tri )
1469	#else
1470	CALL split_1dd
1471	#endif
1472	CALL substi_1dd( ar, tri )
1473
1474	CONTAINS
1475
1476	#if defined( __intel11 )
1477	SUBROUTINE maketri_1dd( j, tri )
1478	#else
1479	SUBROUTINE maketri_1dd( j )
1480	#endif
1481
1482	!------------------------------------------------------------------------------!
1483	! computes the i- and j-dependent component of the matrix
1484	!------------------------------------------------------------------------------!
1485
1486	USE constants
1487
1488	IMPLICIT NONE
1489
1490	INTEGER :: i, j, k, nnxh
1491	REAL :: a, c
1492
1493	REAL, DIMENSION(0:nx) :: l
1494
1495	#if defined( __intel11 )
1496	REAL, DIMENSION(5,0:nx,0:nz-1) :: tri
1497	#endif
1498
1499
1500	nnxh = ( nx + 1 ) / 2
1501	!
1502	!-- Provide the tridiagonal matrix for solution of the Poisson equation in
1503	!-- Fourier space. The coefficients are computed following the method of
1504	!-- Schmidt et al. (DFVLR-Mitteilung 84-15), which departs from Stephan
1505	!-- Siano's original version by discretizing the Poisson equation,
1506	!-- before it is Fourier-transformed
1507	DO i = 0, nx
1508	IF ( i >= 0 .AND. i <= nnxh ) THEN
1509	l(i) = 2.0 * ( 1.0 - COS( ( 2.0 * pi * i ) / &
1510	REAL( nx+1 ) ) ) * ddx2 + &
1511	2.0 * ( 1.0 - COS( ( 2.0 * pi * j ) / &
1512	REAL( ny+1 ) ) ) * ddy2
1513	ELSE
1514	l(i) = 2.0 * ( 1.0 - COS( ( 2.0 * pi * ( nx+1-i ) ) / &
1515	REAL( nx+1 ) ) ) * ddx2 + &
1516	2.0 * ( 1.0 - COS( ( 2.0 * pi * j ) / &
1517	REAL( ny+1 ) ) ) * ddy2
1518	ENDIF
1519	ENDDO
1520
1521	DO k = 0, nz-1
1522	DO i = 0, nx
1523	a = -1.0 * ddzu_pres(k+2) * ddzw(k+1)
1524	c = -1.0 * ddzu_pres(k+1) * ddzw(k+1)
1525	tri(1,i,k) = a + c - l(i)
1526	ENDDO
1527	ENDDO
1528	IF ( ibc_p_b == 1 .OR. ibc_p_b == 2 ) THEN
1529	DO i = 0, nx
1530	tri(1,i,0) = tri(1,i,0) + tri(2,i,0)
1531	ENDDO
1532	ENDIF
1533	IF ( ibc_p_t == 1 ) THEN
1534	DO i = 0, nx
1535	tri(1,i,nz-1) = tri(1,i,nz-1) + tri(3,i,nz-1)
1536	ENDDO
1537	ENDIF
1538
1539	END SUBROUTINE maketri_1dd
1540
1541
1542	#if defined( __intel11 )
1543	SUBROUTINE split_1dd( tri )
1544	#else
1545	SUBROUTINE split_1dd
1546	#endif
1547
1548	!------------------------------------------------------------------------------!
1549	! Splitting of the tridiagonal matrix (Thomas algorithm)
1550	!------------------------------------------------------------------------------!
1551
1552	IMPLICIT NONE
1553
1554	INTEGER :: i, k
1555
1556	#if defined( __intel11 )
1557	REAL, DIMENSION(5,0:nx,0:nz-1) :: tri
1558	#endif
1559
1560
1561	!
1562	!-- Splitting
1563	DO i = 0, nx
1564	tri(4,i,0) = tri(1,i,0)
1565	ENDDO
1566	DO k = 1, nz-1
1567	DO i = 0, nx
1568	tri(5,i,k) = tri(2,i,k) / tri(4,i,k-1)
1569	tri(4,i,k) = tri(1,i,k) - tri(3,i,k-1) * tri(5,i,k)
1570	ENDDO
1571	ENDDO
1572
1573	END SUBROUTINE split_1dd
1574
1575
1576	SUBROUTINE substi_1dd( ar, tri )
1577
1578	!------------------------------------------------------------------------------!
1579	! Substitution (Forward and Backward) (Thomas algorithm)
1580	!------------------------------------------------------------------------------!
1581
1582	IMPLICIT NONE
1583
1584	INTEGER :: i, k
1585
1586	REAL, DIMENSION(0:nx,nz) :: ar
1587	REAL, DIMENSION(0:nx,0:nz-1) :: ar1
1588	REAL, DIMENSION(5,0:nx,0:nz-1) :: tri
1589
1590	!
1591	!-- Forward substitution
1592	DO i = 0, nx
1593	ar1(i,0) = ar(i,1)
1594	ENDDO
1595	DO k = 1, nz-1
1596	DO i = 0, nx
1597	ar1(i,k) = ar(i,k+1) - tri(5,i,k) * ar1(i,k-1)
1598	ENDDO
1599	ENDDO
1600
1601	!
1602	!-- Backward substitution
1603	!-- Note, the add of 1.0E-20 in the denominator is due to avoid divisions
1604	!-- by zero appearing if the pressure bc is set to neumann at the top of
1605	!-- the model domain.
1606	DO i = 0, nx
1607	ar(i,nz) = ar1(i,nz-1) / ( tri(4,i,nz-1) + 1.0E-20 )
1608	ENDDO
1609	DO k = nz-2, 0, -1
1610	DO i = 0, nx
1611	ar(i,k+1) = ( ar1(i,k) - tri(3,i,k) * ar(i,k+2) ) &
1612	/ tri(4,i,k)
1613	ENDDO
1614	ENDDO
1615
1616	!
1617	!-- Indices i=0, j=0 correspond to horizontally averaged pressure.
1618	!-- The respective values of ar should be zero at all k-levels if
1619	!-- acceleration of horizontally averaged vertical velocity is zero.
1620	IF ( ibc_p_b == 1 .AND. ibc_p_t == 1 ) THEN
1621	IF ( j == 0 ) THEN
1622	DO k = 1, nz
1623	ar(0,k) = 0.0
1624	ENDDO
1625	ENDIF
1626	ENDIF
1627
1628	END SUBROUTINE substi_1dd
1629
1630	END SUBROUTINE tridia_1dd
1631
1632	#endif
1633	#endif
1634	END MODULE poisfft_mod

Note: See TracBrowser for help on using the repository browser.

Download in other formats:

| Impressum | ©Leibniz Universität Hannover |