Home

Context Navigation

source: palm/trunk/SOURCE/poisfft.f90 @ 809

Last change on this file since 809 was 809, checked in by maronga, 12 years ago
Bugfix: cpp directives .NOT., .AND. replaced by !,&&. Minor bugfixes in mrungui
Property svn:keywords set to `Id`
File size: 46.5 KB

Line
1	MODULE poisfft_mod
2
3	!------------------------------------------------------------------------------!
4	! Current revisions:
5	! -----------------
6	! Bugfix: replaced .AND. and .NOT. with && and ! in the preprocessor directives
7	!
8	! Former revisions:
9	! -----------------
10	! $Id: poisfft.f90 809 2012-01-30 13:32:58Z maronga $
11	!
12	! 807 2012-01-25 11:53:51Z maronga
13	! New cpp directive "__check" implemented which is used by check_namelist_files
14	! (most of the code is unneeded by check_namelist_files).
15	!
16	! 763 2011-10-06 09:32:09Z suehring
17	! Comment added concerning the last change.
18	!
19	! 761 2011-10-05 17:58:52Z suehring
20	! Bugfix: Avoid divisions by zero in case of using a 'neumann' bc for the
21	! pressure at the top of the model domain.
22	!
23	! 696 2011-03-18 07:03:49Z raasch
24	! work_fftx removed from PRIVATE clauses in fftx_tr_xy and tr_yx_fftx
25	!
26	! 683 2011-02-09 14:25:15Z raasch
27	! openMP parallelization for 2d-domain-decomposition
28	!
29	! 667 2010-12-23 12:06:00Z suehring/gryschka
30	! ddzu replaced by ddzu_pres due to changes in zu(0)
31	!
32	! 622 2010-12-10 08:08:13Z raasch
33	! optional barriers included in order to speed up collective operations
34	!
35	! 377 2009-09-04 11:09:00Z raasch
36	! __lcmuk changed to __lc to avoid problems with Intel compiler on sgi-ice
37	!
38	! 164 2008-05-15 08:46:15Z raasch
39	! Arguments removed from transpose routines
40	!
41	! 128 2007-10-26 13:11:14Z raasch
42	! Bugfix: wavenumber calculation for even nx in routines maketri
43	!
44	! 85 2007-05-11 09:35:14Z raasch
45	! Bugfix: work_fft*_vec removed from some PRIVATE-declarations
46	!
47	! 76 2007-03-29 00:58:32Z raasch
48	! Tridiagonal coefficients adjusted for Neumann boundary conditions both at
49	! the bottom and the top.
50	!
51	! RCS Log replace by Id keyword, revision history cleaned up
52	!
53	! Revision 1.24 2006/08/04 15:00:24 raasch
54	! Default setting of the thread number tn in case of not using OpenMP
55	!
56	! Revision 1.23 2006/02/23 12:48:38 raasch
57	! Additional compiler directive in routine tridia_1dd for preventing loop
58	! exchange on NEC-SX6
59	!
60	! Revision 1.20 2004/04/30 12:38:09 raasch
61	! Parts of former poisfft_hybrid moved to this subroutine,
62	! former subroutine changed to a module, renaming of FFT-subroutines and
63	! -module, FFTs completely substituted by calls of fft_x and fft_y,
64	! NAG fft used in the non-parallel case completely removed, l in maketri
65	! is now a 1d-array, variables passed by modules instead of using parameter
66	! lists, enlarged transposition arrays introduced
67	!
68	! Revision 1.1 1997/07/24 11:24:14 raasch
69	! Initial revision
70	!
71	!
72	! Description:
73	! ------------
74	! See below.
75	!------------------------------------------------------------------------------!
76
77	!--------------------------------------------------------------------------!
78	! poisfft !
79	! !
80	! Original version: Stephan Siano (pois3d) !
81	! !
82	! Institute of Meteorology and Climatology, University of Hannover !
83	! Germany !
84	! !
85	! Version as of July 23,1996 !
86	! !
87	! !
88	! Version for parallel computers: Siegfried Raasch !
89	! !
90	! Version as of July 03,1997 !
91	! !
92	! Solves the Poisson equation with a 2D spectral method !
93	! d^2 p / dx^2 + d^2 p / dy^2 + d^2 p / dz^2 = s !
94	! !
95	! Input: !
96	! real ar contains in the (nnx,nny,nnz) elements, !
97	! starting from the element (1,nys,nxl), the !
98	! values for s !
99	! real work Temporary array !
100	! !
101	! Output: !
102	! real ar contains the solution for p !
103	!--------------------------------------------------------------------------!
104
105	USE fft_xy
106	USE indices
107	USE transpose_indices
108
109	IMPLICIT NONE
110
111	PRIVATE
112
113	#if ! defined ( __check )
114	PUBLIC poisfft, poisfft_init
115
116	INTERFACE poisfft
117	MODULE PROCEDURE poisfft
118	END INTERFACE poisfft
119
120	INTERFACE poisfft_init
121	MODULE PROCEDURE poisfft_init
122	END INTERFACE poisfft_init
123	#else
124	PUBLIC poisfft_init
125
126	INTERFACE poisfft_init
127	MODULE PROCEDURE poisfft_init
128	END INTERFACE poisfft_init
129	#endif
130
131	CONTAINS
132
133	SUBROUTINE poisfft_init
134
135	CALL fft_init
136
137	END SUBROUTINE poisfft_init
138
139	#if ! defined ( __check )
140	SUBROUTINE poisfft( ar, work )
141
142	USE cpulog
143	USE interfaces
144	USE pegrid
145
146	IMPLICIT NONE
147
148	REAL, DIMENSION(1:nza,nys:nyna,nxl:nxra) :: ar, work
149
150
151	CALL cpu_log( log_point_s(3), 'poisfft', 'start' )
152
153	!
154	!-- Two-dimensional Fourier Transformation in x- and y-direction.
155	#if defined( __parallel )
156	IF ( pdims(2) == 1 ) THEN
157
158	!
159	!-- 1d-domain-decomposition along x:
160	!-- FFT along y and transposition y --> x
161	CALL ffty_tr_yx( ar, work, ar )
162
163	!
164	!-- FFT along x, solving the tridiagonal system and backward FFT
165	CALL fftx_tri_fftx( ar )
166
167	!
168	!-- Transposition x --> y and backward FFT along y
169	CALL tr_xy_ffty( ar, work, ar )
170
171	ELSEIF ( pdims(1) == 1 ) THEN
172
173	!
174	!-- 1d-domain-decomposition along y:
175	!-- FFT along x and transposition x --> y
176	CALL fftx_tr_xy( ar, work, ar )
177
178	!
179	!-- FFT along y, solving the tridiagonal system and backward FFT
180	CALL ffty_tri_ffty( ar )
181
182	!
183	!-- Transposition y --> x and backward FFT along x
184	CALL tr_yx_fftx( ar, work, ar )
185
186	ELSE
187
188	!
189	!-- 2d-domain-decomposition
190	!-- Transposition z --> x
191	CALL cpu_log( log_point_s(5), 'transpo forward', 'start' )
192	CALL transpose_zx( ar, work, ar )
193	CALL cpu_log( log_point_s(5), 'transpo forward', 'pause' )
194
195	CALL cpu_log( log_point_s(4), 'fft_x', 'start' )
196	CALL fftxp( ar, 'forward' )
197	CALL cpu_log( log_point_s(4), 'fft_x', 'pause' )
198
199	!
200	!-- Transposition x --> y
201	CALL cpu_log( log_point_s(5), 'transpo forward', 'continue' )
202	CALL transpose_xy( ar, work, ar )
203	CALL cpu_log( log_point_s(5), 'transpo forward', 'pause' )
204
205	CALL cpu_log( log_point_s(7), 'fft_y', 'start' )
206	CALL fftyp( ar, 'forward' )
207	CALL cpu_log( log_point_s(7), 'fft_y', 'pause' )
208
209	!
210	!-- Transposition y --> z
211	CALL cpu_log( log_point_s(5), 'transpo forward', 'continue' )
212	CALL transpose_yz( ar, work, ar )
213	CALL cpu_log( log_point_s(5), 'transpo forward', 'stop' )
214
215	!
216	!-- Solve the Poisson equation in z-direction in cartesian space.
217	CALL cpu_log( log_point_s(6), 'tridia', 'start' )
218	CALL tridia( ar )
219	CALL cpu_log( log_point_s(6), 'tridia', 'stop' )
220
221	!
222	!-- Inverse Fourier Transformation
223	!-- Transposition z --> y
224	CALL cpu_log( log_point_s(8), 'transpo invers', 'start' )
225	CALL transpose_zy( ar, work, ar )
226	CALL cpu_log( log_point_s(8), 'transpo invers', 'pause' )
227
228	CALL cpu_log( log_point_s(7), 'fft_y', 'continue' )
229	CALL fftyp( ar, 'backward' )
230	CALL cpu_log( log_point_s(7), 'fft_y', 'stop' )
231
232	!
233	!-- Transposition y --> x
234	CALL cpu_log( log_point_s(8), 'transpo invers', 'continue' )
235	CALL transpose_yx( ar, work, ar )
236	CALL cpu_log( log_point_s(8), 'transpo invers', 'pause' )
237
238	CALL cpu_log( log_point_s(4), 'fft_x', 'continue' )
239	CALL fftxp( ar, 'backward' )
240	CALL cpu_log( log_point_s(4), 'fft_x', 'stop' )
241
242	!
243	!-- Transposition x --> z
244	CALL cpu_log( log_point_s(8), 'transpo invers', 'continue' )
245	CALL transpose_xz( ar, work, ar )
246	CALL cpu_log( log_point_s(8), 'transpo invers', 'stop' )
247
248	ENDIF
249
250	#else
251
252	!
253	!-- Two-dimensional Fourier Transformation along x- and y-direction.
254	CALL cpu_log( log_point_s(4), 'fft_x', 'start' )
255	CALL fftx( ar, 'forward' )
256	CALL cpu_log( log_point_s(4), 'fft_x', 'pause' )
257	CALL cpu_log( log_point_s(7), 'fft_y', 'start' )
258	CALL ffty( ar, 'forward' )
259	CALL cpu_log( log_point_s(7), 'fft_y', 'pause' )
260
261	!
262	!-- Solve the Poisson equation in z-direction in cartesian space.
263	CALL cpu_log( log_point_s(6), 'tridia', 'start' )
264	CALL tridia( ar )
265	CALL cpu_log( log_point_s(6), 'tridia', 'stop' )
266
267	!
268	!-- Inverse Fourier Transformation.
269	CALL cpu_log( log_point_s(7), 'fft_y', 'continue' )
270	CALL ffty( ar, 'backward' )
271	CALL cpu_log( log_point_s(7), 'fft_y', 'stop' )
272	CALL cpu_log( log_point_s(4), 'fft_x', 'continue' )
273	CALL fftx( ar, 'backward' )
274	CALL cpu_log( log_point_s(4), 'fft_x', 'stop' )
275
276	#endif
277
278	CALL cpu_log( log_point_s(3), 'poisfft', 'stop' )
279
280	END SUBROUTINE poisfft
281
282
283
284	SUBROUTINE tridia( ar )
285
286	!------------------------------------------------------------------------------!
287	! solves the linear system of equations:
288	!
289	! -(4 pi^2(i^2/(dx^2nnx^2)+j^2/(dy^2nny^2))+
290	! 1/(dzu(k)dzw(k))+1/(dzu(k-1)dzw(k)))*p(i,j,k)+
291	! 1/(dzu(k)dzw(k))p(i,j,k+1)+1/(dzu(k-1)dzw(k))p(i,j,k-1)=d(i,j,k)
292	!
293	! by using the Thomas algorithm
294	!------------------------------------------------------------------------------!
295
296	USE arrays_3d
297
298	IMPLICIT NONE
299
300	INTEGER :: i, j, k, nnyh
301
302	REAL, DIMENSION(nxl_z:nxr_z,0:nz-1) :: ar1
303	REAL, DIMENSION(5,nxl_z:nxr_z,0:nz-1) :: tri
304
305	#if defined( __parallel )
306	REAL :: ar(nxl_z:nxr_za,nys_z:nyn_za,1:nza)
307	#else
308	REAL :: ar(1:nz,nys_z:nyn_z,nxl_z:nxr_z)
309	#endif
310
311
312	nnyh = (ny+1) / 2
313
314	!
315	!-- Define constant elements of the tridiagonal matrix.
316	!$OMP PARALLEL PRIVATE ( k, i )
317	!$OMP DO
318	DO k = 0, nz-1
319	DO i = nxl_z, nxr_z
320	tri(2,i,k) = ddzu_pres(k+1) * ddzw(k+1)
321	tri(3,i,k) = ddzu_pres(k+2) * ddzw(k+1)
322	ENDDO
323	ENDDO
324	!$OMP END PARALLEL
325
326	#if defined( __parallel )
327	!
328	!-- Repeat for all y-levels.
329	!$OMP PARALLEL FIRSTPRIVATE( tri ) PRIVATE ( ar1, j )
330	!$OMP DO
331	DO j = nys_z, nyn_z
332	IF ( j <= nnyh ) THEN
333	CALL maketri( tri, j )
334	ELSE
335	CALL maketri( tri, ny+1-j )
336	ENDIF
337	CALL split( tri )
338	CALL substi( ar, ar1, tri, j )
339	ENDDO
340	!$OMP END PARALLEL
341	#else
342	!
343	!-- First y-level.
344	CALL maketri( tri, nys_z )
345	CALL split( tri )
346	CALL substi( ar, ar1, tri, 0 )
347
348	!
349	!-- Further y-levels.
350	DO j = 1, nnyh - 1
351	CALL maketri( tri, j )
352	CALL split( tri )
353	CALL substi( ar, ar1, tri, j )
354	CALL substi( ar, ar1, tri, ny+1-j )
355	ENDDO
356	CALL maketri( tri, nnyh )
357	CALL split( tri )
358	CALL substi( ar, ar1, tri, nnyh+nys )
359	#endif
360
361	CONTAINS
362
363	SUBROUTINE maketri( tri, j )
364
365	!------------------------------------------------------------------------------!
366	! Computes the i- and j-dependent component of the matrix
367	!------------------------------------------------------------------------------!
368
369	USE arrays_3d
370	USE constants
371	USE control_parameters
372	USE grid_variables
373
374	IMPLICIT NONE
375
376	INTEGER :: i, j, k, nnxh
377	REAL :: a, c
378	REAL :: ll(nxl_z:nxr_z)
379	REAL :: tri(5,nxl_z:nxr_z,0:nz-1)
380
381
382	nnxh = ( nx + 1 ) / 2
383
384	!
385	!-- Provide the tridiagonal matrix for solution of the Poisson equation in
386	!-- Fourier space. The coefficients are computed following the method of
387	!-- Schmidt et al. (DFVLR-Mitteilung 84-15), which departs from Stephan
388	!-- Siano's original version by discretizing the Poisson equation,
389	!-- before it is Fourier-transformed
390	#if defined( __parallel )
391	DO i = nxl_z, nxr_z
392	IF ( i >= 0 .AND. i <= nnxh ) THEN
393	ll(i) = 2.0 * ( 1.0 - COS( ( 2.0 * pi * i ) / &
394	FLOAT( nx+1 ) ) ) / ( dx * dx ) + &
395	2.0 * ( 1.0 - COS( ( 2.0 * pi * j ) / &
396	FLOAT( ny+1 ) ) ) / ( dy * dy )
397	ELSE
398	ll(i) = 2.0 * ( 1.0 - COS( ( 2.0 * pi * ( nx+1-i ) ) / &
399	FLOAT( nx+1 ) ) ) / ( dx * dx ) + &
400	2.0 * ( 1.0 - COS( ( 2.0 * pi * j ) / &
401	FLOAT( ny+1 ) ) ) / ( dy * dy )
402	ENDIF
403	DO k = 0,nz-1
404	a = -1.0 * ddzu_pres(k+2) * ddzw(k+1)
405	c = -1.0 * ddzu_pres(k+1) * ddzw(k+1)
406	tri(1,i,k) = a + c - ll(i)
407	ENDDO
408	ENDDO
409	#else
410	DO i = 0, nnxh
411	ll(i) = 2.0 * ( 1.0 - COS( ( 2.0 * pi * i ) / FLOAT( nx+1 ) ) ) / &
412	( dx * dx ) + &
413	2.0 * ( 1.0 - COS( ( 2.0 * pi * j ) / FLOAT( ny+1 ) ) ) / &
414	( dy * dy )
415	DO k = 0, nz-1
416	a = -1.0 * ddzu_pres(k+2) * ddzw(k+1)
417	c = -1.0 * ddzu_pres(k+1) * ddzw(k+1)
418	tri(1,i,k) = a + c - ll(i)
419	IF ( i >= 1 .and. i < nnxh ) THEN
420	tri(1,nx+1-i,k) = tri(1,i,k)
421	ENDIF
422	ENDDO
423	ENDDO
424	#endif
425	IF ( ibc_p_b == 1 .OR. ibc_p_b == 2 ) THEN
426	DO i = nxl_z, nxr_z
427	tri(1,i,0) = tri(1,i,0) + tri(2,i,0)
428	ENDDO
429	ENDIF
430	IF ( ibc_p_t == 1 ) THEN
431	DO i = nxl_z, nxr_z
432	tri(1,i,nz-1) = tri(1,i,nz-1) + tri(3,i,nz-1)
433	ENDDO
434	ENDIF
435
436	END SUBROUTINE maketri
437
438
439	SUBROUTINE substi( ar, ar1, tri, j )
440
441	!------------------------------------------------------------------------------!
442	! Substitution (Forward and Backward) (Thomas algorithm)
443	!------------------------------------------------------------------------------!
444
445	USE control_parameters
446
447	IMPLICIT NONE
448
449	INTEGER :: i, j, k
450	REAL :: ar1(nxl_z:nxr_z,0:nz-1)
451	REAL :: tri(5,nxl_z:nxr_z,0:nz-1)
452	#if defined( __parallel )
453	REAL :: ar(nxl_z:nxr_za,nys_z:nyn_za,1:nza)
454	#else
455	REAL :: ar(1:nz,nys_z:nyn_z,nxl_z:nxr_z)
456	#endif
457
458	!
459	!-- Forward substitution.
460	DO i = nxl_z, nxr_z
461	#if defined( __parallel )
462	ar1(i,0) = ar(i,j,1)
463	#else
464	ar1(i,0) = ar(1,j,i)
465	#endif
466	ENDDO
467	DO k = 1, nz - 1
468	DO i = nxl_z, nxr_z
469	#if defined( __parallel )
470	ar1(i,k) = ar(i,j,k+1) - tri(5,i,k) * ar1(i,k-1)
471	#else
472	ar1(i,k) = ar(k+1,j,i) - tri(5,i,k) * ar1(i,k-1)
473	#endif
474	ENDDO
475	ENDDO
476
477	!
478	!-- Backward substitution.
479	DO i = nxl_z, nxr_z
480	#if defined( __parallel )
481	ar(i,j,nz) = ar1(i,nz-1) / tri(4,i,nz-1)
482	#else
483	ar(nz,j,i) = ar1(i,nz-1) / tri(4,i,nz-1)
484	#endif
485	ENDDO
486	DO k = nz-2, 0, -1
487	DO i = nxl_z, nxr_z
488	#if defined( __parallel )
489	ar(i,j,k+1) = ( ar1(i,k) - tri(3,i,k) * ar(i,j,k+2) ) &
490	/ tri(4,i,k)
491	#else
492	ar(k+1,j,i) = ( ar1(i,k) - tri(3,i,k) * ar(k+2,j,i) ) &
493	/ tri(4,i,k)
494	#endif
495	ENDDO
496	ENDDO
497
498	!
499	!-- Indices i=0, j=0 correspond to horizontally averaged pressure.
500	!-- The respective values of ar should be zero at all k-levels if
501	!-- acceleration of horizontally averaged vertical velocity is zero.
502	IF ( ibc_p_b == 1 .AND. ibc_p_t == 1 ) THEN
503	IF ( j == 0 .AND. nxl_z == 0 ) THEN
504	#if defined( __parallel )
505	DO k = 1, nz
506	ar(nxl_z,j,k) = 0.0
507	ENDDO
508	#else
509	DO k = 1, nz
510	ar(k,j,nxl_z) = 0.0
511	ENDDO
512	#endif
513	ENDIF
514	ENDIF
515
516	END SUBROUTINE substi
517
518
519	SUBROUTINE split( tri )
520
521	!------------------------------------------------------------------------------!
522	! Splitting of the tridiagonal matrix (Thomas algorithm)
523	!------------------------------------------------------------------------------!
524
525	IMPLICIT NONE
526
527	INTEGER :: i, k
528	REAL :: tri(5,nxl_z:nxr_z,0:nz-1)
529
530	!
531	!-- Splitting.
532	DO i = nxl_z, nxr_z
533	tri(4,i,0) = tri(1,i,0)
534	ENDDO
535	DO k = 1, nz-1
536	DO i = nxl_z, nxr_z
537	tri(5,i,k) = tri(2,i,k) / tri(4,i,k-1)
538	tri(4,i,k) = tri(1,i,k) - tri(3,i,k-1) * tri(5,i,k)
539	ENDDO
540	ENDDO
541
542	END SUBROUTINE split
543
544	END SUBROUTINE tridia
545
546
547	#if defined( __parallel )
548	SUBROUTINE fftxp( ar, direction )
549
550	!------------------------------------------------------------------------------!
551	! Fourier-transformation along x-direction Parallelized version
552	!------------------------------------------------------------------------------!
553
554	IMPLICIT NONE
555
556	CHARACTER (LEN=*) :: direction
557	INTEGER :: j, k
558	REAL :: ar(0:nxa,nys_x:nyn_xa,nzb_x:nzt_xa)
559
560	!
561	!-- Performing the fft with one of the methods implemented
562	!$OMP PARALLEL PRIVATE ( j, k )
563	!$OMP DO
564	DO k = nzb_x, nzt_x
565	DO j = nys_x, nyn_x
566	CALL fft_x( ar(0:nx,j,k), direction )
567	ENDDO
568	ENDDO
569	!$OMP END PARALLEL
570
571	END SUBROUTINE fftxp
572
573	#else
574	SUBROUTINE fftx( ar, direction )
575
576	!------------------------------------------------------------------------------!
577	! Fourier-transformation along x-direction Non parallel version
578	!------------------------------------------------------------------------------!
579
580	IMPLICIT NONE
581
582	CHARACTER (LEN=*) :: direction
583	INTEGER :: i, j, k
584	REAL :: ar(1:nz,0:ny,0:nx)
585
586	!
587	!-- Performing the fft with one of the methods implemented
588	!$OMP PARALLEL PRIVATE ( j, k )
589	!$OMP DO
590	DO k = 1, nz
591	DO j = 0, ny
592	CALL fft_x( ar(k,j,0:nx), direction )
593	ENDDO
594	ENDDO
595	!$OMP END PARALLEL
596
597	END SUBROUTINE fftx
598	#endif
599
600
601	#if defined( __parallel )
602	SUBROUTINE fftyp( ar, direction )
603
604	!------------------------------------------------------------------------------!
605	! Fourier-transformation along y-direction Parallelized version
606	!------------------------------------------------------------------------------!
607
608	IMPLICIT NONE
609
610	CHARACTER (LEN=*) :: direction
611	INTEGER :: i, k
612	REAL :: ar(0:nya,nxl_y:nxr_ya,nzb_y:nzt_ya)
613
614	!
615	!-- Performing the fft with one of the methods implemented
616	!$OMP PARALLEL PRIVATE ( i, k )
617	!$OMP DO
618	DO k = nzb_y, nzt_y
619	DO i = nxl_y, nxr_y
620	CALL fft_y( ar(0:ny,i,k), direction )
621	ENDDO
622	ENDDO
623	!$OMP END PARALLEL
624
625	END SUBROUTINE fftyp
626
627	#else
628	SUBROUTINE ffty( ar, direction )
629
630	!------------------------------------------------------------------------------!
631	! Fourier-transformation along y-direction Non parallel version
632	!------------------------------------------------------------------------------!
633
634	IMPLICIT NONE
635
636	CHARACTER (LEN=*) :: direction
637	INTEGER :: i, k
638	REAL :: ar(1:nz,0:ny,0:nx)
639
640	!
641	!-- Performing the fft with one of the methods implemented
642	!$OMP PARALLEL PRIVATE ( i, k )
643	!$OMP DO
644	DO k = 1, nz
645	DO i = 0, nx
646	CALL fft_y( ar(k,0:ny,i), direction )
647	ENDDO
648	ENDDO
649	!$OMP END PARALLEL
650
651	END SUBROUTINE ffty
652	#endif
653
654	#if defined( __parallel )
655	SUBROUTINE ffty_tr_yx( f_in, work, f_out )
656
657	!------------------------------------------------------------------------------!
658	! Fourier-transformation along y with subsequent transposition y --> x for
659	! a 1d-decomposition along x
660	!
661	! ATTENTION: The performance of this routine is much faster on the NEC-SX6,
662	! if the first index of work_ffty_vec is odd. Otherwise
663	! memory bank conflicts may occur (especially if the index is a
664	! multiple of 128). That's why work_ffty_vec is dimensioned as
665	! 0:ny+1.
666	! Of course, this will not work if users are using an odd number
667	! of gridpoints along y.
668	!------------------------------------------------------------------------------!
669
670	USE control_parameters
671	USE cpulog
672	USE indices
673	USE interfaces
674	USE pegrid
675	USE transpose_indices
676
677	IMPLICIT NONE
678
679	INTEGER :: i, iend, iouter, ir, j, k
680	INTEGER, PARAMETER :: stridex = 4
681
682	REAL, DIMENSION(0:ny,stridex) :: work_ffty
683	#if defined( __nec )
684	REAL, DIMENSION(0:ny+1,1:nz,nxl:nxr) :: work_ffty_vec
685	#endif
686	REAL, DIMENSION(1:nza,0:nya,nxl:nxra) :: f_in
687	REAL, DIMENSION(nnx,1:nza,nys_x:nyn_xa,pdims(1)) :: f_out
688	REAL, DIMENSION(nxl:nxra,1:nza,0:nya) :: work
689
690	!
691	!-- Carry out the FFT along y, where all data are present due to the
692	!-- 1d-decomposition along x. Resort the data in a way that x becomes
693	!-- the first index.
694	CALL cpu_log( log_point_s(7), 'fft_y', 'start' )
695
696	IF ( host(1:3) == 'nec' ) THEN
697	#if defined( __nec )
698	!
699	!-- Code optimized for vector processors
700	!$OMP PARALLEL PRIVATE ( i, j, k )
701	!$OMP DO
702	DO i = nxl, nxr
703
704	DO j = 0, ny
705	DO k = 1, nz
706	work_ffty_vec(j,k,i) = f_in(k,j,i)
707	ENDDO
708	ENDDO
709
710	CALL fft_y_m( work_ffty_vec(:,:,i), ny+1, 'forward' )
711
712	ENDDO
713
714	!$OMP DO
715	DO k = 1, nz
716	DO j = 0, ny
717	DO i = nxl, nxr
718	work(i,k,j) = work_ffty_vec(j,k,i)
719	ENDDO
720	ENDDO
721	ENDDO
722	!$OMP END PARALLEL
723	#endif
724
725	ELSE
726
727	!
728	!-- Cache optimized code.
729	!-- The i-(x-)direction is split into a strided outer loop and an inner
730	!-- loop for better cache performance
731	!$OMP PARALLEL PRIVATE (i,iend,iouter,ir,j,k,work_ffty)
732	!$OMP DO
733	DO iouter = nxl, nxr, stridex
734
735	iend = MIN( iouter+stridex-1, nxr ) ! Upper bound for inner i loop
736
737	DO k = 1, nz
738
739	DO i = iouter, iend
740
741	ir = i-iouter+1 ! counter within a stride
742	DO j = 0, ny
743	work_ffty(j,ir) = f_in(k,j,i)
744	ENDDO
745	!
746	!-- FFT along y
747	CALL fft_y( work_ffty(:,ir), 'forward' )
748
749	ENDDO
750
751	!
752	!-- Resort
753	DO j = 0, ny
754	DO i = iouter, iend
755	work(i,k,j) = work_ffty(j,i-iouter+1)
756	ENDDO
757	ENDDO
758
759	ENDDO
760
761	ENDDO
762	!$OMP END PARALLEL
763
764	ENDIF
765	CALL cpu_log( log_point_s(7), 'fft_y', 'pause' )
766
767	!
768	!-- Transpose array
769	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
770	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
771	CALL MPI_ALLTOALL( work(nxl,1,0), sendrecvcount_xy, MPI_REAL, &
772	f_out(1,1,nys_x,1), sendrecvcount_xy, MPI_REAL, &
773	comm1dx, ierr )
774	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
775
776	END SUBROUTINE ffty_tr_yx
777
778
779	SUBROUTINE tr_xy_ffty( f_in, work, f_out )
780
781	!------------------------------------------------------------------------------!
782	! Transposition x --> y with a subsequent backward Fourier transformation for
783	! a 1d-decomposition along x
784	!------------------------------------------------------------------------------!
785
786	USE control_parameters
787	USE cpulog
788	USE indices
789	USE interfaces
790	USE pegrid
791	USE transpose_indices
792
793	IMPLICIT NONE
794
795	INTEGER :: i, iend, iouter, ir, j, k
796	INTEGER, PARAMETER :: stridex = 4
797
798	REAL, DIMENSION(0:ny,stridex) :: work_ffty
799	#if defined( __nec )
800	REAL, DIMENSION(0:ny+1,1:nz,nxl:nxr) :: work_ffty_vec
801	#endif
802	REAL, DIMENSION(nnx,1:nza,nys_x:nyn_xa,pdims(1)) :: f_in
803	REAL, DIMENSION(1:nza,0:nya,nxl:nxra) :: f_out
804	REAL, DIMENSION(nxl:nxra,1:nza,0:nya) :: work
805
806	!
807	!-- Transpose array
808	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
809	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
810	CALL MPI_ALLTOALL( f_in(1,1,nys_x,1), sendrecvcount_xy, MPI_REAL, &
811	work(nxl,1,0), sendrecvcount_xy, MPI_REAL, &
812	comm1dx, ierr )
813	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
814
815	!
816	!-- Resort the data in a way that y becomes the first index and carry out the
817	!-- backward fft along y.
818	CALL cpu_log( log_point_s(7), 'fft_y', 'continue' )
819
820	IF ( host(1:3) == 'nec' ) THEN
821	#if defined( __nec )
822	!
823	!-- Code optimized for vector processors
824	!$OMP PARALLEL PRIVATE ( i, j, k )
825	!$OMP DO
826	DO k = 1, nz
827	DO j = 0, ny
828	DO i = nxl, nxr
829	work_ffty_vec(j,k,i) = work(i,k,j)
830	ENDDO
831	ENDDO
832	ENDDO
833
834	!$OMP DO
835	DO i = nxl, nxr
836
837	CALL fft_y_m( work_ffty_vec(:,:,i), ny+1, 'backward' )
838
839	DO j = 0, ny
840	DO k = 1, nz
841	f_out(k,j,i) = work_ffty_vec(j,k,i)
842	ENDDO
843	ENDDO
844
845	ENDDO
846	!$OMP END PARALLEL
847	#endif
848
849	ELSE
850
851	!
852	!-- Cache optimized code.
853	!-- The i-(x-)direction is split into a strided outer loop and an inner
854	!-- loop for better cache performance
855	!$OMP PARALLEL PRIVATE ( i, iend, iouter, ir, j, k, work_ffty )
856	!$OMP DO
857	DO iouter = nxl, nxr, stridex
858
859	iend = MIN( iouter+stridex-1, nxr ) ! Upper bound for inner i loop
860
861	DO k = 1, nz
862	!
863	!-- Resort
864	DO j = 0, ny
865	DO i = iouter, iend
866	work_ffty(j,i-iouter+1) = work(i,k,j)
867	ENDDO
868	ENDDO
869
870	DO i = iouter, iend
871
872	!
873	!-- FFT along y
874	ir = i-iouter+1 ! counter within a stride
875	CALL fft_y( work_ffty(:,ir), 'backward' )
876
877	DO j = 0, ny
878	f_out(k,j,i) = work_ffty(j,ir)
879	ENDDO
880	ENDDO
881
882	ENDDO
883
884	ENDDO
885	!$OMP END PARALLEL
886
887	ENDIF
888
889	CALL cpu_log( log_point_s(7), 'fft_y', 'stop' )
890
891	END SUBROUTINE tr_xy_ffty
892
893
894	SUBROUTINE fftx_tri_fftx( ar )
895
896	!------------------------------------------------------------------------------!
897	! FFT along x, solution of the tridiagonal system and backward FFT for
898	! a 1d-decomposition along x
899	!
900	! WARNING: this subroutine may still not work for hybrid parallelization
901	! with OpenMP (for possible necessary changes see the original
902	! routine poisfft_hybrid, developed by Klaus Ketelsen, May 2002)
903	!------------------------------------------------------------------------------!
904
905	USE control_parameters
906	USE cpulog
907	USE grid_variables
908	USE indices
909	USE interfaces
910	USE pegrid
911	USE transpose_indices
912
913	IMPLICIT NONE
914
915	character(len=3) :: myth_char
916
917	INTEGER :: i, j, k, m, n, omp_get_thread_num, tn
918
919	REAL, DIMENSION(0:nx) :: work_fftx
920	REAL, DIMENSION(0:nx,1:nz) :: work_trix
921	REAL, DIMENSION(nnx,1:nza,nys_x:nyn_xa,pdims(1)) :: ar
922	REAL, DIMENSION(:,:,:,:), ALLOCATABLE :: tri
923
924
925	CALL cpu_log( log_point_s(33), 'fft_x + tridia', 'start' )
926
927	ALLOCATE( tri(5,0:nx,0:nz-1,0:threads_per_task-1) )
928
929	tn = 0 ! Default thread number in case of one thread
930	!$OMP PARALLEL DO PRIVATE ( i, j, k, m, n, tn, work_fftx, work_trix )
931	DO j = nys_x, nyn_x
932
933	!$ tn = omp_get_thread_num()
934
935	IF ( host(1:3) == 'nec' ) THEN
936	!
937	!-- Code optimized for vector processors
938	DO k = 1, nz
939
940	m = 0
941	DO n = 1, pdims(1)
942	DO i = 1, nnx_pe( n-1 ) ! WARN: pcoord(i) should be used!!
943	work_trix(m,k) = ar(i,k,j,n)
944	m = m + 1
945	ENDDO
946	ENDDO
947
948	ENDDO
949
950	CALL fft_x_m( work_trix, 'forward' )
951
952	ELSE
953	!
954	!-- Cache optimized code
955	DO k = 1, nz
956
957	m = 0
958	DO n = 1, pdims(1)
959	DO i = 1, nnx_pe( n-1 ) ! WARN: pcoord(i) should be used!!
960	work_fftx(m) = ar(i,k,j,n)
961	m = m + 1
962	ENDDO
963	ENDDO
964
965	CALL fft_x( work_fftx, 'forward' )
966
967	DO i = 0, nx
968	work_trix(i,k) = work_fftx(i)
969	ENDDO
970
971	ENDDO
972
973	ENDIF
974
975	!
976	!-- Solve the linear equation system
977	CALL tridia_1dd( ddx2, ddy2, nx, ny, j, work_trix, tri(:,:,:,tn) )
978
979	IF ( host(1:3) == 'nec' ) THEN
980	!
981	!-- Code optimized for vector processors
982	CALL fft_x_m( work_trix, 'backward' )
983
984	DO k = 1, nz
985
986	m = 0
987	DO n = 1, pdims(1)
988	DO i = 1, nnx_pe( n-1 ) ! WARN: pcoord(i) should be used!!
989	ar(i,k,j,n) = work_trix(m,k)
990	m = m + 1
991	ENDDO
992	ENDDO
993
994	ENDDO
995
996	ELSE
997	!
998	!-- Cache optimized code
999	DO k = 1, nz
1000
1001	DO i = 0, nx
1002	work_fftx(i) = work_trix(i,k)
1003	ENDDO
1004
1005	CALL fft_x( work_fftx, 'backward' )
1006
1007	m = 0
1008	DO n = 1, pdims(1)
1009	DO i = 1, nnx_pe( n-1 ) ! WARN: pcoord(i) should be used!!
1010	ar(i,k,j,n) = work_fftx(m)
1011	m = m + 1
1012	ENDDO
1013	ENDDO
1014
1015	ENDDO
1016
1017	ENDIF
1018
1019	ENDDO
1020
1021	DEALLOCATE( tri )
1022
1023	CALL cpu_log( log_point_s(33), 'fft_x + tridia', 'stop' )
1024
1025	END SUBROUTINE fftx_tri_fftx
1026
1027
1028	SUBROUTINE fftx_tr_xy( f_in, work, f_out )
1029
1030	!------------------------------------------------------------------------------!
1031	! Fourier-transformation along x with subsequent transposition x --> y for
1032	! a 1d-decomposition along y
1033	!
1034	! ATTENTION: The NEC-branch of this routine may significantly profit from
1035	! further optimizations. So far, performance is much worse than
1036	! for routine ffty_tr_yx (more than three times slower).
1037	!------------------------------------------------------------------------------!
1038
1039	USE control_parameters
1040	USE cpulog
1041	USE indices
1042	USE interfaces
1043	USE pegrid
1044	USE transpose_indices
1045
1046	IMPLICIT NONE
1047
1048	INTEGER :: i, j, k
1049
1050	REAL, DIMENSION(0:nx,1:nz,nys:nyn) :: work_fftx
1051	REAL, DIMENSION(1:nza,nys:nyna,0:nxa) :: f_in
1052	REAL, DIMENSION(nny,1:nza,nxl_y:nxr_ya,pdims(2)) :: f_out
1053	REAL, DIMENSION(nys:nyna,1:nza,0:nxa) :: work
1054
1055	!
1056	!-- Carry out the FFT along x, where all data are present due to the
1057	!-- 1d-decomposition along y. Resort the data in a way that y becomes
1058	!-- the first index.
1059	CALL cpu_log( log_point_s(4), 'fft_x', 'start' )
1060
1061	IF ( host(1:3) == 'nec' ) THEN
1062	!
1063	!-- Code for vector processors
1064	!$OMP PARALLEL PRIVATE ( i, j, k )
1065	!$OMP DO
1066	DO i = 0, nx
1067
1068	DO j = nys, nyn
1069	DO k = 1, nz
1070	work_fftx(i,k,j) = f_in(k,j,i)
1071	ENDDO
1072	ENDDO
1073
1074	ENDDO
1075
1076	!$OMP DO
1077	DO j = nys, nyn
1078
1079	CALL fft_x_m( work_fftx(:,:,j), 'forward' )
1080
1081	DO k = 1, nz
1082	DO i = 0, nx
1083	work(j,k,i) = work_fftx(i,k,j)
1084	ENDDO
1085	ENDDO
1086
1087	ENDDO
1088	!$OMP END PARALLEL
1089
1090	ELSE
1091
1092	!
1093	!-- Cache optimized code (there might be still a potential for better
1094	!-- optimization).
1095	!$OMP PARALLEL PRIVATE (i,j,k)
1096	!$OMP DO
1097	DO i = 0, nx
1098
1099	DO j = nys, nyn
1100	DO k = 1, nz
1101	work_fftx(i,k,j) = f_in(k,j,i)
1102	ENDDO
1103	ENDDO
1104
1105	ENDDO
1106
1107	!$OMP DO
1108	DO j = nys, nyn
1109	DO k = 1, nz
1110
1111	CALL fft_x( work_fftx(0:nx,k,j), 'forward' )
1112
1113	DO i = 0, nx
1114	work(j,k,i) = work_fftx(i,k,j)
1115	ENDDO
1116	ENDDO
1117
1118	ENDDO
1119	!$OMP END PARALLEL
1120
1121	ENDIF
1122	CALL cpu_log( log_point_s(4), 'fft_x', 'pause' )
1123
1124	!
1125	!-- Transpose array
1126	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
1127	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
1128	CALL MPI_ALLTOALL( work(nys,1,0), sendrecvcount_xy, MPI_REAL, &
1129	f_out(1,1,nxl_y,1), sendrecvcount_xy, MPI_REAL, &
1130	comm1dy, ierr )
1131	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
1132
1133	END SUBROUTINE fftx_tr_xy
1134
1135
1136	SUBROUTINE tr_yx_fftx( f_in, work, f_out )
1137
1138	!------------------------------------------------------------------------------!
1139	! Transposition y --> x with a subsequent backward Fourier transformation for
1140	! a 1d-decomposition along x
1141	!------------------------------------------------------------------------------!
1142
1143	USE control_parameters
1144	USE cpulog
1145	USE indices
1146	USE interfaces
1147	USE pegrid
1148	USE transpose_indices
1149
1150	IMPLICIT NONE
1151
1152	INTEGER :: i, j, k
1153
1154	REAL, DIMENSION(0:nx,1:nz,nys:nyn) :: work_fftx
1155	REAL, DIMENSION(nny,1:nza,nxl_y:nxr_ya,pdims(2)) :: f_in
1156	REAL, DIMENSION(1:nza,nys:nyna,0:nxa) :: f_out
1157	REAL, DIMENSION(nys:nyna,1:nza,0:nxa) :: work
1158
1159	!
1160	!-- Transpose array
1161	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
1162	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
1163	CALL MPI_ALLTOALL( f_in(1,1,nxl_y,1), sendrecvcount_xy, MPI_REAL, &
1164	work(nys,1,0), sendrecvcount_xy, MPI_REAL, &
1165	comm1dy, ierr )
1166	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
1167
1168	!
1169	!-- Carry out the FFT along x, where all data are present due to the
1170	!-- 1d-decomposition along y. Resort the data in a way that y becomes
1171	!-- the first index.
1172	CALL cpu_log( log_point_s(4), 'fft_x', 'continue' )
1173
1174	IF ( host(1:3) == 'nec' ) THEN
1175	!
1176	!-- Code optimized for vector processors
1177	!$OMP PARALLEL PRIVATE ( i, j, k )
1178	!$OMP DO
1179	DO j = nys, nyn
1180
1181	DO k = 1, nz
1182	DO i = 0, nx
1183	work_fftx(i,k,j) = work(j,k,i)
1184	ENDDO
1185	ENDDO
1186
1187	CALL fft_x_m( work_fftx(:,:,j), 'backward' )
1188
1189	ENDDO
1190
1191	!$OMP DO
1192	DO i = 0, nx
1193	DO j = nys, nyn
1194	DO k = 1, nz
1195	f_out(k,j,i) = work_fftx(i,k,j)
1196	ENDDO
1197	ENDDO
1198	ENDDO
1199	!$OMP END PARALLEL
1200
1201	ELSE
1202
1203	!
1204	!-- Cache optimized code (there might be still a potential for better
1205	!-- optimization).
1206	!$OMP PARALLEL PRIVATE (i,j,k)
1207	!$OMP DO
1208	DO j = nys, nyn
1209	DO k = 1, nz
1210
1211	DO i = 0, nx
1212	work_fftx(i,k,j) = work(j,k,i)
1213	ENDDO
1214
1215	CALL fft_x( work_fftx(0:nx,k,j), 'backward' )
1216
1217	ENDDO
1218	ENDDO
1219
1220	!$OMP DO
1221	DO i = 0, nx
1222	DO j = nys, nyn
1223	DO k = 1, nz
1224	f_out(k,j,i) = work_fftx(i,k,j)
1225	ENDDO
1226	ENDDO
1227	ENDDO
1228	!$OMP END PARALLEL
1229
1230	ENDIF
1231	CALL cpu_log( log_point_s(4), 'fft_x', 'stop' )
1232
1233	END SUBROUTINE tr_yx_fftx
1234
1235
1236	SUBROUTINE ffty_tri_ffty( ar )
1237
1238	!------------------------------------------------------------------------------!
1239	! FFT along y, solution of the tridiagonal system and backward FFT for
1240	! a 1d-decomposition along y
1241	!
1242	! WARNING: this subroutine may still not work for hybrid parallelization
1243	! with OpenMP (for possible necessary changes see the original
1244	! routine poisfft_hybrid, developed by Klaus Ketelsen, May 2002)
1245	!------------------------------------------------------------------------------!
1246
1247	USE control_parameters
1248	USE cpulog
1249	USE grid_variables
1250	USE indices
1251	USE interfaces
1252	USE pegrid
1253	USE transpose_indices
1254
1255	IMPLICIT NONE
1256
1257	INTEGER :: i, j, k, m, n, omp_get_thread_num, tn
1258
1259	REAL, DIMENSION(0:ny) :: work_ffty
1260	REAL, DIMENSION(0:ny,1:nz) :: work_triy
1261	REAL, DIMENSION(nny,1:nza,nxl_y:nxr_ya,pdims(2)) :: ar
1262	REAL, DIMENSION(:,:,:,:), ALLOCATABLE :: tri
1263
1264
1265	CALL cpu_log( log_point_s(39), 'fft_y + tridia', 'start' )
1266
1267	ALLOCATE( tri(5,0:ny,0:nz-1,0:threads_per_task-1) )
1268
1269	tn = 0 ! Default thread number in case of one thread
1270	!$OMP PARALLEL DO PRIVATE ( i, j, k, m, n, tn, work_ffty, work_triy )
1271	DO i = nxl_y, nxr_y
1272
1273	!$ tn = omp_get_thread_num()
1274
1275	IF ( host(1:3) == 'nec' ) THEN
1276	!
1277	!-- Code optimized for vector processors
1278	DO k = 1, nz
1279
1280	m = 0
1281	DO n = 1, pdims(2)
1282	DO j = 1, nny_pe( n-1 ) ! WARN: pcoord(j) should be used!!
1283	work_triy(m,k) = ar(j,k,i,n)
1284	m = m + 1
1285	ENDDO
1286	ENDDO
1287
1288	ENDDO
1289
1290	CALL fft_y_m( work_triy, ny, 'forward' )
1291
1292	ELSE
1293	!
1294	!-- Cache optimized code
1295	DO k = 1, nz
1296
1297	m = 0
1298	DO n = 1, pdims(2)
1299	DO j = 1, nny_pe( n-1 ) ! WARN: pcoord(j) should be used!!
1300	work_ffty(m) = ar(j,k,i,n)
1301	m = m + 1
1302	ENDDO
1303	ENDDO
1304
1305	CALL fft_y( work_ffty, 'forward' )
1306
1307	DO j = 0, ny
1308	work_triy(j,k) = work_ffty(j)
1309	ENDDO
1310
1311	ENDDO
1312
1313	ENDIF
1314
1315	!
1316	!-- Solve the linear equation system
1317	CALL tridia_1dd( ddy2, ddx2, ny, nx, i, work_triy, tri(:,:,:,tn) )
1318
1319	IF ( host(1:3) == 'nec' ) THEN
1320	!
1321	!-- Code optimized for vector processors
1322	CALL fft_y_m( work_triy, ny, 'backward' )
1323
1324	DO k = 1, nz
1325
1326	m = 0
1327	DO n = 1, pdims(2)
1328	DO j = 1, nny_pe( n-1 ) ! WARN: pcoord(j) should be used!!
1329	ar(j,k,i,n) = work_triy(m,k)
1330	m = m + 1
1331	ENDDO
1332	ENDDO
1333
1334	ENDDO
1335
1336	ELSE
1337	!
1338	!-- Cache optimized code
1339	DO k = 1, nz
1340
1341	DO j = 0, ny
1342	work_ffty(j) = work_triy(j,k)
1343	ENDDO
1344
1345	CALL fft_y( work_ffty, 'backward' )
1346
1347	m = 0
1348	DO n = 1, pdims(2)
1349	DO j = 1, nny_pe( n-1 ) ! WARN: pcoord(j) should be used!!
1350	ar(j,k,i,n) = work_ffty(m)
1351	m = m + 1
1352	ENDDO
1353	ENDDO
1354
1355	ENDDO
1356
1357	ENDIF
1358
1359	ENDDO
1360
1361	DEALLOCATE( tri )
1362
1363	CALL cpu_log( log_point_s(39), 'fft_y + tridia', 'stop' )
1364
1365	END SUBROUTINE ffty_tri_ffty
1366
1367
1368	SUBROUTINE tridia_1dd( ddx2, ddy2, nx, ny, j, ar, tri )
1369
1370	!------------------------------------------------------------------------------!
1371	! Solves the linear system of equations for a 1d-decomposition along x (see
1372	! tridia)
1373	!
1374	! Attention: when using the intel compiler, array tri must be passed as an
1375	! argument to the contained subroutines. Otherwise addres faults
1376	! will occur.
1377	! On NEC, tri should not be passed (except for routine substi_1dd)
1378	! because this causes very bad performance.
1379	!------------------------------------------------------------------------------!
1380
1381	USE arrays_3d
1382	USE control_parameters
1383
1384	USE pegrid
1385
1386	IMPLICIT NONE
1387
1388	INTEGER :: i, j, k, nnyh, nx, ny, omp_get_thread_num, tn
1389
1390	REAL :: ddx2, ddy2
1391
1392	REAL, DIMENSION(0:nx,1:nz) :: ar
1393	REAL, DIMENSION(0:nx,0:nz-1) :: ar1
1394	REAL, DIMENSION(5,0:nx,0:nz-1) :: tri
1395
1396
1397	nnyh = ( ny + 1 ) / 2
1398
1399	!
1400	!-- Define constant elements of the tridiagonal matrix.
1401	!-- The compiler on SX6 does loop exchange. If 0:nx is a high power of 2,
1402	!-- the exchanged loops create bank conflicts. The following directive
1403	!-- prohibits loop exchange and the loops perform much better.
1404	! tn = omp_get_thread_num()
1405	! WRITE( 120+tn, * ) '+++ id=',myid,' nx=',nx,' thread=', omp_get_thread_num()
1406	! CALL local_flush( 120+tn )
1407	!CDIR NOLOOPCHG
1408	DO k = 0, nz-1
1409	DO i = 0,nx
1410	tri(2,i,k) = ddzu_pres(k+1) * ddzw(k+1)
1411	tri(3,i,k) = ddzu_pres(k+2) * ddzw(k+1)
1412	ENDDO
1413	ENDDO
1414	! WRITE( 120+tn, * ) '+++ id=',myid,' end of first tridia loop thread=', omp_get_thread_num()
1415	! CALL local_flush( 120+tn )
1416
1417	IF ( j <= nnyh ) THEN
1418	#if defined( __lc )
1419	CALL maketri_1dd( j, tri )
1420	#else
1421	CALL maketri_1dd( j )
1422	#endif
1423	ELSE
1424	#if defined( __lc )
1425	CALL maketri_1dd( ny+1-j, tri )
1426	#else
1427	CALL maketri_1dd( ny+1-j )
1428	#endif
1429	ENDIF
1430	#if defined( __lc )
1431	CALL split_1dd( tri )
1432	#else
1433	CALL split_1dd
1434	#endif
1435	CALL substi_1dd( ar, tri )
1436
1437	CONTAINS
1438
1439	#if defined( __lc )
1440	SUBROUTINE maketri_1dd( j, tri )
1441	#else
1442	SUBROUTINE maketri_1dd( j )
1443	#endif
1444
1445	!------------------------------------------------------------------------------!
1446	! computes the i- and j-dependent component of the matrix
1447	!------------------------------------------------------------------------------!
1448
1449	USE constants
1450
1451	IMPLICIT NONE
1452
1453	INTEGER :: i, j, k, nnxh
1454	REAL :: a, c
1455
1456	REAL, DIMENSION(0:nx) :: l
1457
1458	#if defined( __lc )
1459	REAL, DIMENSION(5,0:nx,0:nz-1) :: tri
1460	#endif
1461
1462
1463	nnxh = ( nx + 1 ) / 2
1464	!
1465	!-- Provide the tridiagonal matrix for solution of the Poisson equation in
1466	!-- Fourier space. The coefficients are computed following the method of
1467	!-- Schmidt et al. (DFVLR-Mitteilung 84-15), which departs from Stephan
1468	!-- Siano's original version by discretizing the Poisson equation,
1469	!-- before it is Fourier-transformed
1470	DO i = 0, nx
1471	IF ( i >= 0 .AND. i <= nnxh ) THEN
1472	l(i) = 2.0 * ( 1.0 - COS( ( 2.0 * pi * i ) / &
1473	FLOAT( nx+1 ) ) ) * ddx2 + &
1474	2.0 * ( 1.0 - COS( ( 2.0 * pi * j ) / &
1475	FLOAT( ny+1 ) ) ) * ddy2
1476	ELSE
1477	l(i) = 2.0 * ( 1.0 - COS( ( 2.0 * pi * ( nx+1-i ) ) / &
1478	FLOAT( nx+1 ) ) ) * ddx2 + &
1479	2.0 * ( 1.0 - COS( ( 2.0 * pi * j ) / &
1480	FLOAT( ny+1 ) ) ) * ddy2
1481	ENDIF
1482	ENDDO
1483
1484	DO k = 0, nz-1
1485	DO i = 0, nx
1486	a = -1.0 * ddzu_pres(k+2) * ddzw(k+1)
1487	c = -1.0 * ddzu_pres(k+1) * ddzw(k+1)
1488	tri(1,i,k) = a + c - l(i)
1489	ENDDO
1490	ENDDO
1491	IF ( ibc_p_b == 1 .OR. ibc_p_b == 2 ) THEN
1492	DO i = 0, nx
1493	tri(1,i,0) = tri(1,i,0) + tri(2,i,0)
1494	ENDDO
1495	ENDIF
1496	IF ( ibc_p_t == 1 ) THEN
1497	DO i = 0, nx
1498	tri(1,i,nz-1) = tri(1,i,nz-1) + tri(3,i,nz-1)
1499	ENDDO
1500	ENDIF
1501
1502	END SUBROUTINE maketri_1dd
1503
1504
1505	#if defined( __lc )
1506	SUBROUTINE split_1dd( tri )
1507	#else
1508	SUBROUTINE split_1dd
1509	#endif
1510
1511	!------------------------------------------------------------------------------!
1512	! Splitting of the tridiagonal matrix (Thomas algorithm)
1513	!------------------------------------------------------------------------------!
1514
1515	IMPLICIT NONE
1516
1517	INTEGER :: i, k
1518
1519	#if defined( __lc )
1520	REAL, DIMENSION(5,0:nx,0:nz-1) :: tri
1521	#endif
1522
1523
1524	!
1525	!-- Splitting
1526	DO i = 0, nx
1527	tri(4,i,0) = tri(1,i,0)
1528	ENDDO
1529	DO k = 1, nz-1
1530	DO i = 0, nx
1531	tri(5,i,k) = tri(2,i,k) / tri(4,i,k-1)
1532	tri(4,i,k) = tri(1,i,k) - tri(3,i,k-1) * tri(5,i,k)
1533	ENDDO
1534	ENDDO
1535
1536	END SUBROUTINE split_1dd
1537
1538
1539	SUBROUTINE substi_1dd( ar, tri )
1540
1541	!------------------------------------------------------------------------------!
1542	! Substitution (Forward and Backward) (Thomas algorithm)
1543	!------------------------------------------------------------------------------!
1544
1545	IMPLICIT NONE
1546
1547	INTEGER :: i, k
1548
1549	REAL, DIMENSION(0:nx,nz) :: ar
1550	REAL, DIMENSION(0:nx,0:nz-1) :: ar1
1551	REAL, DIMENSION(5,0:nx,0:nz-1) :: tri
1552
1553	!
1554	!-- Forward substitution
1555	DO i = 0, nx
1556	ar1(i,0) = ar(i,1)
1557	ENDDO
1558	DO k = 1, nz-1
1559	DO i = 0, nx
1560	ar1(i,k) = ar(i,k+1) - tri(5,i,k) * ar1(i,k-1)
1561	ENDDO
1562	ENDDO
1563
1564	!
1565	!-- Backward substitution
1566	!-- Note, the add of 1.0E-20 in the denominator is due to avoid divisions
1567	!-- by zero appearing if the pressure bc is set to neumann at the top of
1568	!-- the model domain.
1569	DO i = 0, nx
1570	ar(i,nz) = ar1(i,nz-1) / ( tri(4,i,nz-1) + 1.0E-20 )
1571	ENDDO
1572	DO k = nz-2, 0, -1
1573	DO i = 0, nx
1574	ar(i,k+1) = ( ar1(i,k) - tri(3,i,k) * ar(i,k+2) ) &
1575	/ tri(4,i,k)
1576	ENDDO
1577	ENDDO
1578
1579	!
1580	!-- Indices i=0, j=0 correspond to horizontally averaged pressure.
1581	!-- The respective values of ar should be zero at all k-levels if
1582	!-- acceleration of horizontally averaged vertical velocity is zero.
1583	IF ( ibc_p_b == 1 .AND. ibc_p_t == 1 ) THEN
1584	IF ( j == 0 ) THEN
1585	DO k = 1, nz
1586	ar(0,k) = 0.0
1587	ENDDO
1588	ENDIF
1589	ENDIF
1590
1591	END SUBROUTINE substi_1dd
1592
1593	END SUBROUTINE tridia_1dd
1594
1595	#endif
1596	#endif
1597	END MODULE poisfft_mod

Note: See TracBrowser for help on using the repository browser.

Download in other formats:

| Impressum | ©Leibniz Universität Hannover |