Home

Context Navigation

source: palm/trunk/SOURCE/poisfft.f90 @ 750

Last change on this file since 750 was 697, checked in by raasch, 14 years ago
last commit documented
Property svn:keywords set to `Id`
File size: 45.6 KB

Line
1	MODULE poisfft_mod
2
3	!------------------------------------------------------------------------------!
4	! Current revisions:
5	! -----------------
6	!
7	!
8	! Former revisions:
9	! -----------------
10	! $Id: poisfft.f90 697 2011-03-18 07:10:52Z suehring $
11	!
12	! 696 2011-03-18 07:03:49Z raasch
13	! work_fftx removed from PRIVATE clauses in fftx_tr_xy and tr_yx_fftx
14	!
15	! 683 2011-02-09 14:25:15Z raasch
16	! openMP parallelization for 2d-domain-decomposition
17	!
18	! 667 2010-12-23 12:06:00Z suehring/gryschka
19	! ddzu replaced by ddzu_pres due to changes in zu(0)
20	!
21	! 622 2010-12-10 08:08:13Z raasch
22	! optional barriers included in order to speed up collective operations
23	!
24	! 377 2009-09-04 11:09:00Z raasch
25	! __lcmuk changed to __lc to avoid problems with Intel compiler on sgi-ice
26	!
27	! 164 2008-05-15 08:46:15Z raasch
28	! Arguments removed from transpose routines
29	!
30	! 128 2007-10-26 13:11:14Z raasch
31	! Bugfix: wavenumber calculation for even nx in routines maketri
32	!
33	! 85 2007-05-11 09:35:14Z raasch
34	! Bugfix: work_fft*_vec removed from some PRIVATE-declarations
35	!
36	! 76 2007-03-29 00:58:32Z raasch
37	! Tridiagonal coefficients adjusted for Neumann boundary conditions both at
38	! the bottom and the top.
39	!
40	! RCS Log replace by Id keyword, revision history cleaned up
41	!
42	! Revision 1.24 2006/08/04 15:00:24 raasch
43	! Default setting of the thread number tn in case of not using OpenMP
44	!
45	! Revision 1.23 2006/02/23 12:48:38 raasch
46	! Additional compiler directive in routine tridia_1dd for preventing loop
47	! exchange on NEC-SX6
48	!
49	! Revision 1.20 2004/04/30 12:38:09 raasch
50	! Parts of former poisfft_hybrid moved to this subroutine,
51	! former subroutine changed to a module, renaming of FFT-subroutines and
52	! -module, FFTs completely substituted by calls of fft_x and fft_y,
53	! NAG fft used in the non-parallel case completely removed, l in maketri
54	! is now a 1d-array, variables passed by modules instead of using parameter
55	! lists, enlarged transposition arrays introduced
56	!
57	! Revision 1.1 1997/07/24 11:24:14 raasch
58	! Initial revision
59	!
60	!
61	! Description:
62	! ------------
63	! See below.
64	!------------------------------------------------------------------------------!
65
66	!--------------------------------------------------------------------------!
67	! poisfft !
68	! !
69	! Original version: Stephan Siano (pois3d) !
70	! !
71	! Institute of Meteorology and Climatology, University of Hannover !
72	! Germany !
73	! !
74	! Version as of July 23,1996 !
75	! !
76	! !
77	! Version for parallel computers: Siegfried Raasch !
78	! !
79	! Version as of July 03,1997 !
80	! !
81	! Solves the Poisson equation with a 2D spectral method !
82	! d^2 p / dx^2 + d^2 p / dy^2 + d^2 p / dz^2 = s !
83	! !
84	! Input: !
85	! real ar contains in the (nnx,nny,nnz) elements, !
86	! starting from the element (1,nys,nxl), the !
87	! values for s !
88	! real work Temporary array !
89	! !
90	! Output: !
91	! real ar contains the solution for p !
92	!--------------------------------------------------------------------------!
93
94	USE fft_xy
95	USE indices
96	USE transpose_indices
97
98	IMPLICIT NONE
99
100	PRIVATE
101	PUBLIC poisfft, poisfft_init
102
103	INTERFACE poisfft
104	MODULE PROCEDURE poisfft
105	END INTERFACE poisfft
106
107	INTERFACE poisfft_init
108	MODULE PROCEDURE poisfft_init
109	END INTERFACE poisfft_init
110
111	CONTAINS
112
113	SUBROUTINE poisfft_init
114
115	CALL fft_init
116
117	END SUBROUTINE poisfft_init
118
119
120	SUBROUTINE poisfft( ar, work )
121
122	USE cpulog
123	USE interfaces
124	USE pegrid
125
126	IMPLICIT NONE
127
128	REAL, DIMENSION(1:nza,nys:nyna,nxl:nxra) :: ar, work
129
130
131	CALL cpu_log( log_point_s(3), 'poisfft', 'start' )
132
133	!
134	!-- Two-dimensional Fourier Transformation in x- and y-direction.
135	#if defined( __parallel )
136	IF ( pdims(2) == 1 ) THEN
137
138	!
139	!-- 1d-domain-decomposition along x:
140	!-- FFT along y and transposition y --> x
141	CALL ffty_tr_yx( ar, work, ar )
142
143	!
144	!-- FFT along x, solving the tridiagonal system and backward FFT
145	CALL fftx_tri_fftx( ar )
146
147	!
148	!-- Transposition x --> y and backward FFT along y
149	CALL tr_xy_ffty( ar, work, ar )
150
151	ELSEIF ( pdims(1) == 1 ) THEN
152
153	!
154	!-- 1d-domain-decomposition along y:
155	!-- FFT along x and transposition x --> y
156	CALL fftx_tr_xy( ar, work, ar )
157
158	!
159	!-- FFT along y, solving the tridiagonal system and backward FFT
160	CALL ffty_tri_ffty( ar )
161
162	!
163	!-- Transposition y --> x and backward FFT along x
164	CALL tr_yx_fftx( ar, work, ar )
165
166	ELSE
167
168	!
169	!-- 2d-domain-decomposition
170	!-- Transposition z --> x
171	CALL cpu_log( log_point_s(5), 'transpo forward', 'start' )
172	CALL transpose_zx( ar, work, ar )
173	CALL cpu_log( log_point_s(5), 'transpo forward', 'pause' )
174
175	CALL cpu_log( log_point_s(4), 'fft_x', 'start' )
176	CALL fftxp( ar, 'forward' )
177	CALL cpu_log( log_point_s(4), 'fft_x', 'pause' )
178
179	!
180	!-- Transposition x --> y
181	CALL cpu_log( log_point_s(5), 'transpo forward', 'continue' )
182	CALL transpose_xy( ar, work, ar )
183	CALL cpu_log( log_point_s(5), 'transpo forward', 'pause' )
184
185	CALL cpu_log( log_point_s(7), 'fft_y', 'start' )
186	CALL fftyp( ar, 'forward' )
187	CALL cpu_log( log_point_s(7), 'fft_y', 'pause' )
188
189	!
190	!-- Transposition y --> z
191	CALL cpu_log( log_point_s(5), 'transpo forward', 'continue' )
192	CALL transpose_yz( ar, work, ar )
193	CALL cpu_log( log_point_s(5), 'transpo forward', 'stop' )
194
195	!
196	!-- Solve the Poisson equation in z-direction in cartesian space.
197	CALL cpu_log( log_point_s(6), 'tridia', 'start' )
198	CALL tridia( ar )
199	CALL cpu_log( log_point_s(6), 'tridia', 'stop' )
200
201	!
202	!-- Inverse Fourier Transformation
203	!-- Transposition z --> y
204	CALL cpu_log( log_point_s(8), 'transpo invers', 'start' )
205	CALL transpose_zy( ar, work, ar )
206	CALL cpu_log( log_point_s(8), 'transpo invers', 'pause' )
207
208	CALL cpu_log( log_point_s(7), 'fft_y', 'continue' )
209	CALL fftyp( ar, 'backward' )
210	CALL cpu_log( log_point_s(7), 'fft_y', 'stop' )
211
212	!
213	!-- Transposition y --> x
214	CALL cpu_log( log_point_s(8), 'transpo invers', 'continue' )
215	CALL transpose_yx( ar, work, ar )
216	CALL cpu_log( log_point_s(8), 'transpo invers', 'pause' )
217
218	CALL cpu_log( log_point_s(4), 'fft_x', 'continue' )
219	CALL fftxp( ar, 'backward' )
220	CALL cpu_log( log_point_s(4), 'fft_x', 'stop' )
221
222	!
223	!-- Transposition x --> z
224	CALL cpu_log( log_point_s(8), 'transpo invers', 'continue' )
225	CALL transpose_xz( ar, work, ar )
226	CALL cpu_log( log_point_s(8), 'transpo invers', 'stop' )
227
228	ENDIF
229
230	#else
231
232	!
233	!-- Two-dimensional Fourier Transformation along x- and y-direction.
234	CALL cpu_log( log_point_s(4), 'fft_x', 'start' )
235	CALL fftx( ar, 'forward' )
236	CALL cpu_log( log_point_s(4), 'fft_x', 'pause' )
237	CALL cpu_log( log_point_s(7), 'fft_y', 'start' )
238	CALL ffty( ar, 'forward' )
239	CALL cpu_log( log_point_s(7), 'fft_y', 'pause' )
240
241	!
242	!-- Solve the Poisson equation in z-direction in cartesian space.
243	CALL cpu_log( log_point_s(6), 'tridia', 'start' )
244	CALL tridia( ar )
245	CALL cpu_log( log_point_s(6), 'tridia', 'stop' )
246
247	!
248	!-- Inverse Fourier Transformation.
249	CALL cpu_log( log_point_s(7), 'fft_y', 'continue' )
250	CALL ffty( ar, 'backward' )
251	CALL cpu_log( log_point_s(7), 'fft_y', 'stop' )
252	CALL cpu_log( log_point_s(4), 'fft_x', 'continue' )
253	CALL fftx( ar, 'backward' )
254	CALL cpu_log( log_point_s(4), 'fft_x', 'stop' )
255
256	#endif
257
258	CALL cpu_log( log_point_s(3), 'poisfft', 'stop' )
259
260	END SUBROUTINE poisfft
261
262
263
264	SUBROUTINE tridia( ar )
265
266	!------------------------------------------------------------------------------!
267	! solves the linear system of equations:
268	!
269	! -(4 pi^2(i^2/(dx^2nnx^2)+j^2/(dy^2nny^2))+
270	! 1/(dzu(k)dzw(k))+1/(dzu(k-1)dzw(k)))*p(i,j,k)+
271	! 1/(dzu(k)dzw(k))p(i,j,k+1)+1/(dzu(k-1)dzw(k))p(i,j,k-1)=d(i,j,k)
272	!
273	! by using the Thomas algorithm
274	!------------------------------------------------------------------------------!
275
276	USE arrays_3d
277
278	IMPLICIT NONE
279
280	INTEGER :: i, j, k, nnyh
281
282	REAL, DIMENSION(nxl_z:nxr_z,0:nz-1) :: ar1
283	REAL, DIMENSION(5,nxl_z:nxr_z,0:nz-1) :: tri
284
285	#if defined( __parallel )
286	REAL :: ar(nxl_z:nxr_za,nys_z:nyn_za,1:nza)
287	#else
288	REAL :: ar(1:nz,nys_z:nyn_z,nxl_z:nxr_z)
289	#endif
290
291
292	nnyh = (ny+1) / 2
293
294	!
295	!-- Define constant elements of the tridiagonal matrix.
296	!$OMP PARALLEL PRIVATE ( k, i )
297	!$OMP DO
298	DO k = 0, nz-1
299	DO i = nxl_z, nxr_z
300	tri(2,i,k) = ddzu_pres(k+1) * ddzw(k+1)
301	tri(3,i,k) = ddzu_pres(k+2) * ddzw(k+1)
302	ENDDO
303	ENDDO
304	!$OMP END PARALLEL
305
306	#if defined( __parallel )
307	!
308	!-- Repeat for all y-levels.
309	!$OMP PARALLEL FIRSTPRIVATE( tri ) PRIVATE ( ar1, j )
310	!$OMP DO
311	DO j = nys_z, nyn_z
312	IF ( j <= nnyh ) THEN
313	CALL maketri( tri, j )
314	ELSE
315	CALL maketri( tri, ny+1-j )
316	ENDIF
317	CALL split( tri )
318	CALL substi( ar, ar1, tri, j )
319	ENDDO
320	!$OMP END PARALLEL
321	#else
322	!
323	!-- First y-level.
324	CALL maketri( tri, nys_z )
325	CALL split( tri )
326	CALL substi( ar, ar1, tri, 0 )
327
328	!
329	!-- Further y-levels.
330	DO j = 1, nnyh - 1
331	CALL maketri( tri, j )
332	CALL split( tri )
333	CALL substi( ar, ar1, tri, j )
334	CALL substi( ar, ar1, tri, ny+1-j )
335	ENDDO
336	CALL maketri( tri, nnyh )
337	CALL split( tri )
338	CALL substi( ar, ar1, tri, nnyh+nys )
339	#endif
340
341	CONTAINS
342
343	SUBROUTINE maketri( tri, j )
344
345	!------------------------------------------------------------------------------!
346	! Computes the i- and j-dependent component of the matrix
347	!------------------------------------------------------------------------------!
348
349	USE arrays_3d
350	USE constants
351	USE control_parameters
352	USE grid_variables
353
354	IMPLICIT NONE
355
356	INTEGER :: i, j, k, nnxh
357	REAL :: a, c
358	REAL :: ll(nxl_z:nxr_z)
359	REAL :: tri(5,nxl_z:nxr_z,0:nz-1)
360
361
362	nnxh = ( nx + 1 ) / 2
363
364	!
365	!-- Provide the tridiagonal matrix for solution of the Poisson equation in
366	!-- Fourier space. The coefficients are computed following the method of
367	!-- Schmidt et al. (DFVLR-Mitteilung 84-15), which departs from Stephan
368	!-- Siano's original version by discretizing the Poisson equation,
369	!-- before it is Fourier-transformed
370	#if defined( __parallel )
371	DO i = nxl_z, nxr_z
372	IF ( i >= 0 .AND. i <= nnxh ) THEN
373	ll(i) = 2.0 * ( 1.0 - COS( ( 2.0 * pi * i ) / &
374	FLOAT( nx+1 ) ) ) / ( dx * dx ) + &
375	2.0 * ( 1.0 - COS( ( 2.0 * pi * j ) / &
376	FLOAT( ny+1 ) ) ) / ( dy * dy )
377	ELSE
378	ll(i) = 2.0 * ( 1.0 - COS( ( 2.0 * pi * ( nx+1-i ) ) / &
379	FLOAT( nx+1 ) ) ) / ( dx * dx ) + &
380	2.0 * ( 1.0 - COS( ( 2.0 * pi * j ) / &
381	FLOAT( ny+1 ) ) ) / ( dy * dy )
382	ENDIF
383	DO k = 0,nz-1
384	a = -1.0 * ddzu_pres(k+2) * ddzw(k+1)
385	c = -1.0 * ddzu_pres(k+1) * ddzw(k+1)
386	tri(1,i,k) = a + c - ll(i)
387	ENDDO
388	ENDDO
389	#else
390	DO i = 0, nnxh
391	ll(i) = 2.0 * ( 1.0 - COS( ( 2.0 * pi * i ) / FLOAT( nx+1 ) ) ) / &
392	( dx * dx ) + &
393	2.0 * ( 1.0 - COS( ( 2.0 * pi * j ) / FLOAT( ny+1 ) ) ) / &
394	( dy * dy )
395	DO k = 0, nz-1
396	a = -1.0 * ddzu_pres(k+2) * ddzw(k+1)
397	c = -1.0 * ddzu_pres(k+1) * ddzw(k+1)
398	tri(1,i,k) = a + c - ll(i)
399	IF ( i >= 1 .and. i < nnxh ) THEN
400	tri(1,nx+1-i,k) = tri(1,i,k)
401	ENDIF
402	ENDDO
403	ENDDO
404	#endif
405	IF ( ibc_p_b == 1 .OR. ibc_p_b == 2 ) THEN
406	DO i = nxl_z, nxr_z
407	tri(1,i,0) = tri(1,i,0) + tri(2,i,0)
408	ENDDO
409	ENDIF
410	IF ( ibc_p_t == 1 ) THEN
411	DO i = nxl_z, nxr_z
412	tri(1,i,nz-1) = tri(1,i,nz-1) + tri(3,i,nz-1)
413	ENDDO
414	ENDIF
415
416	END SUBROUTINE maketri
417
418
419	SUBROUTINE substi( ar, ar1, tri, j )
420
421	!------------------------------------------------------------------------------!
422	! Substitution (Forward and Backward) (Thomas algorithm)
423	!------------------------------------------------------------------------------!
424
425	USE control_parameters
426
427	IMPLICIT NONE
428
429	INTEGER :: i, j, k
430	REAL :: ar1(nxl_z:nxr_z,0:nz-1)
431	REAL :: tri(5,nxl_z:nxr_z,0:nz-1)
432	#if defined( __parallel )
433	REAL :: ar(nxl_z:nxr_za,nys_z:nyn_za,1:nza)
434	#else
435	REAL :: ar(1:nz,nys_z:nyn_z,nxl_z:nxr_z)
436	#endif
437
438	!
439	!-- Forward substitution.
440	DO i = nxl_z, nxr_z
441	#if defined( __parallel )
442	ar1(i,0) = ar(i,j,1)
443	#else
444	ar1(i,0) = ar(1,j,i)
445	#endif
446	ENDDO
447	DO k = 1, nz - 1
448	DO i = nxl_z, nxr_z
449	#if defined( __parallel )
450	ar1(i,k) = ar(i,j,k+1) - tri(5,i,k) * ar1(i,k-1)
451	#else
452	ar1(i,k) = ar(k+1,j,i) - tri(5,i,k) * ar1(i,k-1)
453	#endif
454	ENDDO
455	ENDDO
456
457	!
458	!-- Backward substitution.
459	DO i = nxl_z, nxr_z
460	#if defined( __parallel )
461	ar(i,j,nz) = ar1(i,nz-1) / tri(4,i,nz-1)
462	#else
463	ar(nz,j,i) = ar1(i,nz-1) / tri(4,i,nz-1)
464	#endif
465	ENDDO
466	DO k = nz-2, 0, -1
467	DO i = nxl_z, nxr_z
468	#if defined( __parallel )
469	ar(i,j,k+1) = ( ar1(i,k) - tri(3,i,k) * ar(i,j,k+2) ) &
470	/ tri(4,i,k)
471	#else
472	ar(k+1,j,i) = ( ar1(i,k) - tri(3,i,k) * ar(k+2,j,i) ) &
473	/ tri(4,i,k)
474	#endif
475	ENDDO
476	ENDDO
477
478	!
479	!-- Indices i=0, j=0 correspond to horizontally averaged pressure.
480	!-- The respective values of ar should be zero at all k-levels if
481	!-- acceleration of horizontally averaged vertical velocity is zero.
482	IF ( ibc_p_b == 1 .AND. ibc_p_t == 1 ) THEN
483	IF ( j == 0 .AND. nxl_z == 0 ) THEN
484	#if defined( __parallel )
485	DO k = 1, nz
486	ar(nxl_z,j,k) = 0.0
487	ENDDO
488	#else
489	DO k = 1, nz
490	ar(k,j,nxl_z) = 0.0
491	ENDDO
492	#endif
493	ENDIF
494	ENDIF
495
496	END SUBROUTINE substi
497
498
499	SUBROUTINE split( tri )
500
501	!------------------------------------------------------------------------------!
502	! Splitting of the tridiagonal matrix (Thomas algorithm)
503	!------------------------------------------------------------------------------!
504
505	IMPLICIT NONE
506
507	INTEGER :: i, k
508	REAL :: tri(5,nxl_z:nxr_z,0:nz-1)
509
510	!
511	!-- Splitting.
512	DO i = nxl_z, nxr_z
513	tri(4,i,0) = tri(1,i,0)
514	ENDDO
515	DO k = 1, nz-1
516	DO i = nxl_z, nxr_z
517	tri(5,i,k) = tri(2,i,k) / tri(4,i,k-1)
518	tri(4,i,k) = tri(1,i,k) - tri(3,i,k-1) * tri(5,i,k)
519	ENDDO
520	ENDDO
521
522	END SUBROUTINE split
523
524	END SUBROUTINE tridia
525
526
527	#if defined( __parallel )
528	SUBROUTINE fftxp( ar, direction )
529
530	!------------------------------------------------------------------------------!
531	! Fourier-transformation along x-direction Parallelized version
532	!------------------------------------------------------------------------------!
533
534	IMPLICIT NONE
535
536	CHARACTER (LEN=*) :: direction
537	INTEGER :: j, k
538	REAL :: ar(0:nxa,nys_x:nyn_xa,nzb_x:nzt_xa)
539
540	!
541	!-- Performing the fft with one of the methods implemented
542	!$OMP PARALLEL PRIVATE ( j, k )
543	!$OMP DO
544	DO k = nzb_x, nzt_x
545	DO j = nys_x, nyn_x
546	CALL fft_x( ar(0:nx,j,k), direction )
547	ENDDO
548	ENDDO
549	!$OMP END PARALLEL
550
551	END SUBROUTINE fftxp
552
553	#else
554	SUBROUTINE fftx( ar, direction )
555
556	!------------------------------------------------------------------------------!
557	! Fourier-transformation along x-direction Non parallel version
558	!------------------------------------------------------------------------------!
559
560	IMPLICIT NONE
561
562	CHARACTER (LEN=*) :: direction
563	INTEGER :: i, j, k
564	REAL :: ar(1:nz,0:ny,0:nx)
565
566	!
567	!-- Performing the fft with one of the methods implemented
568	!$OMP PARALLEL PRIVATE ( j, k )
569	!$OMP DO
570	DO k = 1, nz
571	DO j = 0, ny
572	CALL fft_x( ar(k,j,0:nx), direction )
573	ENDDO
574	ENDDO
575	!$OMP END PARALLEL
576
577	END SUBROUTINE fftx
578	#endif
579
580
581	#if defined( __parallel )
582	SUBROUTINE fftyp( ar, direction )
583
584	!------------------------------------------------------------------------------!
585	! Fourier-transformation along y-direction Parallelized version
586	!------------------------------------------------------------------------------!
587
588	IMPLICIT NONE
589
590	CHARACTER (LEN=*) :: direction
591	INTEGER :: i, k
592	REAL :: ar(0:nya,nxl_y:nxr_ya,nzb_y:nzt_ya)
593
594	!
595	!-- Performing the fft with one of the methods implemented
596	!$OMP PARALLEL PRIVATE ( i, k )
597	!$OMP DO
598	DO k = nzb_y, nzt_y
599	DO i = nxl_y, nxr_y
600	CALL fft_y( ar(0:ny,i,k), direction )
601	ENDDO
602	ENDDO
603	!$OMP END PARALLEL
604
605	END SUBROUTINE fftyp
606
607	#else
608	SUBROUTINE ffty( ar, direction )
609
610	!------------------------------------------------------------------------------!
611	! Fourier-transformation along y-direction Non parallel version
612	!------------------------------------------------------------------------------!
613
614	IMPLICIT NONE
615
616	CHARACTER (LEN=*) :: direction
617	INTEGER :: i, k
618	REAL :: ar(1:nz,0:ny,0:nx)
619
620	!
621	!-- Performing the fft with one of the methods implemented
622	!$OMP PARALLEL PRIVATE ( i, k )
623	!$OMP DO
624	DO k = 1, nz
625	DO i = 0, nx
626	CALL fft_y( ar(k,0:ny,i), direction )
627	ENDDO
628	ENDDO
629	!$OMP END PARALLEL
630
631	END SUBROUTINE ffty
632	#endif
633
634	#if defined( __parallel )
635	SUBROUTINE ffty_tr_yx( f_in, work, f_out )
636
637	!------------------------------------------------------------------------------!
638	! Fourier-transformation along y with subsequent transposition y --> x for
639	! a 1d-decomposition along x
640	!
641	! ATTENTION: The performance of this routine is much faster on the NEC-SX6,
642	! if the first index of work_ffty_vec is odd. Otherwise
643	! memory bank conflicts may occur (especially if the index is a
644	! multiple of 128). That's why work_ffty_vec is dimensioned as
645	! 0:ny+1.
646	! Of course, this will not work if users are using an odd number
647	! of gridpoints along y.
648	!------------------------------------------------------------------------------!
649
650	USE control_parameters
651	USE cpulog
652	USE indices
653	USE interfaces
654	USE pegrid
655	USE transpose_indices
656
657	IMPLICIT NONE
658
659	INTEGER :: i, iend, iouter, ir, j, k
660	INTEGER, PARAMETER :: stridex = 4
661
662	REAL, DIMENSION(0:ny,stridex) :: work_ffty
663	#if defined( __nec )
664	REAL, DIMENSION(0:ny+1,1:nz,nxl:nxr) :: work_ffty_vec
665	#endif
666	REAL, DIMENSION(1:nza,0:nya,nxl:nxra) :: f_in
667	REAL, DIMENSION(nnx,1:nza,nys_x:nyn_xa,pdims(1)) :: f_out
668	REAL, DIMENSION(nxl:nxra,1:nza,0:nya) :: work
669
670	!
671	!-- Carry out the FFT along y, where all data are present due to the
672	!-- 1d-decomposition along x. Resort the data in a way that x becomes
673	!-- the first index.
674	CALL cpu_log( log_point_s(7), 'fft_y', 'start' )
675
676	IF ( host(1:3) == 'nec' ) THEN
677	#if defined( __nec )
678	!
679	!-- Code optimized for vector processors
680	!$OMP PARALLEL PRIVATE ( i, j, k )
681	!$OMP DO
682	DO i = nxl, nxr
683
684	DO j = 0, ny
685	DO k = 1, nz
686	work_ffty_vec(j,k,i) = f_in(k,j,i)
687	ENDDO
688	ENDDO
689
690	CALL fft_y_m( work_ffty_vec(:,:,i), ny+1, 'forward' )
691
692	ENDDO
693
694	!$OMP DO
695	DO k = 1, nz
696	DO j = 0, ny
697	DO i = nxl, nxr
698	work(i,k,j) = work_ffty_vec(j,k,i)
699	ENDDO
700	ENDDO
701	ENDDO
702	!$OMP END PARALLEL
703	#endif
704
705	ELSE
706
707	!
708	!-- Cache optimized code.
709	!-- The i-(x-)direction is split into a strided outer loop and an inner
710	!-- loop for better cache performance
711	!$OMP PARALLEL PRIVATE (i,iend,iouter,ir,j,k,work_ffty)
712	!$OMP DO
713	DO iouter = nxl, nxr, stridex
714
715	iend = MIN( iouter+stridex-1, nxr ) ! Upper bound for inner i loop
716
717	DO k = 1, nz
718
719	DO i = iouter, iend
720
721	ir = i-iouter+1 ! counter within a stride
722	DO j = 0, ny
723	work_ffty(j,ir) = f_in(k,j,i)
724	ENDDO
725	!
726	!-- FFT along y
727	CALL fft_y( work_ffty(:,ir), 'forward' )
728
729	ENDDO
730
731	!
732	!-- Resort
733	DO j = 0, ny
734	DO i = iouter, iend
735	work(i,k,j) = work_ffty(j,i-iouter+1)
736	ENDDO
737	ENDDO
738
739	ENDDO
740
741	ENDDO
742	!$OMP END PARALLEL
743
744	ENDIF
745	CALL cpu_log( log_point_s(7), 'fft_y', 'pause' )
746
747	!
748	!-- Transpose array
749	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
750	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
751	CALL MPI_ALLTOALL( work(nxl,1,0), sendrecvcount_xy, MPI_REAL, &
752	f_out(1,1,nys_x,1), sendrecvcount_xy, MPI_REAL, &
753	comm1dx, ierr )
754	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
755
756	END SUBROUTINE ffty_tr_yx
757
758
759	SUBROUTINE tr_xy_ffty( f_in, work, f_out )
760
761	!------------------------------------------------------------------------------!
762	! Transposition x --> y with a subsequent backward Fourier transformation for
763	! a 1d-decomposition along x
764	!------------------------------------------------------------------------------!
765
766	USE control_parameters
767	USE cpulog
768	USE indices
769	USE interfaces
770	USE pegrid
771	USE transpose_indices
772
773	IMPLICIT NONE
774
775	INTEGER :: i, iend, iouter, ir, j, k
776	INTEGER, PARAMETER :: stridex = 4
777
778	REAL, DIMENSION(0:ny,stridex) :: work_ffty
779	#if defined( __nec )
780	REAL, DIMENSION(0:ny+1,1:nz,nxl:nxr) :: work_ffty_vec
781	#endif
782	REAL, DIMENSION(nnx,1:nza,nys_x:nyn_xa,pdims(1)) :: f_in
783	REAL, DIMENSION(1:nza,0:nya,nxl:nxra) :: f_out
784	REAL, DIMENSION(nxl:nxra,1:nza,0:nya) :: work
785
786	!
787	!-- Transpose array
788	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
789	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
790	CALL MPI_ALLTOALL( f_in(1,1,nys_x,1), sendrecvcount_xy, MPI_REAL, &
791	work(nxl,1,0), sendrecvcount_xy, MPI_REAL, &
792	comm1dx, ierr )
793	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
794
795	!
796	!-- Resort the data in a way that y becomes the first index and carry out the
797	!-- backward fft along y.
798	CALL cpu_log( log_point_s(7), 'fft_y', 'continue' )
799
800	IF ( host(1:3) == 'nec' ) THEN
801	#if defined( __nec )
802	!
803	!-- Code optimized for vector processors
804	!$OMP PARALLEL PRIVATE ( i, j, k )
805	!$OMP DO
806	DO k = 1, nz
807	DO j = 0, ny
808	DO i = nxl, nxr
809	work_ffty_vec(j,k,i) = work(i,k,j)
810	ENDDO
811	ENDDO
812	ENDDO
813
814	!$OMP DO
815	DO i = nxl, nxr
816
817	CALL fft_y_m( work_ffty_vec(:,:,i), ny+1, 'backward' )
818
819	DO j = 0, ny
820	DO k = 1, nz
821	f_out(k,j,i) = work_ffty_vec(j,k,i)
822	ENDDO
823	ENDDO
824
825	ENDDO
826	!$OMP END PARALLEL
827	#endif
828
829	ELSE
830
831	!
832	!-- Cache optimized code.
833	!-- The i-(x-)direction is split into a strided outer loop and an inner
834	!-- loop for better cache performance
835	!$OMP PARALLEL PRIVATE ( i, iend, iouter, ir, j, k, work_ffty )
836	!$OMP DO
837	DO iouter = nxl, nxr, stridex
838
839	iend = MIN( iouter+stridex-1, nxr ) ! Upper bound for inner i loop
840
841	DO k = 1, nz
842	!
843	!-- Resort
844	DO j = 0, ny
845	DO i = iouter, iend
846	work_ffty(j,i-iouter+1) = work(i,k,j)
847	ENDDO
848	ENDDO
849
850	DO i = iouter, iend
851
852	!
853	!-- FFT along y
854	ir = i-iouter+1 ! counter within a stride
855	CALL fft_y( work_ffty(:,ir), 'backward' )
856
857	DO j = 0, ny
858	f_out(k,j,i) = work_ffty(j,ir)
859	ENDDO
860	ENDDO
861
862	ENDDO
863
864	ENDDO
865	!$OMP END PARALLEL
866
867	ENDIF
868
869	CALL cpu_log( log_point_s(7), 'fft_y', 'stop' )
870
871	END SUBROUTINE tr_xy_ffty
872
873
874	SUBROUTINE fftx_tri_fftx( ar )
875
876	!------------------------------------------------------------------------------!
877	! FFT along x, solution of the tridiagonal system and backward FFT for
878	! a 1d-decomposition along x
879	!
880	! WARNING: this subroutine may still not work for hybrid parallelization
881	! with OpenMP (for possible necessary changes see the original
882	! routine poisfft_hybrid, developed by Klaus Ketelsen, May 2002)
883	!------------------------------------------------------------------------------!
884
885	USE control_parameters
886	USE cpulog
887	USE grid_variables
888	USE indices
889	USE interfaces
890	USE pegrid
891	USE transpose_indices
892
893	IMPLICIT NONE
894
895	character(len=3) :: myth_char
896
897	INTEGER :: i, j, k, m, n, omp_get_thread_num, tn
898
899	REAL, DIMENSION(0:nx) :: work_fftx
900	REAL, DIMENSION(0:nx,1:nz) :: work_trix
901	REAL, DIMENSION(nnx,1:nza,nys_x:nyn_xa,pdims(1)) :: ar
902	REAL, DIMENSION(:,:,:,:), ALLOCATABLE :: tri
903
904
905	CALL cpu_log( log_point_s(33), 'fft_x + tridia', 'start' )
906
907	ALLOCATE( tri(5,0:nx,0:nz-1,0:threads_per_task-1) )
908
909	tn = 0 ! Default thread number in case of one thread
910	!$OMP PARALLEL DO PRIVATE ( i, j, k, m, n, tn, work_fftx, work_trix )
911	DO j = nys_x, nyn_x
912
913	!$ tn = omp_get_thread_num()
914
915	IF ( host(1:3) == 'nec' ) THEN
916	!
917	!-- Code optimized for vector processors
918	DO k = 1, nz
919
920	m = 0
921	DO n = 1, pdims(1)
922	DO i = 1, nnx_pe( n-1 ) ! WARN: pcoord(i) should be used!!
923	work_trix(m,k) = ar(i,k,j,n)
924	m = m + 1
925	ENDDO
926	ENDDO
927
928	ENDDO
929
930	CALL fft_x_m( work_trix, 'forward' )
931
932	ELSE
933	!
934	!-- Cache optimized code
935	DO k = 1, nz
936
937	m = 0
938	DO n = 1, pdims(1)
939	DO i = 1, nnx_pe( n-1 ) ! WARN: pcoord(i) should be used!!
940	work_fftx(m) = ar(i,k,j,n)
941	m = m + 1
942	ENDDO
943	ENDDO
944
945	CALL fft_x( work_fftx, 'forward' )
946
947	DO i = 0, nx
948	work_trix(i,k) = work_fftx(i)
949	ENDDO
950
951	ENDDO
952
953	ENDIF
954
955	!
956	!-- Solve the linear equation system
957	CALL tridia_1dd( ddx2, ddy2, nx, ny, j, work_trix, tri(:,:,:,tn) )
958
959	IF ( host(1:3) == 'nec' ) THEN
960	!
961	!-- Code optimized for vector processors
962	CALL fft_x_m( work_trix, 'backward' )
963
964	DO k = 1, nz
965
966	m = 0
967	DO n = 1, pdims(1)
968	DO i = 1, nnx_pe( n-1 ) ! WARN: pcoord(i) should be used!!
969	ar(i,k,j,n) = work_trix(m,k)
970	m = m + 1
971	ENDDO
972	ENDDO
973
974	ENDDO
975
976	ELSE
977	!
978	!-- Cache optimized code
979	DO k = 1, nz
980
981	DO i = 0, nx
982	work_fftx(i) = work_trix(i,k)
983	ENDDO
984
985	CALL fft_x( work_fftx, 'backward' )
986
987	m = 0
988	DO n = 1, pdims(1)
989	DO i = 1, nnx_pe( n-1 ) ! WARN: pcoord(i) should be used!!
990	ar(i,k,j,n) = work_fftx(m)
991	m = m + 1
992	ENDDO
993	ENDDO
994
995	ENDDO
996
997	ENDIF
998
999	ENDDO
1000
1001	DEALLOCATE( tri )
1002
1003	CALL cpu_log( log_point_s(33), 'fft_x + tridia', 'stop' )
1004
1005	END SUBROUTINE fftx_tri_fftx
1006
1007
1008	SUBROUTINE fftx_tr_xy( f_in, work, f_out )
1009
1010	!------------------------------------------------------------------------------!
1011	! Fourier-transformation along x with subsequent transposition x --> y for
1012	! a 1d-decomposition along y
1013	!
1014	! ATTENTION: The NEC-branch of this routine may significantly profit from
1015	! further optimizations. So far, performance is much worse than
1016	! for routine ffty_tr_yx (more than three times slower).
1017	!------------------------------------------------------------------------------!
1018
1019	USE control_parameters
1020	USE cpulog
1021	USE indices
1022	USE interfaces
1023	USE pegrid
1024	USE transpose_indices
1025
1026	IMPLICIT NONE
1027
1028	INTEGER :: i, j, k
1029
1030	REAL, DIMENSION(0:nx,1:nz,nys:nyn) :: work_fftx
1031	REAL, DIMENSION(1:nza,nys:nyna,0:nxa) :: f_in
1032	REAL, DIMENSION(nny,1:nza,nxl_y:nxr_ya,pdims(2)) :: f_out
1033	REAL, DIMENSION(nys:nyna,1:nza,0:nxa) :: work
1034
1035	!
1036	!-- Carry out the FFT along x, where all data are present due to the
1037	!-- 1d-decomposition along y. Resort the data in a way that y becomes
1038	!-- the first index.
1039	CALL cpu_log( log_point_s(4), 'fft_x', 'start' )
1040
1041	IF ( host(1:3) == 'nec' ) THEN
1042	!
1043	!-- Code for vector processors
1044	!$OMP PARALLEL PRIVATE ( i, j, k )
1045	!$OMP DO
1046	DO i = 0, nx
1047
1048	DO j = nys, nyn
1049	DO k = 1, nz
1050	work_fftx(i,k,j) = f_in(k,j,i)
1051	ENDDO
1052	ENDDO
1053
1054	ENDDO
1055
1056	!$OMP DO
1057	DO j = nys, nyn
1058
1059	CALL fft_x_m( work_fftx(:,:,j), 'forward' )
1060
1061	DO k = 1, nz
1062	DO i = 0, nx
1063	work(j,k,i) = work_fftx(i,k,j)
1064	ENDDO
1065	ENDDO
1066
1067	ENDDO
1068	!$OMP END PARALLEL
1069
1070	ELSE
1071
1072	!
1073	!-- Cache optimized code (there might be still a potential for better
1074	!-- optimization).
1075	!$OMP PARALLEL PRIVATE (i,j,k)
1076	!$OMP DO
1077	DO i = 0, nx
1078
1079	DO j = nys, nyn
1080	DO k = 1, nz
1081	work_fftx(i,k,j) = f_in(k,j,i)
1082	ENDDO
1083	ENDDO
1084
1085	ENDDO
1086
1087	!$OMP DO
1088	DO j = nys, nyn
1089	DO k = 1, nz
1090
1091	CALL fft_x( work_fftx(0:nx,k,j), 'forward' )
1092
1093	DO i = 0, nx
1094	work(j,k,i) = work_fftx(i,k,j)
1095	ENDDO
1096	ENDDO
1097
1098	ENDDO
1099	!$OMP END PARALLEL
1100
1101	ENDIF
1102	CALL cpu_log( log_point_s(4), 'fft_x', 'pause' )
1103
1104	!
1105	!-- Transpose array
1106	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
1107	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
1108	CALL MPI_ALLTOALL( work(nys,1,0), sendrecvcount_xy, MPI_REAL, &
1109	f_out(1,1,nxl_y,1), sendrecvcount_xy, MPI_REAL, &
1110	comm1dy, ierr )
1111	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
1112
1113	END SUBROUTINE fftx_tr_xy
1114
1115
1116	SUBROUTINE tr_yx_fftx( f_in, work, f_out )
1117
1118	!------------------------------------------------------------------------------!
1119	! Transposition y --> x with a subsequent backward Fourier transformation for
1120	! a 1d-decomposition along x
1121	!------------------------------------------------------------------------------!
1122
1123	USE control_parameters
1124	USE cpulog
1125	USE indices
1126	USE interfaces
1127	USE pegrid
1128	USE transpose_indices
1129
1130	IMPLICIT NONE
1131
1132	INTEGER :: i, j, k
1133
1134	REAL, DIMENSION(0:nx,1:nz,nys:nyn) :: work_fftx
1135	REAL, DIMENSION(nny,1:nza,nxl_y:nxr_ya,pdims(2)) :: f_in
1136	REAL, DIMENSION(1:nza,nys:nyna,0:nxa) :: f_out
1137	REAL, DIMENSION(nys:nyna,1:nza,0:nxa) :: work
1138
1139	!
1140	!-- Transpose array
1141	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
1142	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
1143	CALL MPI_ALLTOALL( f_in(1,1,nxl_y,1), sendrecvcount_xy, MPI_REAL, &
1144	work(nys,1,0), sendrecvcount_xy, MPI_REAL, &
1145	comm1dy, ierr )
1146	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
1147
1148	!
1149	!-- Carry out the FFT along x, where all data are present due to the
1150	!-- 1d-decomposition along y. Resort the data in a way that y becomes
1151	!-- the first index.
1152	CALL cpu_log( log_point_s(4), 'fft_x', 'continue' )
1153
1154	IF ( host(1:3) == 'nec' ) THEN
1155	!
1156	!-- Code optimized for vector processors
1157	!$OMP PARALLEL PRIVATE ( i, j, k )
1158	!$OMP DO
1159	DO j = nys, nyn
1160
1161	DO k = 1, nz
1162	DO i = 0, nx
1163	work_fftx(i,k,j) = work(j,k,i)
1164	ENDDO
1165	ENDDO
1166
1167	CALL fft_x_m( work_fftx(:,:,j), 'backward' )
1168
1169	ENDDO
1170
1171	!$OMP DO
1172	DO i = 0, nx
1173	DO j = nys, nyn
1174	DO k = 1, nz
1175	f_out(k,j,i) = work_fftx(i,k,j)
1176	ENDDO
1177	ENDDO
1178	ENDDO
1179	!$OMP END PARALLEL
1180
1181	ELSE
1182
1183	!
1184	!-- Cache optimized code (there might be still a potential for better
1185	!-- optimization).
1186	!$OMP PARALLEL PRIVATE (i,j,k)
1187	!$OMP DO
1188	DO j = nys, nyn
1189	DO k = 1, nz
1190
1191	DO i = 0, nx
1192	work_fftx(i,k,j) = work(j,k,i)
1193	ENDDO
1194
1195	CALL fft_x( work_fftx(0:nx,k,j), 'backward' )
1196
1197	ENDDO
1198	ENDDO
1199
1200	!$OMP DO
1201	DO i = 0, nx
1202	DO j = nys, nyn
1203	DO k = 1, nz
1204	f_out(k,j,i) = work_fftx(i,k,j)
1205	ENDDO
1206	ENDDO
1207	ENDDO
1208	!$OMP END PARALLEL
1209
1210	ENDIF
1211	CALL cpu_log( log_point_s(4), 'fft_x', 'stop' )
1212
1213	END SUBROUTINE tr_yx_fftx
1214
1215
1216	SUBROUTINE ffty_tri_ffty( ar )
1217
1218	!------------------------------------------------------------------------------!
1219	! FFT along y, solution of the tridiagonal system and backward FFT for
1220	! a 1d-decomposition along y
1221	!
1222	! WARNING: this subroutine may still not work for hybrid parallelization
1223	! with OpenMP (for possible necessary changes see the original
1224	! routine poisfft_hybrid, developed by Klaus Ketelsen, May 2002)
1225	!------------------------------------------------------------------------------!
1226
1227	USE control_parameters
1228	USE cpulog
1229	USE grid_variables
1230	USE indices
1231	USE interfaces
1232	USE pegrid
1233	USE transpose_indices
1234
1235	IMPLICIT NONE
1236
1237	INTEGER :: i, j, k, m, n, omp_get_thread_num, tn
1238
1239	REAL, DIMENSION(0:ny) :: work_ffty
1240	REAL, DIMENSION(0:ny,1:nz) :: work_triy
1241	REAL, DIMENSION(nny,1:nza,nxl_y:nxr_ya,pdims(2)) :: ar
1242	REAL, DIMENSION(:,:,:,:), ALLOCATABLE :: tri
1243
1244
1245	CALL cpu_log( log_point_s(39), 'fft_y + tridia', 'start' )
1246
1247	ALLOCATE( tri(5,0:ny,0:nz-1,0:threads_per_task-1) )
1248
1249	tn = 0 ! Default thread number in case of one thread
1250	!$OMP PARALLEL DO PRIVATE ( i, j, k, m, n, tn, work_ffty, work_triy )
1251	DO i = nxl_y, nxr_y
1252
1253	!$ tn = omp_get_thread_num()
1254
1255	IF ( host(1:3) == 'nec' ) THEN
1256	!
1257	!-- Code optimized for vector processors
1258	DO k = 1, nz
1259
1260	m = 0
1261	DO n = 1, pdims(2)
1262	DO j = 1, nny_pe( n-1 ) ! WARN: pcoord(j) should be used!!
1263	work_triy(m,k) = ar(j,k,i,n)
1264	m = m + 1
1265	ENDDO
1266	ENDDO
1267
1268	ENDDO
1269
1270	CALL fft_y_m( work_triy, ny, 'forward' )
1271
1272	ELSE
1273	!
1274	!-- Cache optimized code
1275	DO k = 1, nz
1276
1277	m = 0
1278	DO n = 1, pdims(2)
1279	DO j = 1, nny_pe( n-1 ) ! WARN: pcoord(j) should be used!!
1280	work_ffty(m) = ar(j,k,i,n)
1281	m = m + 1
1282	ENDDO
1283	ENDDO
1284
1285	CALL fft_y( work_ffty, 'forward' )
1286
1287	DO j = 0, ny
1288	work_triy(j,k) = work_ffty(j)
1289	ENDDO
1290
1291	ENDDO
1292
1293	ENDIF
1294
1295	!
1296	!-- Solve the linear equation system
1297	CALL tridia_1dd( ddy2, ddx2, ny, nx, i, work_triy, tri(:,:,:,tn) )
1298
1299	IF ( host(1:3) == 'nec' ) THEN
1300	!
1301	!-- Code optimized for vector processors
1302	CALL fft_y_m( work_triy, ny, 'backward' )
1303
1304	DO k = 1, nz
1305
1306	m = 0
1307	DO n = 1, pdims(2)
1308	DO j = 1, nny_pe( n-1 ) ! WARN: pcoord(j) should be used!!
1309	ar(j,k,i,n) = work_triy(m,k)
1310	m = m + 1
1311	ENDDO
1312	ENDDO
1313
1314	ENDDO
1315
1316	ELSE
1317	!
1318	!-- Cache optimized code
1319	DO k = 1, nz
1320
1321	DO j = 0, ny
1322	work_ffty(j) = work_triy(j,k)
1323	ENDDO
1324
1325	CALL fft_y( work_ffty, 'backward' )
1326
1327	m = 0
1328	DO n = 1, pdims(2)
1329	DO j = 1, nny_pe( n-1 ) ! WARN: pcoord(j) should be used!!
1330	ar(j,k,i,n) = work_ffty(m)
1331	m = m + 1
1332	ENDDO
1333	ENDDO
1334
1335	ENDDO
1336
1337	ENDIF
1338
1339	ENDDO
1340
1341	DEALLOCATE( tri )
1342
1343	CALL cpu_log( log_point_s(39), 'fft_y + tridia', 'stop' )
1344
1345	END SUBROUTINE ffty_tri_ffty
1346
1347
1348	SUBROUTINE tridia_1dd( ddx2, ddy2, nx, ny, j, ar, tri )
1349
1350	!------------------------------------------------------------------------------!
1351	! Solves the linear system of equations for a 1d-decomposition along x (see
1352	! tridia)
1353	!
1354	! Attention: when using the intel compiler, array tri must be passed as an
1355	! argument to the contained subroutines. Otherwise addres faults
1356	! will occur.
1357	! On NEC, tri should not be passed (except for routine substi_1dd)
1358	! because this causes very bad performance.
1359	!------------------------------------------------------------------------------!
1360
1361	USE arrays_3d
1362	USE control_parameters
1363
1364	USE pegrid
1365
1366	IMPLICIT NONE
1367
1368	INTEGER :: i, j, k, nnyh, nx, ny, omp_get_thread_num, tn
1369
1370	REAL :: ddx2, ddy2
1371
1372	REAL, DIMENSION(0:nx,1:nz) :: ar
1373	REAL, DIMENSION(0:nx,0:nz-1) :: ar1
1374	REAL, DIMENSION(5,0:nx,0:nz-1) :: tri
1375
1376
1377	nnyh = ( ny + 1 ) / 2
1378
1379	!
1380	!-- Define constant elements of the tridiagonal matrix.
1381	!-- The compiler on SX6 does loop exchange. If 0:nx is a high power of 2,
1382	!-- the exchanged loops create bank conflicts. The following directive
1383	!-- prohibits loop exchange and the loops perform much better.
1384	! tn = omp_get_thread_num()
1385	! WRITE( 120+tn, * ) '+++ id=',myid,' nx=',nx,' thread=', omp_get_thread_num()
1386	! CALL local_flush( 120+tn )
1387	!CDIR NOLOOPCHG
1388	DO k = 0, nz-1
1389	DO i = 0,nx
1390	tri(2,i,k) = ddzu_pres(k+1) * ddzw(k+1)
1391	tri(3,i,k) = ddzu_pres(k+2) * ddzw(k+1)
1392	ENDDO
1393	ENDDO
1394	! WRITE( 120+tn, * ) '+++ id=',myid,' end of first tridia loop thread=', omp_get_thread_num()
1395	! CALL local_flush( 120+tn )
1396
1397	IF ( j <= nnyh ) THEN
1398	#if defined( __lc )
1399	CALL maketri_1dd( j, tri )
1400	#else
1401	CALL maketri_1dd( j )
1402	#endif
1403	ELSE
1404	#if defined( __lc )
1405	CALL maketri_1dd( ny+1-j, tri )
1406	#else
1407	CALL maketri_1dd( ny+1-j )
1408	#endif
1409	ENDIF
1410	#if defined( __lc )
1411	CALL split_1dd( tri )
1412	#else
1413	CALL split_1dd
1414	#endif
1415	CALL substi_1dd( ar, tri )
1416
1417	CONTAINS
1418
1419	#if defined( __lc )
1420	SUBROUTINE maketri_1dd( j, tri )
1421	#else
1422	SUBROUTINE maketri_1dd( j )
1423	#endif
1424
1425	!------------------------------------------------------------------------------!
1426	! computes the i- and j-dependent component of the matrix
1427	!------------------------------------------------------------------------------!
1428
1429	USE constants
1430
1431	IMPLICIT NONE
1432
1433	INTEGER :: i, j, k, nnxh
1434	REAL :: a, c
1435
1436	REAL, DIMENSION(0:nx) :: l
1437
1438	#if defined( __lc )
1439	REAL, DIMENSION(5,0:nx,0:nz-1) :: tri
1440	#endif
1441
1442
1443	nnxh = ( nx + 1 ) / 2
1444	!
1445	!-- Provide the tridiagonal matrix for solution of the Poisson equation in
1446	!-- Fourier space. The coefficients are computed following the method of
1447	!-- Schmidt et al. (DFVLR-Mitteilung 84-15), which departs from Stephan
1448	!-- Siano's original version by discretizing the Poisson equation,
1449	!-- before it is Fourier-transformed
1450	DO i = 0, nx
1451	IF ( i >= 0 .AND. i <= nnxh ) THEN
1452	l(i) = 2.0 * ( 1.0 - COS( ( 2.0 * pi * i ) / &
1453	FLOAT( nx+1 ) ) ) * ddx2 + &
1454	2.0 * ( 1.0 - COS( ( 2.0 * pi * j ) / &
1455	FLOAT( ny+1 ) ) ) * ddy2
1456	ELSE
1457	l(i) = 2.0 * ( 1.0 - COS( ( 2.0 * pi * ( nx+1-i ) ) / &
1458	FLOAT( nx+1 ) ) ) * ddx2 + &
1459	2.0 * ( 1.0 - COS( ( 2.0 * pi * j ) / &
1460	FLOAT( ny+1 ) ) ) * ddy2
1461	ENDIF
1462	ENDDO
1463
1464	DO k = 0, nz-1
1465	DO i = 0, nx
1466	a = -1.0 * ddzu_pres(k+2) * ddzw(k+1)
1467	c = -1.0 * ddzu_pres(k+1) * ddzw(k+1)
1468	tri(1,i,k) = a + c - l(i)
1469	ENDDO
1470	ENDDO
1471	IF ( ibc_p_b == 1 .OR. ibc_p_b == 2 ) THEN
1472	DO i = 0, nx
1473	tri(1,i,0) = tri(1,i,0) + tri(2,i,0)
1474	ENDDO
1475	ENDIF
1476	IF ( ibc_p_t == 1 ) THEN
1477	DO i = 0, nx
1478	tri(1,i,nz-1) = tri(1,i,nz-1) + tri(3,i,nz-1)
1479	ENDDO
1480	ENDIF
1481
1482	END SUBROUTINE maketri_1dd
1483
1484
1485	#if defined( __lc )
1486	SUBROUTINE split_1dd( tri )
1487	#else
1488	SUBROUTINE split_1dd
1489	#endif
1490
1491	!------------------------------------------------------------------------------!
1492	! Splitting of the tridiagonal matrix (Thomas algorithm)
1493	!------------------------------------------------------------------------------!
1494
1495	IMPLICIT NONE
1496
1497	INTEGER :: i, k
1498
1499	#if defined( __lc )
1500	REAL, DIMENSION(5,0:nx,0:nz-1) :: tri
1501	#endif
1502
1503
1504	!
1505	!-- Splitting
1506	DO i = 0, nx
1507	tri(4,i,0) = tri(1,i,0)
1508	ENDDO
1509	DO k = 1, nz-1
1510	DO i = 0, nx
1511	tri(5,i,k) = tri(2,i,k) / tri(4,i,k-1)
1512	tri(4,i,k) = tri(1,i,k) - tri(3,i,k-1) * tri(5,i,k)
1513	ENDDO
1514	ENDDO
1515
1516	END SUBROUTINE split_1dd
1517
1518
1519	SUBROUTINE substi_1dd( ar, tri )
1520
1521	!------------------------------------------------------------------------------!
1522	! Substitution (Forward and Backward) (Thomas algorithm)
1523	!------------------------------------------------------------------------------!
1524
1525	IMPLICIT NONE
1526
1527	INTEGER :: i, k
1528
1529	REAL, DIMENSION(0:nx,nz) :: ar
1530	REAL, DIMENSION(0:nx,0:nz-1) :: ar1
1531	REAL, DIMENSION(5,0:nx,0:nz-1) :: tri
1532
1533	!
1534	!-- Forward substitution
1535	DO i = 0, nx
1536	ar1(i,0) = ar(i,1)
1537	ENDDO
1538	DO k = 1, nz-1
1539	DO i = 0, nx
1540	ar1(i,k) = ar(i,k+1) - tri(5,i,k) * ar1(i,k-1)
1541	ENDDO
1542	ENDDO
1543
1544	!
1545	!-- Backward substitution
1546	DO i = 0, nx
1547	ar(i,nz) = ar1(i,nz-1) / tri(4,i,nz-1)
1548	ENDDO
1549	DO k = nz-2, 0, -1
1550	DO i = 0, nx
1551	ar(i,k+1) = ( ar1(i,k) - tri(3,i,k) * ar(i,k+2) ) &
1552	/ tri(4,i,k)
1553	ENDDO
1554	ENDDO
1555
1556	!
1557	!-- Indices i=0, j=0 correspond to horizontally averaged pressure.
1558	!-- The respective values of ar should be zero at all k-levels if
1559	!-- acceleration of horizontally averaged vertical velocity is zero.
1560	IF ( ibc_p_b == 1 .AND. ibc_p_t == 1 ) THEN
1561	IF ( j == 0 ) THEN
1562	DO k = 1, nz
1563	ar(0,k) = 0.0
1564	ENDDO
1565	ENDIF
1566	ENDIF
1567
1568	END SUBROUTINE substi_1dd
1569
1570	END SUBROUTINE tridia_1dd
1571
1572	#endif
1573
1574	END MODULE poisfft_mod

Note: See TracBrowser for help on using the repository browser.

Download in other formats:

| Impressum | ©Leibniz Universität Hannover |