Home

Context Navigation

source: palm/trunk/SOURCE/poisfft.f90 @ 1

Last change on this file since 1 was 1, checked in by raasch, 17 years ago
Initial repository layout and content
File size: 45.1 KB

Line
1	MODULE poisfft_mod
2
3	!------------------------------------------------------------------------------!
4	! Actual revisions:
5	! -----------------
6	!
7	!
8	! Former revisions:
9	! -----------------
10	! $Log: poisfft.f90,v $
11	! Revision 1.24 2006/08/04 15:00:24 raasch
12	! Default setting of the thread number tn in case of not using OpenMP
13	!
14	! Revision 1.23 2006/02/23 12:48:38 raasch
15	! Additional compiler directive in routine tridia_1dd for preventing loop
16	! exchange on NEC-SX6
17	!
18	! Revision 1.22 2005/05/18 15:53:00 raasch
19	! Missing argument in ffty
20	!
21	! Revision 1.21 2005/03/26 20:51:59 raasch
22	! Use of module indices removed from routine split_1dd since it caused errors
23	! in case of nx /= ny
24	!
25	! Revision 1.20 2004/04/30 12:38:09 raasch
26	! Parts of former poisfft_hybrid moved to this subroutine,
27	! former subroutine changed to a module, renaming of FFT-subroutines and
28	! -module, FFTs completely substituted by calls of fft_x and fft_y,
29	! NAG fft used in the non-parallel case completely removed, l in maketri
30	! is now a 1d-array, variables passed by modules instead of using parameter
31	! lists, enlarged transposition arrays introduced
32	!
33	! Revision 1.19 2003/04/16 13:11:16 raasch
34	! Internal fft method calls substituted by calls of fft_x_1dd and fft_y_1dd
35	!
36	! Revision 1.18 2003/03/16 09:44:45 raasch
37	! Two underscores (_) are placed in front of all define-strings
38	!
39	! Revision 1.17 2003/03/12 16:36:34 raasch
40	! System specific version for NEC included (equivalent to CRAY scilib
41	! calls, but different subroutine names; the CRAY names are replaced with
42	! NEC names by cpp), size of array work1 is expanded because NEC routines
43	! need more memory
44	!
45	! Revision 1.15 2002/06/11 13:14:31 raasch
46	! System specific version for IBM included (parallel case only)
47	!
48	! Revision 1.13 2001/01/25 22:19:18 letzel
49	! All comments translated into English.
50	!
51	! Revision 1.12 2001/01/23 01:53:04 raasch
52	! Total revision of parallel and non-paralle fft-routines. Additional
53	! implementation of the singleton algorithm.
54	!
55	! Revision 1.9 1998/07/27 08:12:56 schroeter
56	! Berechnung von l in maketri geschieht nun ausserhalb
57	! der k-Schleife, da unabhaengig von k, dadurch
58	! Rechenzeitersparnis
59	!
60	! Revision 1.8 1998/07/24 07:52:09 schroeter
61	! Anpassung des Druckloesealgorithmusses an Vorgehensweise
62	! von Schmidt, Schumann und Volkert
63	! (DFVLR-Mitteilung 84-15, S. 20-26)
64	!
65	! Revision 1.1 1997/07/24 11:24:14 raasch
66	! Initial revision
67	!
68	!
69	! Description:
70	! ------------
71	! See below.
72	!------------------------------------------------------------------------------!
73
74	!--------------------------------------------------------------------------!
75	! poisfft !
76	! !
77	! Original version: Stephan Siano (pois3d) !
78	! !
79	! Institute of Meteorology and Climatology, University of Hannover !
80	! Germany !
81	! !
82	! Version as of July 23,1996 !
83	! !
84	! !
85	! Version for parallel computers: Siegfried Raasch !
86	! !
87	! Version as of July 03,1997 !
88	! !
89	! Solves the Poisson equation with a 2D spectral method !
90	! d^2 p / dx^2 + d^2 p / dy^2 + d^2 p / dz^2 = s !
91	! !
92	! Input: !
93	! real ar contains in the (nnx,nny,nnz) elements, !
94	! starting from the element (1,nys,nxl), the !
95	! values for s !
96	! real work Temporary array !
97	! !
98	! Output: !
99	! real ar contains the solution for p !
100	!--------------------------------------------------------------------------!
101
102	USE fft_xy
103	USE indices
104	USE transpose_indices
105
106	IMPLICIT NONE
107
108	PRIVATE
109	PUBLIC poisfft, poisfft_init
110
111	INTERFACE poisfft
112	MODULE PROCEDURE poisfft
113	END INTERFACE poisfft
114
115	INTERFACE poisfft_init
116	MODULE PROCEDURE poisfft_init
117	END INTERFACE poisfft_init
118
119	CONTAINS
120
121	SUBROUTINE poisfft_init
122
123	CALL fft_init
124
125	END SUBROUTINE poisfft_init
126
127
128	SUBROUTINE poisfft( ar, work )
129
130	USE cpulog
131	USE interfaces
132	USE pegrid
133
134	IMPLICIT NONE
135
136	REAL, DIMENSION(1:nza,nys:nyna,nxl:nxra) :: ar, work
137
138
139	CALL cpu_log( log_point_s(3), 'poisfft', 'start' )
140
141	!
142	!-- Two-dimensional Fourier Transformation in x- and y-direction.
143	#if defined( __parallel )
144	IF ( pdims(2) == 1 ) THEN
145
146	!
147	!-- 1d-domain-decomposition along x:
148	!-- FFT along y and transposition y --> x
149	CALL ffty_tr_yx( ar, work, ar )
150
151	!
152	!-- FFT along x, solving the tridiagonal system and backward FFT
153	CALL fftx_tri_fftx( ar )
154
155	!
156	!-- Transposition x --> y and backward FFT along y
157	CALL tr_xy_ffty( ar, work, ar )
158
159	ELSEIF ( pdims(1) == 1 ) THEN
160
161	!
162	!-- 1d-domain-decomposition along y:
163	!-- FFT along x and transposition x --> y
164	CALL fftx_tr_xy( ar, work, ar )
165
166	!
167	!-- FFT along y, solving the tridiagonal system and backward FFT
168	CALL ffty_tri_ffty( ar )
169
170	!
171	!-- Transposition y --> x and backward FFT along x
172	CALL tr_yx_fftx( ar, work, ar )
173
174	ELSE
175
176	!
177	!-- 2d-domain-decomposition
178	!-- Transposition z --> x
179	CALL cpu_log( log_point_s(5), 'transpo forward', 'start' )
180	CALL transpose_zx( ar, work, ar, work, ar )
181	CALL cpu_log( log_point_s(5), 'transpo forward', 'pause' )
182
183	CALL cpu_log( log_point_s(4), 'fft_x', 'start' )
184	CALL fftxp( ar, 'forward' )
185	CALL cpu_log( log_point_s(4), 'fft_x', 'pause' )
186
187	!
188	!-- Transposition x --> y
189	CALL cpu_log( log_point_s(5), 'transpo forward', 'continue' )
190	CALL transpose_xy( ar, work, ar, work, ar )
191	CALL cpu_log( log_point_s(5), 'transpo forward', 'pause' )
192
193	CALL cpu_log( log_point_s(7), 'fft_y', 'start' )
194	CALL fftyp( ar, 'forward' )
195	CALL cpu_log( log_point_s(7), 'fft_y', 'pause' )
196
197	!
198	!-- Transposition y --> z
199	CALL cpu_log( log_point_s(5), 'transpo forward', 'continue' )
200	CALL transpose_yz( ar, work, ar, work, ar )
201	CALL cpu_log( log_point_s(5), 'transpo forward', 'stop' )
202
203	!
204	!-- Solve the Poisson equation in z-direction in cartesian space.
205	CALL cpu_log( log_point_s(6), 'tridia', 'start' )
206	CALL tridia( ar )
207	CALL cpu_log( log_point_s(6), 'tridia', 'stop' )
208
209	!
210	!-- Inverse Fourier Transformation
211	!-- Transposition z --> y
212	CALL cpu_log( log_point_s(8), 'transpo invers', 'start' )
213	CALL transpose_zy( ar, work, ar, work, ar )
214	CALL cpu_log( log_point_s(8), 'transpo invers', 'pause' )
215
216	CALL cpu_log( log_point_s(7), 'fft_y', 'continue' )
217	CALL fftyp( ar, 'backward' )
218	CALL cpu_log( log_point_s(7), 'fft_y', 'stop' )
219
220	!
221	!-- Transposition y --> x
222	CALL cpu_log( log_point_s(8), 'transpo invers', 'continue' )
223	CALL transpose_yx( ar, work, ar, work, ar )
224	CALL cpu_log( log_point_s(8), 'transpo invers', 'pause' )
225
226	CALL cpu_log( log_point_s(4), 'fft_x', 'continue' )
227	CALL fftxp( ar, 'backward' )
228	CALL cpu_log( log_point_s(4), 'fft_x', 'stop' )
229
230	!
231	!-- Transposition x --> z
232	CALL cpu_log( log_point_s(8), 'transpo invers', 'continue' )
233	CALL transpose_xz( ar, work, ar, work, ar )
234	CALL cpu_log( log_point_s(8), 'transpo invers', 'stop' )
235
236	ENDIF
237
238	#else
239
240	!
241	!-- Two-dimensional Fourier Transformation along x- and y-direction.
242	CALL cpu_log( log_point_s(4), 'fft_x', 'start' )
243	CALL fftx( ar, 'forward' )
244	CALL cpu_log( log_point_s(4), 'fft_x', 'pause' )
245	CALL cpu_log( log_point_s(7), 'fft_y', 'start' )
246	CALL ffty( ar, 'forward' )
247	CALL cpu_log( log_point_s(7), 'fft_y', 'pause' )
248
249	!
250	!-- Solve the Poisson equation in z-direction in cartesian space.
251	CALL cpu_log( log_point_s(6), 'tridia', 'start' )
252	CALL tridia( ar )
253	CALL cpu_log( log_point_s(6), 'tridia', 'stop' )
254
255	!
256	!-- Inverse Fourier Transformation.
257	CALL cpu_log( log_point_s(7), 'fft_y', 'continue' )
258	CALL ffty( ar, 'backward' )
259	CALL cpu_log( log_point_s(7), 'fft_y', 'stop' )
260	CALL cpu_log( log_point_s(4), 'fft_x', 'continue' )
261	CALL fftx( ar, 'backward' )
262	CALL cpu_log( log_point_s(4), 'fft_x', 'stop' )
263
264	#endif
265
266	CALL cpu_log( log_point_s(3), 'poisfft', 'stop' )
267
268	END SUBROUTINE poisfft
269
270
271
272	SUBROUTINE tridia( ar )
273
274	!------------------------------------------------------------------------------!
275	! solves the linear system of equations:
276	!
277	! -(4 pi^2(i^2/(dx^2nnx^2)+j^2/(dy^2nny^2))+
278	! 1/(dzu(k)dzw(k))+1/(dzu(k-1)dzw(k)))*p(i,j,k)+
279	! 1/(dzu(k)dzw(k))p(i,j,k+1)+1/(dzu(k-1)dzw(k))p(i,j,k-1)=d(i,j,k)
280	!
281	! by using the Thomas algorithm
282	!------------------------------------------------------------------------------!
283
284	USE arrays_3d
285
286	IMPLICIT NONE
287
288	INTEGER :: i, j, k, nnyh
289
290	REAL, DIMENSION(nxl_z:nxr_z,0:nz-1) :: ar1
291	REAL, DIMENSION(5,nxl_z:nxr_z,0:nz-1) :: tri
292
293	#if defined( __parallel )
294	REAL :: ar(nxl_z:nxr_za,nys_z:nyn_za,1:nza)
295	#else
296	REAL :: ar(1:nz,nys_z:nyn_z,nxl_z:nxr_z)
297	#endif
298
299
300	nnyh = (ny+1) / 2
301
302	!
303	!-- Define constant elements of the tridiagonal matrix.
304	DO k = 0, nz-1
305	DO i = nxl_z, nxr_z
306	tri(2,i,k) = ddzu(k+1) * ddzw(k+1)
307	tri(3,i,k) = ddzu(k+2) * ddzw(k+1)
308	ENDDO
309	ENDDO
310
311	#if defined( __parallel )
312	!
313	!-- Repeat for all y-levels.
314	DO j = nys_z, nyn_z
315	IF ( j <= nnyh ) THEN
316	CALL maketri( tri, j )
317	ELSE
318	CALL maketri( tri, ny+1-j )
319	ENDIF
320	CALL split( tri )
321	CALL substi( ar, ar1, tri, j )
322	ENDDO
323	#else
324	!
325	!-- First y-level.
326	CALL maketri( tri, nys_z )
327	CALL split( tri )
328	CALL substi( ar, ar1, tri, 0 )
329
330	!
331	!-- Further y-levels.
332	DO j = 1, nnyh - 1
333	CALL maketri( tri, j )
334	CALL split( tri )
335	CALL substi( ar, ar1, tri, j )
336	CALL substi( ar, ar1, tri, ny+1-j )
337	ENDDO
338	CALL maketri( tri, nnyh )
339	CALL split( tri )
340	CALL substi( ar, ar1, tri, nnyh+nys )
341	#endif
342
343	CONTAINS
344
345	SUBROUTINE maketri( tri, j )
346
347	!------------------------------------------------------------------------------!
348	! Computes the i- and j-dependent component of the matrix
349	!------------------------------------------------------------------------------!
350
351	USE arrays_3d
352	USE constants
353	USE control_parameters
354	USE grid_variables
355
356	IMPLICIT NONE
357
358	INTEGER :: i, j, k, nnxh
359	REAL :: a, c
360	REAL :: ll(nxl_z:nxr_z)
361	REAL :: tri(5,nxl_z:nxr_z,0:nz-1)
362
363
364	nnxh = ( nx + 1 ) / 2
365
366	!
367	!-- Provide the tridiagonal matrix for solution of the Poisson equation in
368	!-- Fourier space. The coefficients are computed following the method of
369	!-- Schmidt et al. (DFVLR-Mitteilung 84-15), which departs from Stephan
370	!-- Siano's original version by discretizing the Poisson equation,
371	!-- before it is Fourier-transformed
372	#if defined( __parallel )
373	DO i = nxl_z, nxr_z
374	IF ( i >= 0 .AND. i < nnxh ) THEN
375	ll(i) = 2.0 * ( 1.0 - COS( ( 2.0 * pi * i ) / &
376	FLOAT( nx+1 ) ) ) / ( dx * dx ) + &
377	2.0 * ( 1.0 - COS( ( 2.0 * pi * j ) / &
378	FLOAT( ny+1 ) ) ) / ( dy * dy )
379	ELSEIF ( i == nnxh ) THEN
380	ll(i) = 2.0 * ( 1.0 - COS( ( 2.0 * pi * ( nx+1-i ) ) / &
381	FLOAT( nx+1 ) ) ) / ( dx * dx ) + &
382	2.0 * ( 1.0 - COS( ( 2.0 * pi * j ) / &
383	FLOAT( ny+1 ) ) ) / ( dy * dy )
384	ELSE
385	ll(i) = 2.0 * ( 1.0 - COS( ( 2.0 * pi * ( nx+1-i ) ) / &
386	FLOAT( nx+1 ) ) ) / ( dx * dx ) + &
387	2.0 * ( 1.0 - COS( ( 2.0 * pi * j ) / &
388	FLOAT( ny+1 ) ) ) / ( dy * dy )
389	ENDIF
390	DO k = 0,nz-1
391	a = -1.0 * ddzu(k+2) * ddzw(k+1)
392	c = -1.0 * ddzu(k+1) * ddzw(k+1)
393	tri(1,i,k) = a + c - ll(i)
394	ENDDO
395	ENDDO
396	#else
397	DO i = 0, nnxh
398	ll(i) = 2.0 * ( 1.0 - COS( ( 2.0 * pi * i ) / FLOAT( nx+1 ) ) ) / &
399	( dx * dx ) + &
400	2.0 * ( 1.0 - COS( ( 2.0 * pi * j ) / FLOAT( ny+1 ) ) ) / &
401	( dy * dy )
402	DO k = 0, nz-1
403	a = -1.0 * ddzu(k+2) * ddzw(k+1)
404	c = -1.0 * ddzu(k+1) * ddzw(k+1)
405	tri(1,i,k) = a + c - ll(i)
406	IF ( i >= 1 .and. i < nnxh ) THEN
407	tri(1,nx+1-i,k) = tri(1,i,k)
408	ENDIF
409	ENDDO
410	ENDDO
411	#endif
412	IF ( ibc_p_b == 1 .OR. ibc_p_b == 2 ) THEN
413	DO i = nxl_z, nxr_z
414	tri(1,i,0) = tri(1,i,0) + tri(2,i,0)
415	ENDDO
416	ENDIF
417	IF ( ibc_p_t == 1 ) THEN
418	DO i = nxl_z, nxr_z
419	tri(1,i,nz-1) = tri(1,i,nz-1) + tri(3,i,nz-1)
420	ENDDO
421	ENDIF
422
423	END SUBROUTINE maketri
424
425
426	SUBROUTINE substi( ar, ar1, tri, j )
427
428	!------------------------------------------------------------------------------!
429	! Substitution (Forward and Backward) (Thomas algorithm)
430	!------------------------------------------------------------------------------!
431
432	IMPLICIT NONE
433
434	INTEGER :: i, j, k
435	REAL :: ar1(nxl_z:nxr_z,0:nz-1)
436	REAL :: tri(5,nxl_z:nxr_z,0:nz-1)
437	#if defined( __parallel )
438	REAL :: ar(nxl_z:nxr_za,nys_z:nyn_za,1:nza)
439	#else
440	REAL :: ar(1:nz,nys_z:nyn_z,nxl_z:nxr_z)
441	#endif
442
443	!
444	!-- Forward substitution.
445	DO i = nxl_z, nxr_z
446	#if defined( __parallel )
447	ar1(i,0) = ar(i,j,1)
448	#else
449	ar1(i,0) = ar(1,j,i)
450	#endif
451	ENDDO
452	DO k = 1, nz - 1
453	DO i = nxl_z, nxr_z
454	#if defined( __parallel )
455	ar1(i,k) = ar(i,j,k+1) - tri(5,i,k) * ar1(i,k-1)
456	#else
457	ar1(i,k) = ar(k+1,j,i) - tri(5,i,k) * ar1(i,k-1)
458	#endif
459	ENDDO
460	ENDDO
461
462	!
463	!-- Backward substitution.
464	DO i = nxl_z, nxr_z
465	#if defined( __parallel )
466	ar(i,j,nz) = ar1(i,nz-1) / tri(4,i,nz-1)
467	#else
468	ar(nz,j,i) = ar1(i,nz-1) / tri(4,i,nz-1)
469	#endif
470	ENDDO
471	DO k = nz-2, 0, -1
472	DO i = nxl_z, nxr_z
473	#if defined( __parallel )
474	ar(i,j,k+1) = ( ar1(i,k) - tri(3,i,k) * ar(i,j,k+2) ) &
475	/ tri(4,i,k)
476	#else
477	ar(k+1,j,i) = ( ar1(i,k) - tri(3,i,k) * ar(k+2,j,i) ) &
478	/ tri(4,i,k)
479	#endif
480	ENDDO
481	ENDDO
482
483	END SUBROUTINE substi
484
485
486	SUBROUTINE split( tri )
487
488	!------------------------------------------------------------------------------!
489	! Splitting of the tridiagonal matrix (Thomas algorithm)
490	!------------------------------------------------------------------------------!
491
492	IMPLICIT NONE
493
494	INTEGER :: i, k
495	REAL :: tri(5,nxl_z:nxr_z,0:nz-1)
496
497	!
498	!-- Splitting.
499	DO i = nxl_z, nxr_z
500	tri(4,i,0) = tri(1,i,0)
501	ENDDO
502	DO k = 1, nz-1
503	DO i = nxl_z, nxr_z
504	tri(5,i,k) = tri(2,i,k) / tri(4,i,k-1)
505	tri(4,i,k) = tri(1,i,k) - tri(3,i,k-1) * tri(5,i,k)
506	ENDDO
507	ENDDO
508
509	END SUBROUTINE split
510
511	END SUBROUTINE tridia
512
513
514	#if defined( __parallel )
515	SUBROUTINE fftxp( ar, direction )
516
517	!------------------------------------------------------------------------------!
518	! Fourier-transformation along x-direction Parallelized version
519	!------------------------------------------------------------------------------!
520
521	IMPLICIT NONE
522
523	CHARACTER (LEN=*) :: direction
524	INTEGER :: j, k
525	REAL :: ar(0:nxa,nys_x:nyn_xa,nzb_x:nzt_xa)
526
527	!
528	!-- Performing the fft with one of the methods implemented
529	DO k = nzb_x, nzt_x
530	DO j = nys_x, nyn_x
531	CALL fft_x( ar(0:nx,j,k), direction )
532	ENDDO
533	ENDDO
534
535	END SUBROUTINE fftxp
536
537	#else
538	SUBROUTINE fftx( ar, direction )
539
540	!------------------------------------------------------------------------------!
541	! Fourier-transformation along x-direction Non parallel version
542	!------------------------------------------------------------------------------!
543
544	IMPLICIT NONE
545
546	CHARACTER (LEN=*) :: direction
547	INTEGER :: i, j, k
548	REAL :: ar(1:nz,0:ny,0:nx)
549
550	!
551	!-- Performing the fft with one of the methods implemented
552	DO k = 1, nz
553	DO j = 0, ny
554	CALL fft_x( ar(k,j,0:nx), direction )
555	ENDDO
556	ENDDO
557
558	END SUBROUTINE fftx
559	#endif
560
561
562	#if defined( __parallel )
563	SUBROUTINE fftyp( ar, direction )
564
565	!------------------------------------------------------------------------------!
566	! Fourier-transformation along y-direction Parallelized version
567	!------------------------------------------------------------------------------!
568
569	IMPLICIT NONE
570
571	CHARACTER (LEN=*) :: direction
572	INTEGER :: i, k
573	REAL :: ar(0:nya,nxl_y:nxr_ya,nzb_y:nzt_ya)
574
575	!
576	!-- Performing the fft with one of the methods implemented
577	DO k = nzb_y, nzt_y
578	DO i = nxl_y, nxr_y
579	CALL fft_y( ar(0:ny,i,k), direction )
580	ENDDO
581	ENDDO
582
583	END SUBROUTINE fftyp
584
585	#else
586	SUBROUTINE ffty( ar, direction )
587
588	!------------------------------------------------------------------------------!
589	! Fourier-transformation along y-direction Non parallel version
590	!------------------------------------------------------------------------------!
591
592	IMPLICIT NONE
593
594	CHARACTER (LEN=*) :: direction
595	INTEGER :: i, k
596	REAL :: ar(1:nz,0:ny,0:nx)
597
598	!
599	!-- Performing the fft with one of the methods implemented
600	DO k = 1, nz
601	DO i = 0, nx
602	CALL fft_y( ar(k,0:ny,i), direction )
603	ENDDO
604	ENDDO
605
606	END SUBROUTINE ffty
607	#endif
608
609	#if defined( __parallel )
610	SUBROUTINE ffty_tr_yx( f_in, work, f_out )
611
612	!------------------------------------------------------------------------------!
613	! Fourier-transformation along y with subsequent transposition y --> x for
614	! a 1d-decomposition along x
615	!
616	! ATTENTION: The performance of this routine is much faster on the NEC-SX6,
617	! if the first index of work_ffty_vec is odd. Otherwise
618	! memory bank conflicts may occur (especially if the index is a
619	! multiple of 128). That's why work_ffty_vec is dimensioned as
620	! 0:ny+1.
621	! Of course, this will not work if users are using an odd number
622	! of gridpoints along y.
623	!------------------------------------------------------------------------------!
624
625	USE control_parameters
626	USE cpulog
627	USE indices
628	USE interfaces
629	USE pegrid
630	USE transpose_indices
631
632	IMPLICIT NONE
633
634	INTEGER :: i, iend, iouter, ir, j, k
635	INTEGER, PARAMETER :: stridex = 4
636
637	REAL, DIMENSION(0:ny,stridex) :: work_ffty
638	#if defined( __nec )
639	REAL, DIMENSION(0:ny+1,1:nz,nxl:nxr) :: work_ffty_vec
640	#endif
641	REAL, DIMENSION(1:nza,0:nya,nxl:nxra) :: f_in
642	REAL, DIMENSION(nnx,1:nza,nys_x:nyn_xa,pdims(1)) :: f_out
643	REAL, DIMENSION(nxl:nxra,1:nza,0:nya) :: work
644
645	!
646	!-- Carry out the FFT along y, where all data are present due to the
647	!-- 1d-decomposition along x. Resort the data in a way that x becomes
648	!-- the first index.
649	CALL cpu_log( log_point_s(7), 'fft_y', 'start' )
650
651	IF ( host(1:3) == 'nec' ) THEN
652	#if defined( __nec )
653	!
654	!-- Code optimized for vector processors
655	!$OMP PARALLEL PRIVATE ( i, j, k, work_ffty_vec )
656	!$OMP DO
657	DO i = nxl, nxr
658
659	DO j = 0, ny
660	DO k = 1, nz
661	work_ffty_vec(j,k,i) = f_in(k,j,i)
662	ENDDO
663	ENDDO
664
665	CALL fft_y_m( work_ffty_vec(:,:,i), ny+1, 'forward' )
666
667	ENDDO
668
669	!$OMP DO
670	DO k = 1, nz
671	DO j = 0, ny
672	DO i = nxl, nxr
673	work(i,k,j) = work_ffty_vec(j,k,i)
674	ENDDO
675	ENDDO
676	ENDDO
677	!$OMP END PARALLEL
678	#endif
679
680	ELSE
681
682	!
683	!-- Cache optimized code.
684	!-- The i-(x-)direction is split into a strided outer loop and an inner
685	!-- loop for better cache performance
686	!$OMP PARALLEL PRIVATE (i,iend,iouter,ir,j,k,work_ffty)
687	!$OMP DO
688	DO iouter = nxl, nxr, stridex
689
690	iend = MIN( iouter+stridex-1, nxr ) ! Upper bound for inner i loop
691
692	DO k = 1, nz
693
694	DO i = iouter, iend
695
696	ir = i-iouter+1 ! counter within a stride
697	DO j = 0, ny
698	work_ffty(j,ir) = f_in(k,j,i)
699	ENDDO
700	!
701	!-- FFT along y
702	CALL fft_y( work_ffty(:,ir), 'forward' )
703
704	ENDDO
705
706	!
707	!-- Resort
708	DO j = 0, ny
709	DO i = iouter, iend
710	work(i,k,j) = work_ffty(j,i-iouter+1)
711	ENDDO
712	ENDDO
713
714	ENDDO
715
716	ENDDO
717	!$OMP END PARALLEL
718
719	ENDIF
720	CALL cpu_log( log_point_s(7), 'fft_y', 'pause' )
721
722	!
723	!-- Transpose array
724	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
725	CALL MPI_ALLTOALL( work(nxl,1,0), sendrecvcount_xy, MPI_REAL, &
726	f_out(1,1,nys_x,1), sendrecvcount_xy, MPI_REAL, &
727	comm1dx, ierr )
728	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
729
730	END SUBROUTINE ffty_tr_yx
731
732
733	SUBROUTINE tr_xy_ffty( f_in, work, f_out )
734
735	!------------------------------------------------------------------------------!
736	! Transposition x --> y with a subsequent backward Fourier transformation for
737	! a 1d-decomposition along x
738	!------------------------------------------------------------------------------!
739
740	USE control_parameters
741	USE cpulog
742	USE indices
743	USE interfaces
744	USE pegrid
745	USE transpose_indices
746
747	IMPLICIT NONE
748
749	INTEGER :: i, iend, iouter, ir, j, k
750	INTEGER, PARAMETER :: stridex = 4
751
752	REAL, DIMENSION(0:ny,stridex) :: work_ffty
753	#if defined( __nec )
754	REAL, DIMENSION(0:ny+1,1:nz,nxl:nxr) :: work_ffty_vec
755	#endif
756	REAL, DIMENSION(nnx,1:nza,nys_x:nyn_xa,pdims(1)) :: f_in
757	REAL, DIMENSION(1:nza,0:nya,nxl:nxra) :: f_out
758	REAL, DIMENSION(nxl:nxra,1:nza,0:nya) :: work
759
760	!
761	!-- Transpose array
762	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
763	CALL MPI_ALLTOALL( f_in(1,1,nys_x,1), sendrecvcount_xy, MPI_REAL, &
764	work(nxl,1,0), sendrecvcount_xy, MPI_REAL, &
765	comm1dx, ierr )
766	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
767
768	!
769	!-- Resort the data in a way that y becomes the first index and carry out the
770	!-- backward fft along y.
771	CALL cpu_log( log_point_s(7), 'fft_y', 'continue' )
772
773	IF ( host(1:3) == 'nec' ) THEN
774	#if defined( __nec )
775	!
776	!-- Code optimized for vector processors
777	!$OMP PARALLEL PRIVATE ( i, j, k, work_ffty_vec )
778	!$OMP DO
779	DO k = 1, nz
780	DO j = 0, ny
781	DO i = nxl, nxr
782	work_ffty_vec(j,k,i) = work(i,k,j)
783	ENDDO
784	ENDDO
785	ENDDO
786
787	!$OMP DO
788	DO i = nxl, nxr
789
790	CALL fft_y_m( work_ffty_vec(:,:,i), ny+1, 'backward' )
791
792	DO j = 0, ny
793	DO k = 1, nz
794	f_out(k,j,i) = work_ffty_vec(j,k,i)
795	ENDDO
796	ENDDO
797
798	ENDDO
799	!$OMP END PARALLEL
800	#endif
801
802	ELSE
803
804	!
805	!-- Cache optimized code.
806	!-- The i-(x-)direction is split into a strided outer loop and an inner
807	!-- loop for better cache performance
808	!$OMP PARALLEL PRIVATE ( i, iend, iouter, ir, j, k, work_ffty )
809	!$OMP DO
810	DO iouter = nxl, nxr, stridex
811
812	iend = MIN( iouter+stridex-1, nxr ) ! Upper bound for inner i loop
813
814	DO k = 1, nz
815	!
816	!-- Resort
817	DO j = 0, ny
818	DO i = iouter, iend
819	work_ffty(j,i-iouter+1) = work(i,k,j)
820	ENDDO
821	ENDDO
822
823	DO i = iouter, iend
824
825	!
826	!-- FFT along y
827	ir = i-iouter+1 ! counter within a stride
828	CALL fft_y( work_ffty(:,ir), 'backward' )
829
830	DO j = 0, ny
831	f_out(k,j,i) = work_ffty(j,ir)
832	ENDDO
833	ENDDO
834
835	ENDDO
836
837	ENDDO
838	!$OMP END PARALLEL
839
840	ENDIF
841
842	CALL cpu_log( log_point_s(7), 'fft_y', 'stop' )
843
844	END SUBROUTINE tr_xy_ffty
845
846
847	SUBROUTINE fftx_tri_fftx( ar )
848
849	!------------------------------------------------------------------------------!
850	! FFT along x, solution of the tridiagonal system and backward FFT for
851	! a 1d-decomposition along x
852	!
853	! WARNING: this subroutine may still not work for hybrid parallelization
854	! with OpenMP (for possible necessary changes see the original
855	! routine poisfft_hybrid, developed by Klaus Ketelsen, May 2002)
856	!------------------------------------------------------------------------------!
857
858	USE control_parameters
859	USE cpulog
860	USE grid_variables
861	USE indices
862	USE interfaces
863	USE pegrid
864	USE transpose_indices
865
866	IMPLICIT NONE
867
868	character(len=3) :: myth_char
869
870	INTEGER :: i, j, k, m, n, omp_get_thread_num, tn
871
872	REAL, DIMENSION(0:nx) :: work_fftx
873	REAL, DIMENSION(0:nx,1:nz) :: work_trix
874	REAL, DIMENSION(nnx,1:nza,nys_x:nyn_xa,pdims(1)) :: ar
875	REAL, DIMENSION(:,:,:,:), ALLOCATABLE :: tri
876
877
878	CALL cpu_log( log_point_s(33), 'fft_x + tridia', 'start' )
879
880	ALLOCATE( tri(5,0:nx,0:nz-1,0:threads_per_task-1) )
881
882	tn = 0 ! Default thread number in case of one thread
883	!$OMP PARALLEL DO PRIVATE ( i, j, k, m, n, tn, work_fftx, work_trix )
884	DO j = nys_x, nyn_x
885
886	!$ tn = omp_get_thread_num()
887
888	IF ( host(1:3) == 'nec' ) THEN
889	!
890	!-- Code optimized for vector processors
891	DO k = 1, nz
892
893	m = 0
894	DO n = 1, pdims(1)
895	DO i = 1, nnx_pe( n-1 ) ! WARN: pcoord(i) should be used!!
896	work_trix(m,k) = ar(i,k,j,n)
897	m = m + 1
898	ENDDO
899	ENDDO
900
901	ENDDO
902
903	CALL fft_x_m( work_trix, 'forward' )
904
905	ELSE
906	!
907	!-- Cache optimized code
908	DO k = 1, nz
909
910	m = 0
911	DO n = 1, pdims(1)
912	DO i = 1, nnx_pe( n-1 ) ! WARN: pcoord(i) should be used!!
913	work_fftx(m) = ar(i,k,j,n)
914	m = m + 1
915	ENDDO
916	ENDDO
917
918	CALL fft_x( work_fftx, 'forward' )
919
920	DO i = 0, nx
921	work_trix(i,k) = work_fftx(i)
922	ENDDO
923
924	ENDDO
925
926	ENDIF
927
928	!
929	!-- Solve the linear equation system
930	CALL tridia_1dd( ddx2, ddy2, nx, ny, j, work_trix, tri(:,:,:,tn) )
931
932	IF ( host(1:3) == 'nec' ) THEN
933	!
934	!-- Code optimized for vector processors
935	CALL fft_x_m( work_trix, 'backward' )
936
937	DO k = 1, nz
938
939	m = 0
940	DO n = 1, pdims(1)
941	DO i = 1, nnx_pe( n-1 ) ! WARN: pcoord(i) should be used!!
942	ar(i,k,j,n) = work_trix(m,k)
943	m = m + 1
944	ENDDO
945	ENDDO
946
947	ENDDO
948
949	ELSE
950	!
951	!-- Cache optimized code
952	DO k = 1, nz
953
954	DO i = 0, nx
955	work_fftx(i) = work_trix(i,k)
956	ENDDO
957
958	CALL fft_x( work_fftx, 'backward' )
959
960	m = 0
961	DO n = 1, pdims(1)
962	DO i = 1, nnx_pe( n-1 ) ! WARN: pcoord(i) should be used!!
963	ar(i,k,j,n) = work_fftx(m)
964	m = m + 1
965	ENDDO
966	ENDDO
967
968	ENDDO
969
970	ENDIF
971
972	ENDDO
973
974	DEALLOCATE( tri )
975
976	CALL cpu_log( log_point_s(33), 'fft_x + tridia', 'stop' )
977
978	END SUBROUTINE fftx_tri_fftx
979
980
981	SUBROUTINE fftx_tr_xy( f_in, work, f_out )
982
983	!------------------------------------------------------------------------------!
984	! Fourier-transformation along x with subsequent transposition x --> y for
985	! a 1d-decomposition along y
986	!
987	! ATTENTION: The NEC-branch of this routine may significantly profit from
988	! further optimizations. So far, performance is much worse than
989	! for routine ffty_tr_yx (more than three times slower).
990	!------------------------------------------------------------------------------!
991
992	USE control_parameters
993	USE cpulog
994	USE indices
995	USE interfaces
996	USE pegrid
997	USE transpose_indices
998
999	IMPLICIT NONE
1000
1001	INTEGER :: i, j, k
1002
1003	REAL, DIMENSION(0:nx,1:nz,nys:nyn) :: work_fftx
1004	REAL, DIMENSION(1:nza,nys:nyna,0:nxa) :: f_in
1005	REAL, DIMENSION(nny,1:nza,nxl_y:nxr_ya,pdims(2)) :: f_out
1006	REAL, DIMENSION(nys:nyna,1:nza,0:nxa) :: work
1007
1008	!
1009	!-- Carry out the FFT along x, where all data are present due to the
1010	!-- 1d-decomposition along y. Resort the data in a way that y becomes
1011	!-- the first index.
1012	CALL cpu_log( log_point_s(4), 'fft_x', 'start' )
1013
1014	IF ( host(1:3) == 'nec' ) THEN
1015	!
1016	!-- Code for vector processors
1017	!$OMP PARALLEL PRIVATE ( i, j, k, work_fftx )
1018	!$OMP DO
1019	DO i = 0, nx
1020
1021	DO j = nys, nyn
1022	DO k = 1, nz
1023	work_fftx(i,k,j) = f_in(k,j,i)
1024	ENDDO
1025	ENDDO
1026
1027	ENDDO
1028
1029	!$OMP DO
1030	DO j = nys, nyn
1031
1032	CALL fft_x_m( work_fftx(:,:,j), 'forward' )
1033
1034	DO k = 1, nz
1035	DO i = 0, nx
1036	work(j,k,i) = work_fftx(i,k,j)
1037	ENDDO
1038	ENDDO
1039
1040	ENDDO
1041	!$OMP END PARALLEL
1042
1043	ELSE
1044
1045	!
1046	!-- Cache optimized code (there might be still a potential for better
1047	!-- optimization).
1048	!$OMP PARALLEL PRIVATE (i,j,k,work_fftx)
1049	!$OMP DO
1050	DO i = 0, nx
1051
1052	DO j = nys, nyn
1053	DO k = 1, nz
1054	work_fftx(i,k,j) = f_in(k,j,i)
1055	ENDDO
1056	ENDDO
1057
1058	ENDDO
1059
1060	!$OMP DO
1061	DO j = nys, nyn
1062	DO k = 1, nz
1063
1064	CALL fft_x( work_fftx(0:nx,k,j), 'forward' )
1065
1066	DO i = 0, nx
1067	work(j,k,i) = work_fftx(i,k,j)
1068	ENDDO
1069	ENDDO
1070
1071	ENDDO
1072	!$OMP END PARALLEL
1073
1074	ENDIF
1075	CALL cpu_log( log_point_s(4), 'fft_x', 'pause' )
1076
1077	!
1078	!-- Transpose array
1079	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
1080	CALL MPI_ALLTOALL( work(nys,1,0), sendrecvcount_xy, MPI_REAL, &
1081	f_out(1,1,nxl_y,1), sendrecvcount_xy, MPI_REAL, &
1082	comm1dy, ierr )
1083	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
1084
1085	END SUBROUTINE fftx_tr_xy
1086
1087
1088	SUBROUTINE tr_yx_fftx( f_in, work, f_out )
1089
1090	!------------------------------------------------------------------------------!
1091	! Transposition y --> x with a subsequent backward Fourier transformation for
1092	! a 1d-decomposition along x
1093	!------------------------------------------------------------------------------!
1094
1095	USE control_parameters
1096	USE cpulog
1097	USE indices
1098	USE interfaces
1099	USE pegrid
1100	USE transpose_indices
1101
1102	IMPLICIT NONE
1103
1104	INTEGER :: i, j, k
1105
1106	REAL, DIMENSION(0:nx,1:nz,nys:nyn) :: work_fftx
1107	REAL, DIMENSION(nny,1:nza,nxl_y:nxr_ya,pdims(2)) :: f_in
1108	REAL, DIMENSION(1:nza,nys:nyna,0:nxa) :: f_out
1109	REAL, DIMENSION(nys:nyna,1:nza,0:nxa) :: work
1110
1111	!
1112	!-- Transpose array
1113	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
1114	CALL MPI_ALLTOALL( f_in(1,1,nxl_y,1), sendrecvcount_xy, MPI_REAL, &
1115	work(nys,1,0), sendrecvcount_xy, MPI_REAL, &
1116	comm1dy, ierr )
1117	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
1118
1119	!
1120	!-- Carry out the FFT along x, where all data are present due to the
1121	!-- 1d-decomposition along y. Resort the data in a way that y becomes
1122	!-- the first index.
1123	CALL cpu_log( log_point_s(4), 'fft_x', 'continue' )
1124
1125	IF ( host(1:3) == 'nec' ) THEN
1126	!
1127	!-- Code optimized for vector processors
1128	!$OMP PARALLEL PRIVATE ( i, j, k, work_fftx )
1129	!$OMP DO
1130	DO j = nys, nyn
1131
1132	DO k = 1, nz
1133	DO i = 0, nx
1134	work_fftx(i,k,j) = work(j,k,i)
1135	ENDDO
1136	ENDDO
1137
1138	CALL fft_x_m( work_fftx(:,:,j), 'backward' )
1139
1140	ENDDO
1141
1142	!$OMP DO
1143	DO i = 0, nx
1144	DO j = nys, nyn
1145	DO k = 1, nz
1146	f_out(k,j,i) = work_fftx(i,k,j)
1147	ENDDO
1148	ENDDO
1149	ENDDO
1150	!$OMP END PARALLEL
1151
1152	ELSE
1153
1154	!
1155	!-- Cache optimized code (there might be still a potential for better
1156	!-- optimization).
1157	!$OMP PARALLEL PRIVATE (i,j,k,work_fftx)
1158	!$OMP DO
1159	DO j = nys, nyn
1160	DO k = 1, nz
1161
1162	DO i = 0, nx
1163	work_fftx(i,k,j) = work(j,k,i)
1164	ENDDO
1165
1166	CALL fft_x( work_fftx(0:nx,k,j), 'backward' )
1167
1168	ENDDO
1169	ENDDO
1170
1171	!$OMP DO
1172	DO i = 0, nx
1173	DO j = nys, nyn
1174	DO k = 1, nz
1175	f_out(k,j,i) = work_fftx(i,k,j)
1176	ENDDO
1177	ENDDO
1178	ENDDO
1179	!$OMP END PARALLEL
1180
1181	ENDIF
1182	CALL cpu_log( log_point_s(4), 'fft_x', 'stop' )
1183
1184	END SUBROUTINE tr_yx_fftx
1185
1186
1187	SUBROUTINE ffty_tri_ffty( ar )
1188
1189	!------------------------------------------------------------------------------!
1190	! FFT along y, solution of the tridiagonal system and backward FFT for
1191	! a 1d-decomposition along y
1192	!
1193	! WARNING: this subroutine may still not work for hybrid parallelization
1194	! with OpenMP (for possible necessary changes see the original
1195	! routine poisfft_hybrid, developed by Klaus Ketelsen, May 2002)
1196	!------------------------------------------------------------------------------!
1197
1198	USE control_parameters
1199	USE cpulog
1200	USE grid_variables
1201	USE indices
1202	USE interfaces
1203	USE pegrid
1204	USE transpose_indices
1205
1206	IMPLICIT NONE
1207
1208	INTEGER :: i, j, k, m, n, omp_get_thread_num, tn
1209
1210	REAL, DIMENSION(0:ny) :: work_ffty
1211	REAL, DIMENSION(0:ny,1:nz) :: work_triy
1212	REAL, DIMENSION(nny,1:nza,nxl_y:nxr_ya,pdims(2)) :: ar
1213	REAL, DIMENSION(:,:,:,:), ALLOCATABLE :: tri
1214
1215
1216	CALL cpu_log( log_point_s(39), 'fft_y + tridia', 'start' )
1217
1218	ALLOCATE( tri(5,0:ny,0:nz-1,0:threads_per_task-1) )
1219
1220	tn = 0 ! Default thread number in case of one thread
1221	!$OMP PARALLEL PRIVATE ( i, j, k, m, n, tn, work_ffty, work_triy )
1222	!$OMP DO
1223	DO i = nxl_y, nxr_y
1224
1225	!$ tn = omp_get_thread_num()
1226
1227	IF ( host(1:3) == 'nec' ) THEN
1228	!
1229	!-- Code optimized for vector processors
1230	DO k = 1, nz
1231
1232	m = 0
1233	DO n = 1, pdims(2)
1234	DO j = 1, nny_pe( n-1 ) ! WARN: pcoord(j) should be used!!
1235	work_triy(m,k) = ar(j,k,i,n)
1236	m = m + 1
1237	ENDDO
1238	ENDDO
1239
1240	ENDDO
1241
1242	CALL fft_y_m( work_triy, ny, 'forward' )
1243
1244	ELSE
1245	!
1246	!-- Cache optimized code
1247	DO k = 1, nz
1248
1249	m = 0
1250	DO n = 1, pdims(2)
1251	DO j = 1, nny_pe( n-1 ) ! WARN: pcoord(j) should be used!!
1252	work_ffty(m) = ar(j,k,i,n)
1253	m = m + 1
1254	ENDDO
1255	ENDDO
1256
1257	CALL fft_y( work_ffty, 'forward' )
1258
1259	DO j = 0, ny
1260	work_triy(j,k) = work_ffty(j)
1261	ENDDO
1262
1263	ENDDO
1264
1265	ENDIF
1266
1267	!
1268	!-- Solve the linear equation system
1269	CALL tridia_1dd( ddy2, ddx2, ny, nx, i, work_triy, tri(:,:,:,tn) )
1270
1271	IF ( host(1:3) == 'nec' ) THEN
1272	!
1273	!-- Code optimized for vector processors
1274	CALL fft_y_m( work_triy, ny, 'backward' )
1275
1276	DO k = 1, nz
1277
1278	m = 0
1279	DO n = 1, pdims(2)
1280	DO j = 1, nny_pe( n-1 ) ! WARN: pcoord(j) should be used!!
1281	ar(j,k,i,n) = work_triy(m,k)
1282	m = m + 1
1283	ENDDO
1284	ENDDO
1285
1286	ENDDO
1287
1288	ELSE
1289	!
1290	!-- Cache optimized code
1291	DO k = 1, nz
1292
1293	DO j = 0, ny
1294	work_ffty(j) = work_triy(j,k)
1295	ENDDO
1296
1297	CALL fft_y( work_ffty, 'backward' )
1298
1299	m = 0
1300	DO n = 1, pdims(2)
1301	DO j = 1, nny_pe( n-1 ) ! WARN: pcoord(j) should be used!!
1302	ar(j,k,i,n) = work_ffty(m)
1303	m = m + 1
1304	ENDDO
1305	ENDDO
1306
1307	ENDDO
1308
1309	ENDIF
1310
1311	ENDDO
1312	!$OMP END PARALLEL
1313
1314	DEALLOCATE( tri )
1315
1316	CALL cpu_log( log_point_s(39), 'fft_y + tridia', 'stop' )
1317
1318	END SUBROUTINE ffty_tri_ffty
1319
1320
1321	SUBROUTINE tridia_1dd( ddx2, ddy2, nx, ny, j, ar, tri )
1322
1323	!------------------------------------------------------------------------------!
1324	! Solves the linear system of equations for a 1d-decomposition along x (see
1325	! tridia)
1326	!
1327	! Attention: when using the intel compiler, array tri must be passed as an
1328	! argument to the contained subroutines. Otherwise addres faults
1329	! will occur.
1330	! On NEC, tri should not be passed (except for routine substi_1dd)
1331	! because this causes very bad performance.
1332	!------------------------------------------------------------------------------!
1333
1334	USE arrays_3d
1335	USE control_parameters
1336
1337	USE pegrid
1338
1339	IMPLICIT NONE
1340
1341	INTEGER :: i, j, k, nnyh, nx, ny, omp_get_thread_num, tn
1342
1343	REAL :: ddx2, ddy2
1344
1345	REAL, DIMENSION(0:nx,1:nz) :: ar
1346	REAL, DIMENSION(0:nx,0:nz-1) :: ar1
1347	REAL, DIMENSION(5,0:nx,0:nz-1) :: tri
1348
1349
1350	nnyh = ( ny + 1 ) / 2
1351
1352	!
1353	!-- Define constant elements of the tridiagonal matrix.
1354	!-- The compiler on SX6 does loop exchange. If 0:nx is a high power of 2,
1355	!-- the exchanged loops create bank conflicts. The following directive
1356	!-- prohibits loop exchange and the loops perform much better.
1357	! tn = omp_get_thread_num()
1358	! WRITE( 120+tn, * ) '+++ id=',myid,' nx=',nx,' thread=', omp_get_thread_num()
1359	! CALL FLUSH_( 120+tn )
1360	!CDIR NOLOOPCHG
1361	DO k = 0, nz-1
1362	DO i = 0,nx
1363	tri(2,i,k) = ddzu(k+1) * ddzw(k+1)
1364	tri(3,i,k) = ddzu(k+2) * ddzw(k+1)
1365	ENDDO
1366	ENDDO
1367	! WRITE( 120+tn, * ) '+++ id=',myid,' end of first tridia loop thread=', omp_get_thread_num()
1368	! CALL FLUSH_( 120+tn )
1369
1370	IF ( j <= nnyh ) THEN
1371	#if defined( __lcmuk )
1372	CALL maketri_1dd( j, tri )
1373	#else
1374	CALL maketri_1dd( j )
1375	#endif
1376	ELSE
1377	#if defined( __lcmuk )
1378	CALL maketri_1dd( ny+1-j, tri )
1379	#else
1380	CALL maketri_1dd( ny+1-j )
1381	#endif
1382	ENDIF
1383	#if defined( __lcmuk )
1384	CALL split_1dd( tri )
1385	#else
1386	CALL split_1dd
1387	#endif
1388	CALL substi_1dd( ar, tri )
1389
1390	CONTAINS
1391
1392	#if defined( __lcmuk )
1393	SUBROUTINE maketri_1dd( j, tri )
1394	#else
1395	SUBROUTINE maketri_1dd( j )
1396	#endif
1397
1398	!------------------------------------------------------------------------------!
1399	! computes the i- and j-dependent component of the matrix
1400	!------------------------------------------------------------------------------!
1401
1402	USE constants
1403
1404	IMPLICIT NONE
1405
1406	INTEGER :: i, j, k, nnxh
1407	REAL :: a, c
1408
1409	REAL, DIMENSION(0:nx) :: l
1410
1411	#if defined( __lcmuk )
1412	REAL, DIMENSION(5,0:nx,0:nz-1) :: tri
1413	#endif
1414
1415
1416	nnxh = ( nx + 1 ) / 2
1417	!
1418	!-- Provide the tridiagonal matrix for solution of the Poisson equation in
1419	!-- Fourier space. The coefficients are computed following the method of
1420	!-- Schmidt et al. (DFVLR-Mitteilung 84-15), which departs from Stephan
1421	!-- Siano's original version by discretizing the Poisson equation,
1422	!-- before it is Fourier-transformed
1423	DO i = 0, nx
1424	IF ( i >= 0 .AND. i < nnxh ) THEN
1425	l(i) = 2.0 * ( 1.0 - COS( ( 2.0 * pi * i ) / &
1426	FLOAT( nx+1 ) ) ) * ddx2 + &
1427	2.0 * ( 1.0 - COS( ( 2.0 * pi * j ) / &
1428	FLOAT( ny+1 ) ) ) * ddy2
1429	ELSEIF ( i == nnxh ) THEN
1430	l(i) = 2.0 * ( 1.0 - COS( ( 2.0 * pi * ( nx+1-i ) ) / &
1431	FLOAT( nx+1 ) ) ) * ddx2 + &
1432	2.0 * ( 1.0 - COS( ( 2.0 * pi * j ) / &
1433	FLOAT( ny+1 ) ) ) * ddy2
1434	ELSE
1435	l(i) = 2.0 * ( 1.0 - COS( ( 2.0 * pi * ( nx+1-i ) ) / &
1436	FLOAT( nx+1 ) ) ) * ddx2 + &
1437	2.0 * ( 1.0 - COS( ( 2.0 * pi * j ) / &
1438	FLOAT( ny+1 ) ) ) * ddy2
1439	ENDIF
1440	ENDDO
1441
1442	DO k = 0, nz-1
1443	DO i = 0, nx
1444	a = -1.0 * ddzu(k+2) * ddzw(k+1)
1445	c = -1.0 * ddzu(k+1) * ddzw(k+1)
1446	tri(1,i,k) = a + c - l(i)
1447	ENDDO
1448	ENDDO
1449	IF ( ibc_p_b == 1 .OR. ibc_p_b == 2 ) THEN
1450	DO i = 0, nx
1451	tri(1,i,0) = tri(1,i,0) + tri(2,i,0)
1452	ENDDO
1453	ENDIF
1454	IF ( ibc_p_t == 1 ) THEN
1455	DO i = 0, nx
1456	tri(1,i,nz-1) = tri(1,i,nz-1) + tri(3,i,nz-1)
1457	ENDDO
1458	ENDIF
1459
1460	END SUBROUTINE maketri_1dd
1461
1462
1463	#if defined( __lcmuk )
1464	SUBROUTINE split_1dd( tri )
1465	#else
1466	SUBROUTINE split_1dd
1467	#endif
1468
1469	!------------------------------------------------------------------------------!
1470	! Splitting of the tridiagonal matrix (Thomas algorithm)
1471	!------------------------------------------------------------------------------!
1472
1473	IMPLICIT NONE
1474
1475	INTEGER :: i, k
1476
1477	#if defined( __lcmuk )
1478	REAL, DIMENSION(5,0:nx,0:nz-1) :: tri
1479	#endif
1480
1481
1482	!
1483	!-- Splitting
1484	DO i = 0, nx
1485	tri(4,i,0) = tri(1,i,0)
1486	ENDDO
1487	DO k = 1, nz-1
1488	DO i = 0, nx
1489	tri(5,i,k) = tri(2,i,k) / tri(4,i,k-1)
1490	tri(4,i,k) = tri(1,i,k) - tri(3,i,k-1) * tri(5,i,k)
1491	ENDDO
1492	ENDDO
1493
1494	END SUBROUTINE split_1dd
1495
1496
1497	SUBROUTINE substi_1dd( ar, tri )
1498
1499	!------------------------------------------------------------------------------!
1500	! Substitution (Forward and Backward) (Thomas algorithm)
1501	!------------------------------------------------------------------------------!
1502
1503	IMPLICIT NONE
1504
1505	INTEGER :: i, j, k
1506
1507	REAL, DIMENSION(0:nx,nz) :: ar
1508	REAL, DIMENSION(0:nx,0:nz-1) :: ar1
1509	REAL, DIMENSION(5,0:nx,0:nz-1) :: tri
1510
1511	!
1512	!-- Forward substitution
1513	DO i = 0, nx
1514	ar1(i,0) = ar(i,1)
1515	ENDDO
1516	DO k = 1, nz-1
1517	DO i = 0, nx
1518	ar1(i,k) = ar(i,k+1) - tri(5,i,k) * ar1(i,k-1)
1519	ENDDO
1520	ENDDO
1521
1522	!
1523	!-- Backward substitution
1524	DO i = 0, nx
1525	ar(i,nz) = ar1(i,nz-1) / tri(4,i,nz-1)
1526	ENDDO
1527	DO k = nz-2, 0, -1
1528	DO i = 0, nx
1529	ar(i,k+1) = ( ar1(i,k) - tri(3,i,k) * ar(i,k+2) ) &
1530	/ tri(4,i,k)
1531	ENDDO
1532	ENDDO
1533
1534	END SUBROUTINE substi_1dd
1535
1536	END SUBROUTINE tridia_1dd
1537
1538	#endif
1539
1540	END MODULE poisfft_mod

Note: See TracBrowser for help on using the repository browser.

Download in other formats:

| Impressum | ©Leibniz Universität Hannover |