Home

Context Navigation

source: palm/trunk/SOURCE/poisfft.f90 @ 65

Last change on this file since 65 was 4, checked in by raasch, 18 years ago
Id keyword set as property for all *.f90 files
Property svn:keywords set to `Id`
File size: 43.8 KB

Line
1	MODULE poisfft_mod
2
3	!------------------------------------------------------------------------------!
4	! Actual revisions:
5	! -----------------
6	!
7	!
8	! Former revisions:
9	! -----------------
10	! $Id: poisfft.f90 4 2007-02-13 11:33:16Z raasch $
11	! RCS Log replace by Id keyword, revision history cleaned up
12	!
13	! Revision 1.24 2006/08/04 15:00:24 raasch
14	! Default setting of the thread number tn in case of not using OpenMP
15	!
16	! Revision 1.23 2006/02/23 12:48:38 raasch
17	! Additional compiler directive in routine tridia_1dd for preventing loop
18	! exchange on NEC-SX6
19	!
20	! Revision 1.20 2004/04/30 12:38:09 raasch
21	! Parts of former poisfft_hybrid moved to this subroutine,
22	! former subroutine changed to a module, renaming of FFT-subroutines and
23	! -module, FFTs completely substituted by calls of fft_x and fft_y,
24	! NAG fft used in the non-parallel case completely removed, l in maketri
25	! is now a 1d-array, variables passed by modules instead of using parameter
26	! lists, enlarged transposition arrays introduced
27	!
28	! Revision 1.1 1997/07/24 11:24:14 raasch
29	! Initial revision
30	!
31	!
32	! Description:
33	! ------------
34	! See below.
35	!------------------------------------------------------------------------------!
36
37	!--------------------------------------------------------------------------!
38	! poisfft !
39	! !
40	! Original version: Stephan Siano (pois3d) !
41	! !
42	! Institute of Meteorology and Climatology, University of Hannover !
43	! Germany !
44	! !
45	! Version as of July 23,1996 !
46	! !
47	! !
48	! Version for parallel computers: Siegfried Raasch !
49	! !
50	! Version as of July 03,1997 !
51	! !
52	! Solves the Poisson equation with a 2D spectral method !
53	! d^2 p / dx^2 + d^2 p / dy^2 + d^2 p / dz^2 = s !
54	! !
55	! Input: !
56	! real ar contains in the (nnx,nny,nnz) elements, !
57	! starting from the element (1,nys,nxl), the !
58	! values for s !
59	! real work Temporary array !
60	! !
61	! Output: !
62	! real ar contains the solution for p !
63	!--------------------------------------------------------------------------!
64
65	USE fft_xy
66	USE indices
67	USE transpose_indices
68
69	IMPLICIT NONE
70
71	PRIVATE
72	PUBLIC poisfft, poisfft_init
73
74	INTERFACE poisfft
75	MODULE PROCEDURE poisfft
76	END INTERFACE poisfft
77
78	INTERFACE poisfft_init
79	MODULE PROCEDURE poisfft_init
80	END INTERFACE poisfft_init
81
82	CONTAINS
83
84	SUBROUTINE poisfft_init
85
86	CALL fft_init
87
88	END SUBROUTINE poisfft_init
89
90
91	SUBROUTINE poisfft( ar, work )
92
93	USE cpulog
94	USE interfaces
95	USE pegrid
96
97	IMPLICIT NONE
98
99	REAL, DIMENSION(1:nza,nys:nyna,nxl:nxra) :: ar, work
100
101
102	CALL cpu_log( log_point_s(3), 'poisfft', 'start' )
103
104	!
105	!-- Two-dimensional Fourier Transformation in x- and y-direction.
106	#if defined( __parallel )
107	IF ( pdims(2) == 1 ) THEN
108
109	!
110	!-- 1d-domain-decomposition along x:
111	!-- FFT along y and transposition y --> x
112	CALL ffty_tr_yx( ar, work, ar )
113
114	!
115	!-- FFT along x, solving the tridiagonal system and backward FFT
116	CALL fftx_tri_fftx( ar )
117
118	!
119	!-- Transposition x --> y and backward FFT along y
120	CALL tr_xy_ffty( ar, work, ar )
121
122	ELSEIF ( pdims(1) == 1 ) THEN
123
124	!
125	!-- 1d-domain-decomposition along y:
126	!-- FFT along x and transposition x --> y
127	CALL fftx_tr_xy( ar, work, ar )
128
129	!
130	!-- FFT along y, solving the tridiagonal system and backward FFT
131	CALL ffty_tri_ffty( ar )
132
133	!
134	!-- Transposition y --> x and backward FFT along x
135	CALL tr_yx_fftx( ar, work, ar )
136
137	ELSE
138
139	!
140	!-- 2d-domain-decomposition
141	!-- Transposition z --> x
142	CALL cpu_log( log_point_s(5), 'transpo forward', 'start' )
143	CALL transpose_zx( ar, work, ar, work, ar )
144	CALL cpu_log( log_point_s(5), 'transpo forward', 'pause' )
145
146	CALL cpu_log( log_point_s(4), 'fft_x', 'start' )
147	CALL fftxp( ar, 'forward' )
148	CALL cpu_log( log_point_s(4), 'fft_x', 'pause' )
149
150	!
151	!-- Transposition x --> y
152	CALL cpu_log( log_point_s(5), 'transpo forward', 'continue' )
153	CALL transpose_xy( ar, work, ar, work, ar )
154	CALL cpu_log( log_point_s(5), 'transpo forward', 'pause' )
155
156	CALL cpu_log( log_point_s(7), 'fft_y', 'start' )
157	CALL fftyp( ar, 'forward' )
158	CALL cpu_log( log_point_s(7), 'fft_y', 'pause' )
159
160	!
161	!-- Transposition y --> z
162	CALL cpu_log( log_point_s(5), 'transpo forward', 'continue' )
163	CALL transpose_yz( ar, work, ar, work, ar )
164	CALL cpu_log( log_point_s(5), 'transpo forward', 'stop' )
165
166	!
167	!-- Solve the Poisson equation in z-direction in cartesian space.
168	CALL cpu_log( log_point_s(6), 'tridia', 'start' )
169	CALL tridia( ar )
170	CALL cpu_log( log_point_s(6), 'tridia', 'stop' )
171
172	!
173	!-- Inverse Fourier Transformation
174	!-- Transposition z --> y
175	CALL cpu_log( log_point_s(8), 'transpo invers', 'start' )
176	CALL transpose_zy( ar, work, ar, work, ar )
177	CALL cpu_log( log_point_s(8), 'transpo invers', 'pause' )
178
179	CALL cpu_log( log_point_s(7), 'fft_y', 'continue' )
180	CALL fftyp( ar, 'backward' )
181	CALL cpu_log( log_point_s(7), 'fft_y', 'stop' )
182
183	!
184	!-- Transposition y --> x
185	CALL cpu_log( log_point_s(8), 'transpo invers', 'continue' )
186	CALL transpose_yx( ar, work, ar, work, ar )
187	CALL cpu_log( log_point_s(8), 'transpo invers', 'pause' )
188
189	CALL cpu_log( log_point_s(4), 'fft_x', 'continue' )
190	CALL fftxp( ar, 'backward' )
191	CALL cpu_log( log_point_s(4), 'fft_x', 'stop' )
192
193	!
194	!-- Transposition x --> z
195	CALL cpu_log( log_point_s(8), 'transpo invers', 'continue' )
196	CALL transpose_xz( ar, work, ar, work, ar )
197	CALL cpu_log( log_point_s(8), 'transpo invers', 'stop' )
198
199	ENDIF
200
201	#else
202
203	!
204	!-- Two-dimensional Fourier Transformation along x- and y-direction.
205	CALL cpu_log( log_point_s(4), 'fft_x', 'start' )
206	CALL fftx( ar, 'forward' )
207	CALL cpu_log( log_point_s(4), 'fft_x', 'pause' )
208	CALL cpu_log( log_point_s(7), 'fft_y', 'start' )
209	CALL ffty( ar, 'forward' )
210	CALL cpu_log( log_point_s(7), 'fft_y', 'pause' )
211
212	!
213	!-- Solve the Poisson equation in z-direction in cartesian space.
214	CALL cpu_log( log_point_s(6), 'tridia', 'start' )
215	CALL tridia( ar )
216	CALL cpu_log( log_point_s(6), 'tridia', 'stop' )
217
218	!
219	!-- Inverse Fourier Transformation.
220	CALL cpu_log( log_point_s(7), 'fft_y', 'continue' )
221	CALL ffty( ar, 'backward' )
222	CALL cpu_log( log_point_s(7), 'fft_y', 'stop' )
223	CALL cpu_log( log_point_s(4), 'fft_x', 'continue' )
224	CALL fftx( ar, 'backward' )
225	CALL cpu_log( log_point_s(4), 'fft_x', 'stop' )
226
227	#endif
228
229	CALL cpu_log( log_point_s(3), 'poisfft', 'stop' )
230
231	END SUBROUTINE poisfft
232
233
234
235	SUBROUTINE tridia( ar )
236
237	!------------------------------------------------------------------------------!
238	! solves the linear system of equations:
239	!
240	! -(4 pi^2(i^2/(dx^2nnx^2)+j^2/(dy^2nny^2))+
241	! 1/(dzu(k)dzw(k))+1/(dzu(k-1)dzw(k)))*p(i,j,k)+
242	! 1/(dzu(k)dzw(k))p(i,j,k+1)+1/(dzu(k-1)dzw(k))p(i,j,k-1)=d(i,j,k)
243	!
244	! by using the Thomas algorithm
245	!------------------------------------------------------------------------------!
246
247	USE arrays_3d
248
249	IMPLICIT NONE
250
251	INTEGER :: i, j, k, nnyh
252
253	REAL, DIMENSION(nxl_z:nxr_z,0:nz-1) :: ar1
254	REAL, DIMENSION(5,nxl_z:nxr_z,0:nz-1) :: tri
255
256	#if defined( __parallel )
257	REAL :: ar(nxl_z:nxr_za,nys_z:nyn_za,1:nza)
258	#else
259	REAL :: ar(1:nz,nys_z:nyn_z,nxl_z:nxr_z)
260	#endif
261
262
263	nnyh = (ny+1) / 2
264
265	!
266	!-- Define constant elements of the tridiagonal matrix.
267	DO k = 0, nz-1
268	DO i = nxl_z, nxr_z
269	tri(2,i,k) = ddzu(k+1) * ddzw(k+1)
270	tri(3,i,k) = ddzu(k+2) * ddzw(k+1)
271	ENDDO
272	ENDDO
273
274	#if defined( __parallel )
275	!
276	!-- Repeat for all y-levels.
277	DO j = nys_z, nyn_z
278	IF ( j <= nnyh ) THEN
279	CALL maketri( tri, j )
280	ELSE
281	CALL maketri( tri, ny+1-j )
282	ENDIF
283	CALL split( tri )
284	CALL substi( ar, ar1, tri, j )
285	ENDDO
286	#else
287	!
288	!-- First y-level.
289	CALL maketri( tri, nys_z )
290	CALL split( tri )
291	CALL substi( ar, ar1, tri, 0 )
292
293	!
294	!-- Further y-levels.
295	DO j = 1, nnyh - 1
296	CALL maketri( tri, j )
297	CALL split( tri )
298	CALL substi( ar, ar1, tri, j )
299	CALL substi( ar, ar1, tri, ny+1-j )
300	ENDDO
301	CALL maketri( tri, nnyh )
302	CALL split( tri )
303	CALL substi( ar, ar1, tri, nnyh+nys )
304	#endif
305
306	CONTAINS
307
308	SUBROUTINE maketri( tri, j )
309
310	!------------------------------------------------------------------------------!
311	! Computes the i- and j-dependent component of the matrix
312	!------------------------------------------------------------------------------!
313
314	USE arrays_3d
315	USE constants
316	USE control_parameters
317	USE grid_variables
318
319	IMPLICIT NONE
320
321	INTEGER :: i, j, k, nnxh
322	REAL :: a, c
323	REAL :: ll(nxl_z:nxr_z)
324	REAL :: tri(5,nxl_z:nxr_z,0:nz-1)
325
326
327	nnxh = ( nx + 1 ) / 2
328
329	!
330	!-- Provide the tridiagonal matrix for solution of the Poisson equation in
331	!-- Fourier space. The coefficients are computed following the method of
332	!-- Schmidt et al. (DFVLR-Mitteilung 84-15), which departs from Stephan
333	!-- Siano's original version by discretizing the Poisson equation,
334	!-- before it is Fourier-transformed
335	#if defined( __parallel )
336	DO i = nxl_z, nxr_z
337	IF ( i >= 0 .AND. i < nnxh ) THEN
338	ll(i) = 2.0 * ( 1.0 - COS( ( 2.0 * pi * i ) / &
339	FLOAT( nx+1 ) ) ) / ( dx * dx ) + &
340	2.0 * ( 1.0 - COS( ( 2.0 * pi * j ) / &
341	FLOAT( ny+1 ) ) ) / ( dy * dy )
342	ELSEIF ( i == nnxh ) THEN
343	ll(i) = 2.0 * ( 1.0 - COS( ( 2.0 * pi * ( nx+1-i ) ) / &
344	FLOAT( nx+1 ) ) ) / ( dx * dx ) + &
345	2.0 * ( 1.0 - COS( ( 2.0 * pi * j ) / &
346	FLOAT( ny+1 ) ) ) / ( dy * dy )
347	ELSE
348	ll(i) = 2.0 * ( 1.0 - COS( ( 2.0 * pi * ( nx+1-i ) ) / &
349	FLOAT( nx+1 ) ) ) / ( dx * dx ) + &
350	2.0 * ( 1.0 - COS( ( 2.0 * pi * j ) / &
351	FLOAT( ny+1 ) ) ) / ( dy * dy )
352	ENDIF
353	DO k = 0,nz-1
354	a = -1.0 * ddzu(k+2) * ddzw(k+1)
355	c = -1.0 * ddzu(k+1) * ddzw(k+1)
356	tri(1,i,k) = a + c - ll(i)
357	ENDDO
358	ENDDO
359	#else
360	DO i = 0, nnxh
361	ll(i) = 2.0 * ( 1.0 - COS( ( 2.0 * pi * i ) / FLOAT( nx+1 ) ) ) / &
362	( dx * dx ) + &
363	2.0 * ( 1.0 - COS( ( 2.0 * pi * j ) / FLOAT( ny+1 ) ) ) / &
364	( dy * dy )
365	DO k = 0, nz-1
366	a = -1.0 * ddzu(k+2) * ddzw(k+1)
367	c = -1.0 * ddzu(k+1) * ddzw(k+1)
368	tri(1,i,k) = a + c - ll(i)
369	IF ( i >= 1 .and. i < nnxh ) THEN
370	tri(1,nx+1-i,k) = tri(1,i,k)
371	ENDIF
372	ENDDO
373	ENDDO
374	#endif
375	IF ( ibc_p_b == 1 .OR. ibc_p_b == 2 ) THEN
376	DO i = nxl_z, nxr_z
377	tri(1,i,0) = tri(1,i,0) + tri(2,i,0)
378	ENDDO
379	ENDIF
380	IF ( ibc_p_t == 1 ) THEN
381	DO i = nxl_z, nxr_z
382	tri(1,i,nz-1) = tri(1,i,nz-1) + tri(3,i,nz-1)
383	ENDDO
384	ENDIF
385
386	END SUBROUTINE maketri
387
388
389	SUBROUTINE substi( ar, ar1, tri, j )
390
391	!------------------------------------------------------------------------------!
392	! Substitution (Forward and Backward) (Thomas algorithm)
393	!------------------------------------------------------------------------------!
394
395	IMPLICIT NONE
396
397	INTEGER :: i, j, k
398	REAL :: ar1(nxl_z:nxr_z,0:nz-1)
399	REAL :: tri(5,nxl_z:nxr_z,0:nz-1)
400	#if defined( __parallel )
401	REAL :: ar(nxl_z:nxr_za,nys_z:nyn_za,1:nza)
402	#else
403	REAL :: ar(1:nz,nys_z:nyn_z,nxl_z:nxr_z)
404	#endif
405
406	!
407	!-- Forward substitution.
408	DO i = nxl_z, nxr_z
409	#if defined( __parallel )
410	ar1(i,0) = ar(i,j,1)
411	#else
412	ar1(i,0) = ar(1,j,i)
413	#endif
414	ENDDO
415	DO k = 1, nz - 1
416	DO i = nxl_z, nxr_z
417	#if defined( __parallel )
418	ar1(i,k) = ar(i,j,k+1) - tri(5,i,k) * ar1(i,k-1)
419	#else
420	ar1(i,k) = ar(k+1,j,i) - tri(5,i,k) * ar1(i,k-1)
421	#endif
422	ENDDO
423	ENDDO
424
425	!
426	!-- Backward substitution.
427	DO i = nxl_z, nxr_z
428	#if defined( __parallel )
429	ar(i,j,nz) = ar1(i,nz-1) / tri(4,i,nz-1)
430	#else
431	ar(nz,j,i) = ar1(i,nz-1) / tri(4,i,nz-1)
432	#endif
433	ENDDO
434	DO k = nz-2, 0, -1
435	DO i = nxl_z, nxr_z
436	#if defined( __parallel )
437	ar(i,j,k+1) = ( ar1(i,k) - tri(3,i,k) * ar(i,j,k+2) ) &
438	/ tri(4,i,k)
439	#else
440	ar(k+1,j,i) = ( ar1(i,k) - tri(3,i,k) * ar(k+2,j,i) ) &
441	/ tri(4,i,k)
442	#endif
443	ENDDO
444	ENDDO
445
446	END SUBROUTINE substi
447
448
449	SUBROUTINE split( tri )
450
451	!------------------------------------------------------------------------------!
452	! Splitting of the tridiagonal matrix (Thomas algorithm)
453	!------------------------------------------------------------------------------!
454
455	IMPLICIT NONE
456
457	INTEGER :: i, k
458	REAL :: tri(5,nxl_z:nxr_z,0:nz-1)
459
460	!
461	!-- Splitting.
462	DO i = nxl_z, nxr_z
463	tri(4,i,0) = tri(1,i,0)
464	ENDDO
465	DO k = 1, nz-1
466	DO i = nxl_z, nxr_z
467	tri(5,i,k) = tri(2,i,k) / tri(4,i,k-1)
468	tri(4,i,k) = tri(1,i,k) - tri(3,i,k-1) * tri(5,i,k)
469	ENDDO
470	ENDDO
471
472	END SUBROUTINE split
473
474	END SUBROUTINE tridia
475
476
477	#if defined( __parallel )
478	SUBROUTINE fftxp( ar, direction )
479
480	!------------------------------------------------------------------------------!
481	! Fourier-transformation along x-direction Parallelized version
482	!------------------------------------------------------------------------------!
483
484	IMPLICIT NONE
485
486	CHARACTER (LEN=*) :: direction
487	INTEGER :: j, k
488	REAL :: ar(0:nxa,nys_x:nyn_xa,nzb_x:nzt_xa)
489
490	!
491	!-- Performing the fft with one of the methods implemented
492	DO k = nzb_x, nzt_x
493	DO j = nys_x, nyn_x
494	CALL fft_x( ar(0:nx,j,k), direction )
495	ENDDO
496	ENDDO
497
498	END SUBROUTINE fftxp
499
500	#else
501	SUBROUTINE fftx( ar, direction )
502
503	!------------------------------------------------------------------------------!
504	! Fourier-transformation along x-direction Non parallel version
505	!------------------------------------------------------------------------------!
506
507	IMPLICIT NONE
508
509	CHARACTER (LEN=*) :: direction
510	INTEGER :: i, j, k
511	REAL :: ar(1:nz,0:ny,0:nx)
512
513	!
514	!-- Performing the fft with one of the methods implemented
515	DO k = 1, nz
516	DO j = 0, ny
517	CALL fft_x( ar(k,j,0:nx), direction )
518	ENDDO
519	ENDDO
520
521	END SUBROUTINE fftx
522	#endif
523
524
525	#if defined( __parallel )
526	SUBROUTINE fftyp( ar, direction )
527
528	!------------------------------------------------------------------------------!
529	! Fourier-transformation along y-direction Parallelized version
530	!------------------------------------------------------------------------------!
531
532	IMPLICIT NONE
533
534	CHARACTER (LEN=*) :: direction
535	INTEGER :: i, k
536	REAL :: ar(0:nya,nxl_y:nxr_ya,nzb_y:nzt_ya)
537
538	!
539	!-- Performing the fft with one of the methods implemented
540	DO k = nzb_y, nzt_y
541	DO i = nxl_y, nxr_y
542	CALL fft_y( ar(0:ny,i,k), direction )
543	ENDDO
544	ENDDO
545
546	END SUBROUTINE fftyp
547
548	#else
549	SUBROUTINE ffty( ar, direction )
550
551	!------------------------------------------------------------------------------!
552	! Fourier-transformation along y-direction Non parallel version
553	!------------------------------------------------------------------------------!
554
555	IMPLICIT NONE
556
557	CHARACTER (LEN=*) :: direction
558	INTEGER :: i, k
559	REAL :: ar(1:nz,0:ny,0:nx)
560
561	!
562	!-- Performing the fft with one of the methods implemented
563	DO k = 1, nz
564	DO i = 0, nx
565	CALL fft_y( ar(k,0:ny,i), direction )
566	ENDDO
567	ENDDO
568
569	END SUBROUTINE ffty
570	#endif
571
572	#if defined( __parallel )
573	SUBROUTINE ffty_tr_yx( f_in, work, f_out )
574
575	!------------------------------------------------------------------------------!
576	! Fourier-transformation along y with subsequent transposition y --> x for
577	! a 1d-decomposition along x
578	!
579	! ATTENTION: The performance of this routine is much faster on the NEC-SX6,
580	! if the first index of work_ffty_vec is odd. Otherwise
581	! memory bank conflicts may occur (especially if the index is a
582	! multiple of 128). That's why work_ffty_vec is dimensioned as
583	! 0:ny+1.
584	! Of course, this will not work if users are using an odd number
585	! of gridpoints along y.
586	!------------------------------------------------------------------------------!
587
588	USE control_parameters
589	USE cpulog
590	USE indices
591	USE interfaces
592	USE pegrid
593	USE transpose_indices
594
595	IMPLICIT NONE
596
597	INTEGER :: i, iend, iouter, ir, j, k
598	INTEGER, PARAMETER :: stridex = 4
599
600	REAL, DIMENSION(0:ny,stridex) :: work_ffty
601	#if defined( __nec )
602	REAL, DIMENSION(0:ny+1,1:nz,nxl:nxr) :: work_ffty_vec
603	#endif
604	REAL, DIMENSION(1:nza,0:nya,nxl:nxra) :: f_in
605	REAL, DIMENSION(nnx,1:nza,nys_x:nyn_xa,pdims(1)) :: f_out
606	REAL, DIMENSION(nxl:nxra,1:nza,0:nya) :: work
607
608	!
609	!-- Carry out the FFT along y, where all data are present due to the
610	!-- 1d-decomposition along x. Resort the data in a way that x becomes
611	!-- the first index.
612	CALL cpu_log( log_point_s(7), 'fft_y', 'start' )
613
614	IF ( host(1:3) == 'nec' ) THEN
615	#if defined( __nec )
616	!
617	!-- Code optimized for vector processors
618	!$OMP PARALLEL PRIVATE ( i, j, k, work_ffty_vec )
619	!$OMP DO
620	DO i = nxl, nxr
621
622	DO j = 0, ny
623	DO k = 1, nz
624	work_ffty_vec(j,k,i) = f_in(k,j,i)
625	ENDDO
626	ENDDO
627
628	CALL fft_y_m( work_ffty_vec(:,:,i), ny+1, 'forward' )
629
630	ENDDO
631
632	!$OMP DO
633	DO k = 1, nz
634	DO j = 0, ny
635	DO i = nxl, nxr
636	work(i,k,j) = work_ffty_vec(j,k,i)
637	ENDDO
638	ENDDO
639	ENDDO
640	!$OMP END PARALLEL
641	#endif
642
643	ELSE
644
645	!
646	!-- Cache optimized code.
647	!-- The i-(x-)direction is split into a strided outer loop and an inner
648	!-- loop for better cache performance
649	!$OMP PARALLEL PRIVATE (i,iend,iouter,ir,j,k,work_ffty)
650	!$OMP DO
651	DO iouter = nxl, nxr, stridex
652
653	iend = MIN( iouter+stridex-1, nxr ) ! Upper bound for inner i loop
654
655	DO k = 1, nz
656
657	DO i = iouter, iend
658
659	ir = i-iouter+1 ! counter within a stride
660	DO j = 0, ny
661	work_ffty(j,ir) = f_in(k,j,i)
662	ENDDO
663	!
664	!-- FFT along y
665	CALL fft_y( work_ffty(:,ir), 'forward' )
666
667	ENDDO
668
669	!
670	!-- Resort
671	DO j = 0, ny
672	DO i = iouter, iend
673	work(i,k,j) = work_ffty(j,i-iouter+1)
674	ENDDO
675	ENDDO
676
677	ENDDO
678
679	ENDDO
680	!$OMP END PARALLEL
681
682	ENDIF
683	CALL cpu_log( log_point_s(7), 'fft_y', 'pause' )
684
685	!
686	!-- Transpose array
687	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
688	CALL MPI_ALLTOALL( work(nxl,1,0), sendrecvcount_xy, MPI_REAL, &
689	f_out(1,1,nys_x,1), sendrecvcount_xy, MPI_REAL, &
690	comm1dx, ierr )
691	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
692
693	END SUBROUTINE ffty_tr_yx
694
695
696	SUBROUTINE tr_xy_ffty( f_in, work, f_out )
697
698	!------------------------------------------------------------------------------!
699	! Transposition x --> y with a subsequent backward Fourier transformation for
700	! a 1d-decomposition along x
701	!------------------------------------------------------------------------------!
702
703	USE control_parameters
704	USE cpulog
705	USE indices
706	USE interfaces
707	USE pegrid
708	USE transpose_indices
709
710	IMPLICIT NONE
711
712	INTEGER :: i, iend, iouter, ir, j, k
713	INTEGER, PARAMETER :: stridex = 4
714
715	REAL, DIMENSION(0:ny,stridex) :: work_ffty
716	#if defined( __nec )
717	REAL, DIMENSION(0:ny+1,1:nz,nxl:nxr) :: work_ffty_vec
718	#endif
719	REAL, DIMENSION(nnx,1:nza,nys_x:nyn_xa,pdims(1)) :: f_in
720	REAL, DIMENSION(1:nza,0:nya,nxl:nxra) :: f_out
721	REAL, DIMENSION(nxl:nxra,1:nza,0:nya) :: work
722
723	!
724	!-- Transpose array
725	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
726	CALL MPI_ALLTOALL( f_in(1,1,nys_x,1), sendrecvcount_xy, MPI_REAL, &
727	work(nxl,1,0), sendrecvcount_xy, MPI_REAL, &
728	comm1dx, ierr )
729	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
730
731	!
732	!-- Resort the data in a way that y becomes the first index and carry out the
733	!-- backward fft along y.
734	CALL cpu_log( log_point_s(7), 'fft_y', 'continue' )
735
736	IF ( host(1:3) == 'nec' ) THEN
737	#if defined( __nec )
738	!
739	!-- Code optimized for vector processors
740	!$OMP PARALLEL PRIVATE ( i, j, k, work_ffty_vec )
741	!$OMP DO
742	DO k = 1, nz
743	DO j = 0, ny
744	DO i = nxl, nxr
745	work_ffty_vec(j,k,i) = work(i,k,j)
746	ENDDO
747	ENDDO
748	ENDDO
749
750	!$OMP DO
751	DO i = nxl, nxr
752
753	CALL fft_y_m( work_ffty_vec(:,:,i), ny+1, 'backward' )
754
755	DO j = 0, ny
756	DO k = 1, nz
757	f_out(k,j,i) = work_ffty_vec(j,k,i)
758	ENDDO
759	ENDDO
760
761	ENDDO
762	!$OMP END PARALLEL
763	#endif
764
765	ELSE
766
767	!
768	!-- Cache optimized code.
769	!-- The i-(x-)direction is split into a strided outer loop and an inner
770	!-- loop for better cache performance
771	!$OMP PARALLEL PRIVATE ( i, iend, iouter, ir, j, k, work_ffty )
772	!$OMP DO
773	DO iouter = nxl, nxr, stridex
774
775	iend = MIN( iouter+stridex-1, nxr ) ! Upper bound for inner i loop
776
777	DO k = 1, nz
778	!
779	!-- Resort
780	DO j = 0, ny
781	DO i = iouter, iend
782	work_ffty(j,i-iouter+1) = work(i,k,j)
783	ENDDO
784	ENDDO
785
786	DO i = iouter, iend
787
788	!
789	!-- FFT along y
790	ir = i-iouter+1 ! counter within a stride
791	CALL fft_y( work_ffty(:,ir), 'backward' )
792
793	DO j = 0, ny
794	f_out(k,j,i) = work_ffty(j,ir)
795	ENDDO
796	ENDDO
797
798	ENDDO
799
800	ENDDO
801	!$OMP END PARALLEL
802
803	ENDIF
804
805	CALL cpu_log( log_point_s(7), 'fft_y', 'stop' )
806
807	END SUBROUTINE tr_xy_ffty
808
809
810	SUBROUTINE fftx_tri_fftx( ar )
811
812	!------------------------------------------------------------------------------!
813	! FFT along x, solution of the tridiagonal system and backward FFT for
814	! a 1d-decomposition along x
815	!
816	! WARNING: this subroutine may still not work for hybrid parallelization
817	! with OpenMP (for possible necessary changes see the original
818	! routine poisfft_hybrid, developed by Klaus Ketelsen, May 2002)
819	!------------------------------------------------------------------------------!
820
821	USE control_parameters
822	USE cpulog
823	USE grid_variables
824	USE indices
825	USE interfaces
826	USE pegrid
827	USE transpose_indices
828
829	IMPLICIT NONE
830
831	character(len=3) :: myth_char
832
833	INTEGER :: i, j, k, m, n, omp_get_thread_num, tn
834
835	REAL, DIMENSION(0:nx) :: work_fftx
836	REAL, DIMENSION(0:nx,1:nz) :: work_trix
837	REAL, DIMENSION(nnx,1:nza,nys_x:nyn_xa,pdims(1)) :: ar
838	REAL, DIMENSION(:,:,:,:), ALLOCATABLE :: tri
839
840
841	CALL cpu_log( log_point_s(33), 'fft_x + tridia', 'start' )
842
843	ALLOCATE( tri(5,0:nx,0:nz-1,0:threads_per_task-1) )
844
845	tn = 0 ! Default thread number in case of one thread
846	!$OMP PARALLEL DO PRIVATE ( i, j, k, m, n, tn, work_fftx, work_trix )
847	DO j = nys_x, nyn_x
848
849	!$ tn = omp_get_thread_num()
850
851	IF ( host(1:3) == 'nec' ) THEN
852	!
853	!-- Code optimized for vector processors
854	DO k = 1, nz
855
856	m = 0
857	DO n = 1, pdims(1)
858	DO i = 1, nnx_pe( n-1 ) ! WARN: pcoord(i) should be used!!
859	work_trix(m,k) = ar(i,k,j,n)
860	m = m + 1
861	ENDDO
862	ENDDO
863
864	ENDDO
865
866	CALL fft_x_m( work_trix, 'forward' )
867
868	ELSE
869	!
870	!-- Cache optimized code
871	DO k = 1, nz
872
873	m = 0
874	DO n = 1, pdims(1)
875	DO i = 1, nnx_pe( n-1 ) ! WARN: pcoord(i) should be used!!
876	work_fftx(m) = ar(i,k,j,n)
877	m = m + 1
878	ENDDO
879	ENDDO
880
881	CALL fft_x( work_fftx, 'forward' )
882
883	DO i = 0, nx
884	work_trix(i,k) = work_fftx(i)
885	ENDDO
886
887	ENDDO
888
889	ENDIF
890
891	!
892	!-- Solve the linear equation system
893	CALL tridia_1dd( ddx2, ddy2, nx, ny, j, work_trix, tri(:,:,:,tn) )
894
895	IF ( host(1:3) == 'nec' ) THEN
896	!
897	!-- Code optimized for vector processors
898	CALL fft_x_m( work_trix, 'backward' )
899
900	DO k = 1, nz
901
902	m = 0
903	DO n = 1, pdims(1)
904	DO i = 1, nnx_pe( n-1 ) ! WARN: pcoord(i) should be used!!
905	ar(i,k,j,n) = work_trix(m,k)
906	m = m + 1
907	ENDDO
908	ENDDO
909
910	ENDDO
911
912	ELSE
913	!
914	!-- Cache optimized code
915	DO k = 1, nz
916
917	DO i = 0, nx
918	work_fftx(i) = work_trix(i,k)
919	ENDDO
920
921	CALL fft_x( work_fftx, 'backward' )
922
923	m = 0
924	DO n = 1, pdims(1)
925	DO i = 1, nnx_pe( n-1 ) ! WARN: pcoord(i) should be used!!
926	ar(i,k,j,n) = work_fftx(m)
927	m = m + 1
928	ENDDO
929	ENDDO
930
931	ENDDO
932
933	ENDIF
934
935	ENDDO
936
937	DEALLOCATE( tri )
938
939	CALL cpu_log( log_point_s(33), 'fft_x + tridia', 'stop' )
940
941	END SUBROUTINE fftx_tri_fftx
942
943
944	SUBROUTINE fftx_tr_xy( f_in, work, f_out )
945
946	!------------------------------------------------------------------------------!
947	! Fourier-transformation along x with subsequent transposition x --> y for
948	! a 1d-decomposition along y
949	!
950	! ATTENTION: The NEC-branch of this routine may significantly profit from
951	! further optimizations. So far, performance is much worse than
952	! for routine ffty_tr_yx (more than three times slower).
953	!------------------------------------------------------------------------------!
954
955	USE control_parameters
956	USE cpulog
957	USE indices
958	USE interfaces
959	USE pegrid
960	USE transpose_indices
961
962	IMPLICIT NONE
963
964	INTEGER :: i, j, k
965
966	REAL, DIMENSION(0:nx,1:nz,nys:nyn) :: work_fftx
967	REAL, DIMENSION(1:nza,nys:nyna,0:nxa) :: f_in
968	REAL, DIMENSION(nny,1:nza,nxl_y:nxr_ya,pdims(2)) :: f_out
969	REAL, DIMENSION(nys:nyna,1:nza,0:nxa) :: work
970
971	!
972	!-- Carry out the FFT along x, where all data are present due to the
973	!-- 1d-decomposition along y. Resort the data in a way that y becomes
974	!-- the first index.
975	CALL cpu_log( log_point_s(4), 'fft_x', 'start' )
976
977	IF ( host(1:3) == 'nec' ) THEN
978	!
979	!-- Code for vector processors
980	!$OMP PARALLEL PRIVATE ( i, j, k, work_fftx )
981	!$OMP DO
982	DO i = 0, nx
983
984	DO j = nys, nyn
985	DO k = 1, nz
986	work_fftx(i,k,j) = f_in(k,j,i)
987	ENDDO
988	ENDDO
989
990	ENDDO
991
992	!$OMP DO
993	DO j = nys, nyn
994
995	CALL fft_x_m( work_fftx(:,:,j), 'forward' )
996
997	DO k = 1, nz
998	DO i = 0, nx
999	work(j,k,i) = work_fftx(i,k,j)
1000	ENDDO
1001	ENDDO
1002
1003	ENDDO
1004	!$OMP END PARALLEL
1005
1006	ELSE
1007
1008	!
1009	!-- Cache optimized code (there might be still a potential for better
1010	!-- optimization).
1011	!$OMP PARALLEL PRIVATE (i,j,k,work_fftx)
1012	!$OMP DO
1013	DO i = 0, nx
1014
1015	DO j = nys, nyn
1016	DO k = 1, nz
1017	work_fftx(i,k,j) = f_in(k,j,i)
1018	ENDDO
1019	ENDDO
1020
1021	ENDDO
1022
1023	!$OMP DO
1024	DO j = nys, nyn
1025	DO k = 1, nz
1026
1027	CALL fft_x( work_fftx(0:nx,k,j), 'forward' )
1028
1029	DO i = 0, nx
1030	work(j,k,i) = work_fftx(i,k,j)
1031	ENDDO
1032	ENDDO
1033
1034	ENDDO
1035	!$OMP END PARALLEL
1036
1037	ENDIF
1038	CALL cpu_log( log_point_s(4), 'fft_x', 'pause' )
1039
1040	!
1041	!-- Transpose array
1042	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
1043	CALL MPI_ALLTOALL( work(nys,1,0), sendrecvcount_xy, MPI_REAL, &
1044	f_out(1,1,nxl_y,1), sendrecvcount_xy, MPI_REAL, &
1045	comm1dy, ierr )
1046	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
1047
1048	END SUBROUTINE fftx_tr_xy
1049
1050
1051	SUBROUTINE tr_yx_fftx( f_in, work, f_out )
1052
1053	!------------------------------------------------------------------------------!
1054	! Transposition y --> x with a subsequent backward Fourier transformation for
1055	! a 1d-decomposition along x
1056	!------------------------------------------------------------------------------!
1057
1058	USE control_parameters
1059	USE cpulog
1060	USE indices
1061	USE interfaces
1062	USE pegrid
1063	USE transpose_indices
1064
1065	IMPLICIT NONE
1066
1067	INTEGER :: i, j, k
1068
1069	REAL, DIMENSION(0:nx,1:nz,nys:nyn) :: work_fftx
1070	REAL, DIMENSION(nny,1:nza,nxl_y:nxr_ya,pdims(2)) :: f_in
1071	REAL, DIMENSION(1:nza,nys:nyna,0:nxa) :: f_out
1072	REAL, DIMENSION(nys:nyna,1:nza,0:nxa) :: work
1073
1074	!
1075	!-- Transpose array
1076	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
1077	CALL MPI_ALLTOALL( f_in(1,1,nxl_y,1), sendrecvcount_xy, MPI_REAL, &
1078	work(nys,1,0), sendrecvcount_xy, MPI_REAL, &
1079	comm1dy, ierr )
1080	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
1081
1082	!
1083	!-- Carry out the FFT along x, where all data are present due to the
1084	!-- 1d-decomposition along y. Resort the data in a way that y becomes
1085	!-- the first index.
1086	CALL cpu_log( log_point_s(4), 'fft_x', 'continue' )
1087
1088	IF ( host(1:3) == 'nec' ) THEN
1089	!
1090	!-- Code optimized for vector processors
1091	!$OMP PARALLEL PRIVATE ( i, j, k, work_fftx )
1092	!$OMP DO
1093	DO j = nys, nyn
1094
1095	DO k = 1, nz
1096	DO i = 0, nx
1097	work_fftx(i,k,j) = work(j,k,i)
1098	ENDDO
1099	ENDDO
1100
1101	CALL fft_x_m( work_fftx(:,:,j), 'backward' )
1102
1103	ENDDO
1104
1105	!$OMP DO
1106	DO i = 0, nx
1107	DO j = nys, nyn
1108	DO k = 1, nz
1109	f_out(k,j,i) = work_fftx(i,k,j)
1110	ENDDO
1111	ENDDO
1112	ENDDO
1113	!$OMP END PARALLEL
1114
1115	ELSE
1116
1117	!
1118	!-- Cache optimized code (there might be still a potential for better
1119	!-- optimization).
1120	!$OMP PARALLEL PRIVATE (i,j,k,work_fftx)
1121	!$OMP DO
1122	DO j = nys, nyn
1123	DO k = 1, nz
1124
1125	DO i = 0, nx
1126	work_fftx(i,k,j) = work(j,k,i)
1127	ENDDO
1128
1129	CALL fft_x( work_fftx(0:nx,k,j), 'backward' )
1130
1131	ENDDO
1132	ENDDO
1133
1134	!$OMP DO
1135	DO i = 0, nx
1136	DO j = nys, nyn
1137	DO k = 1, nz
1138	f_out(k,j,i) = work_fftx(i,k,j)
1139	ENDDO
1140	ENDDO
1141	ENDDO
1142	!$OMP END PARALLEL
1143
1144	ENDIF
1145	CALL cpu_log( log_point_s(4), 'fft_x', 'stop' )
1146
1147	END SUBROUTINE tr_yx_fftx
1148
1149
1150	SUBROUTINE ffty_tri_ffty( ar )
1151
1152	!------------------------------------------------------------------------------!
1153	! FFT along y, solution of the tridiagonal system and backward FFT for
1154	! a 1d-decomposition along y
1155	!
1156	! WARNING: this subroutine may still not work for hybrid parallelization
1157	! with OpenMP (for possible necessary changes see the original
1158	! routine poisfft_hybrid, developed by Klaus Ketelsen, May 2002)
1159	!------------------------------------------------------------------------------!
1160
1161	USE control_parameters
1162	USE cpulog
1163	USE grid_variables
1164	USE indices
1165	USE interfaces
1166	USE pegrid
1167	USE transpose_indices
1168
1169	IMPLICIT NONE
1170
1171	INTEGER :: i, j, k, m, n, omp_get_thread_num, tn
1172
1173	REAL, DIMENSION(0:ny) :: work_ffty
1174	REAL, DIMENSION(0:ny,1:nz) :: work_triy
1175	REAL, DIMENSION(nny,1:nza,nxl_y:nxr_ya,pdims(2)) :: ar
1176	REAL, DIMENSION(:,:,:,:), ALLOCATABLE :: tri
1177
1178
1179	CALL cpu_log( log_point_s(39), 'fft_y + tridia', 'start' )
1180
1181	ALLOCATE( tri(5,0:ny,0:nz-1,0:threads_per_task-1) )
1182
1183	tn = 0 ! Default thread number in case of one thread
1184	!$OMP PARALLEL PRIVATE ( i, j, k, m, n, tn, work_ffty, work_triy )
1185	!$OMP DO
1186	DO i = nxl_y, nxr_y
1187
1188	!$ tn = omp_get_thread_num()
1189
1190	IF ( host(1:3) == 'nec' ) THEN
1191	!
1192	!-- Code optimized for vector processors
1193	DO k = 1, nz
1194
1195	m = 0
1196	DO n = 1, pdims(2)
1197	DO j = 1, nny_pe( n-1 ) ! WARN: pcoord(j) should be used!!
1198	work_triy(m,k) = ar(j,k,i,n)
1199	m = m + 1
1200	ENDDO
1201	ENDDO
1202
1203	ENDDO
1204
1205	CALL fft_y_m( work_triy, ny, 'forward' )
1206
1207	ELSE
1208	!
1209	!-- Cache optimized code
1210	DO k = 1, nz
1211
1212	m = 0
1213	DO n = 1, pdims(2)
1214	DO j = 1, nny_pe( n-1 ) ! WARN: pcoord(j) should be used!!
1215	work_ffty(m) = ar(j,k,i,n)
1216	m = m + 1
1217	ENDDO
1218	ENDDO
1219
1220	CALL fft_y( work_ffty, 'forward' )
1221
1222	DO j = 0, ny
1223	work_triy(j,k) = work_ffty(j)
1224	ENDDO
1225
1226	ENDDO
1227
1228	ENDIF
1229
1230	!
1231	!-- Solve the linear equation system
1232	CALL tridia_1dd( ddy2, ddx2, ny, nx, i, work_triy, tri(:,:,:,tn) )
1233
1234	IF ( host(1:3) == 'nec' ) THEN
1235	!
1236	!-- Code optimized for vector processors
1237	CALL fft_y_m( work_triy, ny, 'backward' )
1238
1239	DO k = 1, nz
1240
1241	m = 0
1242	DO n = 1, pdims(2)
1243	DO j = 1, nny_pe( n-1 ) ! WARN: pcoord(j) should be used!!
1244	ar(j,k,i,n) = work_triy(m,k)
1245	m = m + 1
1246	ENDDO
1247	ENDDO
1248
1249	ENDDO
1250
1251	ELSE
1252	!
1253	!-- Cache optimized code
1254	DO k = 1, nz
1255
1256	DO j = 0, ny
1257	work_ffty(j) = work_triy(j,k)
1258	ENDDO
1259
1260	CALL fft_y( work_ffty, 'backward' )
1261
1262	m = 0
1263	DO n = 1, pdims(2)
1264	DO j = 1, nny_pe( n-1 ) ! WARN: pcoord(j) should be used!!
1265	ar(j,k,i,n) = work_ffty(m)
1266	m = m + 1
1267	ENDDO
1268	ENDDO
1269
1270	ENDDO
1271
1272	ENDIF
1273
1274	ENDDO
1275	!$OMP END PARALLEL
1276
1277	DEALLOCATE( tri )
1278
1279	CALL cpu_log( log_point_s(39), 'fft_y + tridia', 'stop' )
1280
1281	END SUBROUTINE ffty_tri_ffty
1282
1283
1284	SUBROUTINE tridia_1dd( ddx2, ddy2, nx, ny, j, ar, tri )
1285
1286	!------------------------------------------------------------------------------!
1287	! Solves the linear system of equations for a 1d-decomposition along x (see
1288	! tridia)
1289	!
1290	! Attention: when using the intel compiler, array tri must be passed as an
1291	! argument to the contained subroutines. Otherwise addres faults
1292	! will occur.
1293	! On NEC, tri should not be passed (except for routine substi_1dd)
1294	! because this causes very bad performance.
1295	!------------------------------------------------------------------------------!
1296
1297	USE arrays_3d
1298	USE control_parameters
1299
1300	USE pegrid
1301
1302	IMPLICIT NONE
1303
1304	INTEGER :: i, j, k, nnyh, nx, ny, omp_get_thread_num, tn
1305
1306	REAL :: ddx2, ddy2
1307
1308	REAL, DIMENSION(0:nx,1:nz) :: ar
1309	REAL, DIMENSION(0:nx,0:nz-1) :: ar1
1310	REAL, DIMENSION(5,0:nx,0:nz-1) :: tri
1311
1312
1313	nnyh = ( ny + 1 ) / 2
1314
1315	!
1316	!-- Define constant elements of the tridiagonal matrix.
1317	!-- The compiler on SX6 does loop exchange. If 0:nx is a high power of 2,
1318	!-- the exchanged loops create bank conflicts. The following directive
1319	!-- prohibits loop exchange and the loops perform much better.
1320	! tn = omp_get_thread_num()
1321	! WRITE( 120+tn, * ) '+++ id=',myid,' nx=',nx,' thread=', omp_get_thread_num()
1322	! CALL FLUSH_( 120+tn )
1323	!CDIR NOLOOPCHG
1324	DO k = 0, nz-1
1325	DO i = 0,nx
1326	tri(2,i,k) = ddzu(k+1) * ddzw(k+1)
1327	tri(3,i,k) = ddzu(k+2) * ddzw(k+1)
1328	ENDDO
1329	ENDDO
1330	! WRITE( 120+tn, * ) '+++ id=',myid,' end of first tridia loop thread=', omp_get_thread_num()
1331	! CALL FLUSH_( 120+tn )
1332
1333	IF ( j <= nnyh ) THEN
1334	#if defined( __lcmuk )
1335	CALL maketri_1dd( j, tri )
1336	#else
1337	CALL maketri_1dd( j )
1338	#endif
1339	ELSE
1340	#if defined( __lcmuk )
1341	CALL maketri_1dd( ny+1-j, tri )
1342	#else
1343	CALL maketri_1dd( ny+1-j )
1344	#endif
1345	ENDIF
1346	#if defined( __lcmuk )
1347	CALL split_1dd( tri )
1348	#else
1349	CALL split_1dd
1350	#endif
1351	CALL substi_1dd( ar, tri )
1352
1353	CONTAINS
1354
1355	#if defined( __lcmuk )
1356	SUBROUTINE maketri_1dd( j, tri )
1357	#else
1358	SUBROUTINE maketri_1dd( j )
1359	#endif
1360
1361	!------------------------------------------------------------------------------!
1362	! computes the i- and j-dependent component of the matrix
1363	!------------------------------------------------------------------------------!
1364
1365	USE constants
1366
1367	IMPLICIT NONE
1368
1369	INTEGER :: i, j, k, nnxh
1370	REAL :: a, c
1371
1372	REAL, DIMENSION(0:nx) :: l
1373
1374	#if defined( __lcmuk )
1375	REAL, DIMENSION(5,0:nx,0:nz-1) :: tri
1376	#endif
1377
1378
1379	nnxh = ( nx + 1 ) / 2
1380	!
1381	!-- Provide the tridiagonal matrix for solution of the Poisson equation in
1382	!-- Fourier space. The coefficients are computed following the method of
1383	!-- Schmidt et al. (DFVLR-Mitteilung 84-15), which departs from Stephan
1384	!-- Siano's original version by discretizing the Poisson equation,
1385	!-- before it is Fourier-transformed
1386	DO i = 0, nx
1387	IF ( i >= 0 .AND. i < nnxh ) THEN
1388	l(i) = 2.0 * ( 1.0 - COS( ( 2.0 * pi * i ) / &
1389	FLOAT( nx+1 ) ) ) * ddx2 + &
1390	2.0 * ( 1.0 - COS( ( 2.0 * pi * j ) / &
1391	FLOAT( ny+1 ) ) ) * ddy2
1392	ELSEIF ( i == nnxh ) THEN
1393	l(i) = 2.0 * ( 1.0 - COS( ( 2.0 * pi * ( nx+1-i ) ) / &
1394	FLOAT( nx+1 ) ) ) * ddx2 + &
1395	2.0 * ( 1.0 - COS( ( 2.0 * pi * j ) / &
1396	FLOAT( ny+1 ) ) ) * ddy2
1397	ELSE
1398	l(i) = 2.0 * ( 1.0 - COS( ( 2.0 * pi * ( nx+1-i ) ) / &
1399	FLOAT( nx+1 ) ) ) * ddx2 + &
1400	2.0 * ( 1.0 - COS( ( 2.0 * pi * j ) / &
1401	FLOAT( ny+1 ) ) ) * ddy2
1402	ENDIF
1403	ENDDO
1404
1405	DO k = 0, nz-1
1406	DO i = 0, nx
1407	a = -1.0 * ddzu(k+2) * ddzw(k+1)
1408	c = -1.0 * ddzu(k+1) * ddzw(k+1)
1409	tri(1,i,k) = a + c - l(i)
1410	ENDDO
1411	ENDDO
1412	IF ( ibc_p_b == 1 .OR. ibc_p_b == 2 ) THEN
1413	DO i = 0, nx
1414	tri(1,i,0) = tri(1,i,0) + tri(2,i,0)
1415	ENDDO
1416	ENDIF
1417	IF ( ibc_p_t == 1 ) THEN
1418	DO i = 0, nx
1419	tri(1,i,nz-1) = tri(1,i,nz-1) + tri(3,i,nz-1)
1420	ENDDO
1421	ENDIF
1422
1423	END SUBROUTINE maketri_1dd
1424
1425
1426	#if defined( __lcmuk )
1427	SUBROUTINE split_1dd( tri )
1428	#else
1429	SUBROUTINE split_1dd
1430	#endif
1431
1432	!------------------------------------------------------------------------------!
1433	! Splitting of the tridiagonal matrix (Thomas algorithm)
1434	!------------------------------------------------------------------------------!
1435
1436	IMPLICIT NONE
1437
1438	INTEGER :: i, k
1439
1440	#if defined( __lcmuk )
1441	REAL, DIMENSION(5,0:nx,0:nz-1) :: tri
1442	#endif
1443
1444
1445	!
1446	!-- Splitting
1447	DO i = 0, nx
1448	tri(4,i,0) = tri(1,i,0)
1449	ENDDO
1450	DO k = 1, nz-1
1451	DO i = 0, nx
1452	tri(5,i,k) = tri(2,i,k) / tri(4,i,k-1)
1453	tri(4,i,k) = tri(1,i,k) - tri(3,i,k-1) * tri(5,i,k)
1454	ENDDO
1455	ENDDO
1456
1457	END SUBROUTINE split_1dd
1458
1459
1460	SUBROUTINE substi_1dd( ar, tri )
1461
1462	!------------------------------------------------------------------------------!
1463	! Substitution (Forward and Backward) (Thomas algorithm)
1464	!------------------------------------------------------------------------------!
1465
1466	IMPLICIT NONE
1467
1468	INTEGER :: i, j, k
1469
1470	REAL, DIMENSION(0:nx,nz) :: ar
1471	REAL, DIMENSION(0:nx,0:nz-1) :: ar1
1472	REAL, DIMENSION(5,0:nx,0:nz-1) :: tri
1473
1474	!
1475	!-- Forward substitution
1476	DO i = 0, nx
1477	ar1(i,0) = ar(i,1)
1478	ENDDO
1479	DO k = 1, nz-1
1480	DO i = 0, nx
1481	ar1(i,k) = ar(i,k+1) - tri(5,i,k) * ar1(i,k-1)
1482	ENDDO
1483	ENDDO
1484
1485	!
1486	!-- Backward substitution
1487	DO i = 0, nx
1488	ar(i,nz) = ar1(i,nz-1) / tri(4,i,nz-1)
1489	ENDDO
1490	DO k = nz-2, 0, -1
1491	DO i = 0, nx
1492	ar(i,k+1) = ( ar1(i,k) - tri(3,i,k) * ar(i,k+2) ) &
1493	/ tri(4,i,k)
1494	ENDDO
1495	ENDDO
1496
1497	END SUBROUTINE substi_1dd
1498
1499	END SUBROUTINE tridia_1dd
1500
1501	#endif
1502
1503	END MODULE poisfft_mod

Note: See TracBrowser for help on using the repository browser.

Download in other formats:

| Impressum | ©Leibniz Universität Hannover |