Home

Context Navigation

poisfft.f90 @ 92

Last change on this file since 92 was 90, checked in by raasch, 17 years ago

New:
---
Calculation and output of user-defined profiles. New &userpar parameters data_output_pr_user and max_pr_user.

check_parameters, flow_statistics, modules, parin, read_var_list, user_interface, write_var_list

Changed:

Division through dt_3d replaced by multiplication of the inverse. For performance optimisation, this is done in the loop calculating the divergence instead of using a seperate loop. (pres.f90) var_hom and var_sum renamed pr_palm.

data_output_profiles, flow_statistics, init_3d_model, modules, parin, pres, read_var_list, run_control, time_integration

Errors:

Bugfix: work_fft*_vec removed from some PRIVATE-declarations (poisfft).

Bugfix: field_chr renamed field_char (user_interface).

Bugfix: output of use_upstream_for_tke (header).

header, poisfft, user_interface

Property svn:keywords set to Id

File size: 45.0 KB

Line
1	MODULE poisfft_mod
2
3	!------------------------------------------------------------------------------!
4	! Actual revisions:
5	! -----------------
6	!
7	!
8	! Former revisions:
9	! -----------------
10	! $Id: poisfft.f90 90 2007-05-30 09:18:47Z raasch $
11	!
12	! 85 2007-05-11 09:35:14Z raasch
13	! Bugfix: work_fft*_vec removed from some PRIVATE-declarations
14	!
15	! 76 2007-03-29 00:58:32Z raasch
16	! Tridiagonal coefficients adjusted for Neumann boundary conditions both at
17	! the bottom and the top.
18	!
19	! RCS Log replace by Id keyword, revision history cleaned up
20	!
21	! Revision 1.24 2006/08/04 15:00:24 raasch
22	! Default setting of the thread number tn in case of not using OpenMP
23	!
24	! Revision 1.23 2006/02/23 12:48:38 raasch
25	! Additional compiler directive in routine tridia_1dd for preventing loop
26	! exchange on NEC-SX6
27	!
28	! Revision 1.20 2004/04/30 12:38:09 raasch
29	! Parts of former poisfft_hybrid moved to this subroutine,
30	! former subroutine changed to a module, renaming of FFT-subroutines and
31	! -module, FFTs completely substituted by calls of fft_x and fft_y,
32	! NAG fft used in the non-parallel case completely removed, l in maketri
33	! is now a 1d-array, variables passed by modules instead of using parameter
34	! lists, enlarged transposition arrays introduced
35	!
36	! Revision 1.1 1997/07/24 11:24:14 raasch
37	! Initial revision
38	!
39	!
40	! Description:
41	! ------------
42	! See below.
43	!------------------------------------------------------------------------------!
44
45	!--------------------------------------------------------------------------!
46	! poisfft !
47	! !
48	! Original version: Stephan Siano (pois3d) !
49	! !
50	! Institute of Meteorology and Climatology, University of Hannover !
51	! Germany !
52	! !
53	! Version as of July 23,1996 !
54	! !
55	! !
56	! Version for parallel computers: Siegfried Raasch !
57	! !
58	! Version as of July 03,1997 !
59	! !
60	! Solves the Poisson equation with a 2D spectral method !
61	! d^2 p / dx^2 + d^2 p / dy^2 + d^2 p / dz^2 = s !
62	! !
63	! Input: !
64	! real ar contains in the (nnx,nny,nnz) elements, !
65	! starting from the element (1,nys,nxl), the !
66	! values for s !
67	! real work Temporary array !
68	! !
69	! Output: !
70	! real ar contains the solution for p !
71	!--------------------------------------------------------------------------!
72
73	USE fft_xy
74	USE indices
75	USE transpose_indices
76
77	IMPLICIT NONE
78
79	PRIVATE
80	PUBLIC poisfft, poisfft_init
81
82	INTERFACE poisfft
83	MODULE PROCEDURE poisfft
84	END INTERFACE poisfft
85
86	INTERFACE poisfft_init
87	MODULE PROCEDURE poisfft_init
88	END INTERFACE poisfft_init
89
90	CONTAINS
91
92	SUBROUTINE poisfft_init
93
94	CALL fft_init
95
96	END SUBROUTINE poisfft_init
97
98
99	SUBROUTINE poisfft( ar, work )
100
101	USE cpulog
102	USE interfaces
103	USE pegrid
104
105	IMPLICIT NONE
106
107	REAL, DIMENSION(1:nza,nys:nyna,nxl:nxra) :: ar, work
108
109
110	CALL cpu_log( log_point_s(3), 'poisfft', 'start' )
111
112	!
113	!-- Two-dimensional Fourier Transformation in x- and y-direction.
114	#if defined( __parallel )
115	IF ( pdims(2) == 1 ) THEN
116
117	!
118	!-- 1d-domain-decomposition along x:
119	!-- FFT along y and transposition y --> x
120	CALL ffty_tr_yx( ar, work, ar )
121
122	!
123	!-- FFT along x, solving the tridiagonal system and backward FFT
124	CALL fftx_tri_fftx( ar )
125
126	!
127	!-- Transposition x --> y and backward FFT along y
128	CALL tr_xy_ffty( ar, work, ar )
129
130	ELSEIF ( pdims(1) == 1 ) THEN
131
132	!
133	!-- 1d-domain-decomposition along y:
134	!-- FFT along x and transposition x --> y
135	CALL fftx_tr_xy( ar, work, ar )
136
137	!
138	!-- FFT along y, solving the tridiagonal system and backward FFT
139	CALL ffty_tri_ffty( ar )
140
141	!
142	!-- Transposition y --> x and backward FFT along x
143	CALL tr_yx_fftx( ar, work, ar )
144
145	ELSE
146
147	!
148	!-- 2d-domain-decomposition
149	!-- Transposition z --> x
150	CALL cpu_log( log_point_s(5), 'transpo forward', 'start' )
151	CALL transpose_zx( ar, work, ar, work, ar )
152	CALL cpu_log( log_point_s(5), 'transpo forward', 'pause' )
153
154	CALL cpu_log( log_point_s(4), 'fft_x', 'start' )
155	CALL fftxp( ar, 'forward' )
156	CALL cpu_log( log_point_s(4), 'fft_x', 'pause' )
157
158	!
159	!-- Transposition x --> y
160	CALL cpu_log( log_point_s(5), 'transpo forward', 'continue' )
161	CALL transpose_xy( ar, work, ar, work, ar )
162	CALL cpu_log( log_point_s(5), 'transpo forward', 'pause' )
163
164	CALL cpu_log( log_point_s(7), 'fft_y', 'start' )
165	CALL fftyp( ar, 'forward' )
166	CALL cpu_log( log_point_s(7), 'fft_y', 'pause' )
167
168	!
169	!-- Transposition y --> z
170	CALL cpu_log( log_point_s(5), 'transpo forward', 'continue' )
171	CALL transpose_yz( ar, work, ar, work, ar )
172	CALL cpu_log( log_point_s(5), 'transpo forward', 'stop' )
173
174	!
175	!-- Solve the Poisson equation in z-direction in cartesian space.
176	CALL cpu_log( log_point_s(6), 'tridia', 'start' )
177	CALL tridia( ar )
178	CALL cpu_log( log_point_s(6), 'tridia', 'stop' )
179
180	!
181	!-- Inverse Fourier Transformation
182	!-- Transposition z --> y
183	CALL cpu_log( log_point_s(8), 'transpo invers', 'start' )
184	CALL transpose_zy( ar, work, ar, work, ar )
185	CALL cpu_log( log_point_s(8), 'transpo invers', 'pause' )
186
187	CALL cpu_log( log_point_s(7), 'fft_y', 'continue' )
188	CALL fftyp( ar, 'backward' )
189	CALL cpu_log( log_point_s(7), 'fft_y', 'stop' )
190
191	!
192	!-- Transposition y --> x
193	CALL cpu_log( log_point_s(8), 'transpo invers', 'continue' )
194	CALL transpose_yx( ar, work, ar, work, ar )
195	CALL cpu_log( log_point_s(8), 'transpo invers', 'pause' )
196
197	CALL cpu_log( log_point_s(4), 'fft_x', 'continue' )
198	CALL fftxp( ar, 'backward' )
199	CALL cpu_log( log_point_s(4), 'fft_x', 'stop' )
200
201	!
202	!-- Transposition x --> z
203	CALL cpu_log( log_point_s(8), 'transpo invers', 'continue' )
204	CALL transpose_xz( ar, work, ar, work, ar )
205	CALL cpu_log( log_point_s(8), 'transpo invers', 'stop' )
206
207	ENDIF
208
209	#else
210
211	!
212	!-- Two-dimensional Fourier Transformation along x- and y-direction.
213	CALL cpu_log( log_point_s(4), 'fft_x', 'start' )
214	CALL fftx( ar, 'forward' )
215	CALL cpu_log( log_point_s(4), 'fft_x', 'pause' )
216	CALL cpu_log( log_point_s(7), 'fft_y', 'start' )
217	CALL ffty( ar, 'forward' )
218	CALL cpu_log( log_point_s(7), 'fft_y', 'pause' )
219
220	!
221	!-- Solve the Poisson equation in z-direction in cartesian space.
222	CALL cpu_log( log_point_s(6), 'tridia', 'start' )
223	CALL tridia( ar )
224	CALL cpu_log( log_point_s(6), 'tridia', 'stop' )
225
226	!
227	!-- Inverse Fourier Transformation.
228	CALL cpu_log( log_point_s(7), 'fft_y', 'continue' )
229	CALL ffty( ar, 'backward' )
230	CALL cpu_log( log_point_s(7), 'fft_y', 'stop' )
231	CALL cpu_log( log_point_s(4), 'fft_x', 'continue' )
232	CALL fftx( ar, 'backward' )
233	CALL cpu_log( log_point_s(4), 'fft_x', 'stop' )
234
235	#endif
236
237	CALL cpu_log( log_point_s(3), 'poisfft', 'stop' )
238
239	END SUBROUTINE poisfft
240
241
242
243	SUBROUTINE tridia( ar )
244
245	!------------------------------------------------------------------------------!
246	! solves the linear system of equations:
247	!
248	! -(4 pi^2(i^2/(dx^2nnx^2)+j^2/(dy^2nny^2))+
249	! 1/(dzu(k)dzw(k))+1/(dzu(k-1)dzw(k)))*p(i,j,k)+
250	! 1/(dzu(k)dzw(k))p(i,j,k+1)+1/(dzu(k-1)dzw(k))p(i,j,k-1)=d(i,j,k)
251	!
252	! by using the Thomas algorithm
253	!------------------------------------------------------------------------------!
254
255	USE arrays_3d
256
257	IMPLICIT NONE
258
259	INTEGER :: i, j, k, nnyh
260
261	REAL, DIMENSION(nxl_z:nxr_z,0:nz-1) :: ar1
262	REAL, DIMENSION(5,nxl_z:nxr_z,0:nz-1) :: tri
263
264	#if defined( __parallel )
265	REAL :: ar(nxl_z:nxr_za,nys_z:nyn_za,1:nza)
266	#else
267	REAL :: ar(1:nz,nys_z:nyn_z,nxl_z:nxr_z)
268	#endif
269
270
271	nnyh = (ny+1) / 2
272
273	!
274	!-- Define constant elements of the tridiagonal matrix.
275	DO k = 0, nz-1
276	DO i = nxl_z, nxr_z
277	tri(2,i,k) = ddzu(k+1) * ddzw(k+1)
278	tri(3,i,k) = ddzu(k+2) * ddzw(k+1)
279	ENDDO
280	ENDDO
281
282	#if defined( __parallel )
283	!
284	!-- Repeat for all y-levels.
285	DO j = nys_z, nyn_z
286	IF ( j <= nnyh ) THEN
287	CALL maketri( tri, j )
288	ELSE
289	CALL maketri( tri, ny+1-j )
290	ENDIF
291	CALL split( tri )
292	CALL substi( ar, ar1, tri, j )
293	ENDDO
294	#else
295	!
296	!-- First y-level.
297	CALL maketri( tri, nys_z )
298	CALL split( tri )
299	CALL substi( ar, ar1, tri, 0 )
300
301	!
302	!-- Further y-levels.
303	DO j = 1, nnyh - 1
304	CALL maketri( tri, j )
305	CALL split( tri )
306	CALL substi( ar, ar1, tri, j )
307	CALL substi( ar, ar1, tri, ny+1-j )
308	ENDDO
309	CALL maketri( tri, nnyh )
310	CALL split( tri )
311	CALL substi( ar, ar1, tri, nnyh+nys )
312	#endif
313
314	CONTAINS
315
316	SUBROUTINE maketri( tri, j )
317
318	!------------------------------------------------------------------------------!
319	! Computes the i- and j-dependent component of the matrix
320	!------------------------------------------------------------------------------!
321
322	USE arrays_3d
323	USE constants
324	USE control_parameters
325	USE grid_variables
326
327	IMPLICIT NONE
328
329	INTEGER :: i, j, k, nnxh
330	REAL :: a, c
331	REAL :: ll(nxl_z:nxr_z)
332	REAL :: tri(5,nxl_z:nxr_z,0:nz-1)
333
334
335	nnxh = ( nx + 1 ) / 2
336
337	!
338	!-- Provide the tridiagonal matrix for solution of the Poisson equation in
339	!-- Fourier space. The coefficients are computed following the method of
340	!-- Schmidt et al. (DFVLR-Mitteilung 84-15), which departs from Stephan
341	!-- Siano's original version by discretizing the Poisson equation,
342	!-- before it is Fourier-transformed
343	#if defined( __parallel )
344	DO i = nxl_z, nxr_z
345	IF ( i >= 0 .AND. i < nnxh ) THEN
346	ll(i) = 2.0 * ( 1.0 - COS( ( 2.0 * pi * i ) / &
347	FLOAT( nx+1 ) ) ) / ( dx * dx ) + &
348	2.0 * ( 1.0 - COS( ( 2.0 * pi * j ) / &
349	FLOAT( ny+1 ) ) ) / ( dy * dy )
350	ELSEIF ( i == nnxh ) THEN
351	ll(i) = 2.0 * ( 1.0 - COS( ( 2.0 * pi * ( nx+1-i ) ) / &
352	FLOAT( nx+1 ) ) ) / ( dx * dx ) + &
353	2.0 * ( 1.0 - COS( ( 2.0 * pi * j ) / &
354	FLOAT( ny+1 ) ) ) / ( dy * dy )
355	ELSE
356	ll(i) = 2.0 * ( 1.0 - COS( ( 2.0 * pi * ( nx+1-i ) ) / &
357	FLOAT( nx+1 ) ) ) / ( dx * dx ) + &
358	2.0 * ( 1.0 - COS( ( 2.0 * pi * j ) / &
359	FLOAT( ny+1 ) ) ) / ( dy * dy )
360	ENDIF
361	DO k = 0,nz-1
362	a = -1.0 * ddzu(k+2) * ddzw(k+1)
363	c = -1.0 * ddzu(k+1) * ddzw(k+1)
364	tri(1,i,k) = a + c - ll(i)
365	ENDDO
366	ENDDO
367	#else
368	DO i = 0, nnxh
369	ll(i) = 2.0 * ( 1.0 - COS( ( 2.0 * pi * i ) / FLOAT( nx+1 ) ) ) / &
370	( dx * dx ) + &
371	2.0 * ( 1.0 - COS( ( 2.0 * pi * j ) / FLOAT( ny+1 ) ) ) / &
372	( dy * dy )
373	DO k = 0, nz-1
374	a = -1.0 * ddzu(k+2) * ddzw(k+1)
375	c = -1.0 * ddzu(k+1) * ddzw(k+1)
376	tri(1,i,k) = a + c - ll(i)
377	IF ( i >= 1 .and. i < nnxh ) THEN
378	tri(1,nx+1-i,k) = tri(1,i,k)
379	ENDIF
380	ENDDO
381	ENDDO
382	#endif
383	IF ( ibc_p_b == 1 .OR. ibc_p_b == 2 ) THEN
384	DO i = nxl_z, nxr_z
385	tri(1,i,0) = tri(1,i,0) + tri(2,i,0)
386	ENDDO
387	ENDIF
388	IF ( ibc_p_t == 1 ) THEN
389	DO i = nxl_z, nxr_z
390	tri(1,i,nz-1) = tri(1,i,nz-1) + tri(3,i,nz-1)
391	ENDDO
392	ENDIF
393
394	END SUBROUTINE maketri
395
396
397	SUBROUTINE substi( ar, ar1, tri, j )
398
399	!------------------------------------------------------------------------------!
400	! Substitution (Forward and Backward) (Thomas algorithm)
401	!------------------------------------------------------------------------------!
402
403	USE control_parameters
404
405	IMPLICIT NONE
406
407	INTEGER :: i, j, k
408	REAL :: ar1(nxl_z:nxr_z,0:nz-1)
409	REAL :: tri(5,nxl_z:nxr_z,0:nz-1)
410	#if defined( __parallel )
411	REAL :: ar(nxl_z:nxr_za,nys_z:nyn_za,1:nza)
412	#else
413	REAL :: ar(1:nz,nys_z:nyn_z,nxl_z:nxr_z)
414	#endif
415
416	!
417	!-- Forward substitution.
418	DO i = nxl_z, nxr_z
419	#if defined( __parallel )
420	ar1(i,0) = ar(i,j,1)
421	#else
422	ar1(i,0) = ar(1,j,i)
423	#endif
424	ENDDO
425	DO k = 1, nz - 1
426	DO i = nxl_z, nxr_z
427	#if defined( __parallel )
428	ar1(i,k) = ar(i,j,k+1) - tri(5,i,k) * ar1(i,k-1)
429	#else
430	ar1(i,k) = ar(k+1,j,i) - tri(5,i,k) * ar1(i,k-1)
431	#endif
432	ENDDO
433	ENDDO
434
435	!
436	!-- Backward substitution.
437	DO i = nxl_z, nxr_z
438	#if defined( __parallel )
439	ar(i,j,nz) = ar1(i,nz-1) / tri(4,i,nz-1)
440	#else
441	ar(nz,j,i) = ar1(i,nz-1) / tri(4,i,nz-1)
442	#endif
443	ENDDO
444	DO k = nz-2, 0, -1
445	DO i = nxl_z, nxr_z
446	#if defined( __parallel )
447	ar(i,j,k+1) = ( ar1(i,k) - tri(3,i,k) * ar(i,j,k+2) ) &
448	/ tri(4,i,k)
449	#else
450	ar(k+1,j,i) = ( ar1(i,k) - tri(3,i,k) * ar(k+2,j,i) ) &
451	/ tri(4,i,k)
452	#endif
453	ENDDO
454	ENDDO
455
456	!
457	!-- Indices i=0, j=0 correspond to horizontally averaged pressure.
458	!-- The respective values of ar should be zero at all k-levels if
459	!-- acceleration of horizontally averaged vertical velocity is zero.
460	IF ( ibc_p_b == 1 .AND. ibc_p_t == 1 ) THEN
461	IF ( j == 0 .AND. nxl_z == 0 ) THEN
462	#if defined( __parallel )
463	DO k = 1, nz
464	ar(nxl_z,j,k) = 0.0
465	ENDDO
466	#else
467	DO k = 1, nz
468	ar(k,j,nxl_z) = 0.0
469	ENDDO
470	#endif
471	ENDIF
472	ENDIF
473
474	END SUBROUTINE substi
475
476
477	SUBROUTINE split( tri )
478
479	!------------------------------------------------------------------------------!
480	! Splitting of the tridiagonal matrix (Thomas algorithm)
481	!------------------------------------------------------------------------------!
482
483	IMPLICIT NONE
484
485	INTEGER :: i, k
486	REAL :: tri(5,nxl_z:nxr_z,0:nz-1)
487
488	!
489	!-- Splitting.
490	DO i = nxl_z, nxr_z
491	tri(4,i,0) = tri(1,i,0)
492	ENDDO
493	DO k = 1, nz-1
494	DO i = nxl_z, nxr_z
495	tri(5,i,k) = tri(2,i,k) / tri(4,i,k-1)
496	tri(4,i,k) = tri(1,i,k) - tri(3,i,k-1) * tri(5,i,k)
497	ENDDO
498	ENDDO
499
500	END SUBROUTINE split
501
502	END SUBROUTINE tridia
503
504
505	#if defined( __parallel )
506	SUBROUTINE fftxp( ar, direction )
507
508	!------------------------------------------------------------------------------!
509	! Fourier-transformation along x-direction Parallelized version
510	!------------------------------------------------------------------------------!
511
512	IMPLICIT NONE
513
514	CHARACTER (LEN=*) :: direction
515	INTEGER :: j, k
516	REAL :: ar(0:nxa,nys_x:nyn_xa,nzb_x:nzt_xa)
517
518	!
519	!-- Performing the fft with one of the methods implemented
520	DO k = nzb_x, nzt_x
521	DO j = nys_x, nyn_x
522	CALL fft_x( ar(0:nx,j,k), direction )
523	ENDDO
524	ENDDO
525
526	END SUBROUTINE fftxp
527
528	#else
529	SUBROUTINE fftx( ar, direction )
530
531	!------------------------------------------------------------------------------!
532	! Fourier-transformation along x-direction Non parallel version
533	!------------------------------------------------------------------------------!
534
535	IMPLICIT NONE
536
537	CHARACTER (LEN=*) :: direction
538	INTEGER :: i, j, k
539	REAL :: ar(1:nz,0:ny,0:nx)
540
541	!
542	!-- Performing the fft with one of the methods implemented
543	DO k = 1, nz
544	DO j = 0, ny
545	CALL fft_x( ar(k,j,0:nx), direction )
546	ENDDO
547	ENDDO
548
549	END SUBROUTINE fftx
550	#endif
551
552
553	#if defined( __parallel )
554	SUBROUTINE fftyp( ar, direction )
555
556	!------------------------------------------------------------------------------!
557	! Fourier-transformation along y-direction Parallelized version
558	!------------------------------------------------------------------------------!
559
560	IMPLICIT NONE
561
562	CHARACTER (LEN=*) :: direction
563	INTEGER :: i, k
564	REAL :: ar(0:nya,nxl_y:nxr_ya,nzb_y:nzt_ya)
565
566	!
567	!-- Performing the fft with one of the methods implemented
568	DO k = nzb_y, nzt_y
569	DO i = nxl_y, nxr_y
570	CALL fft_y( ar(0:ny,i,k), direction )
571	ENDDO
572	ENDDO
573
574	END SUBROUTINE fftyp
575
576	#else
577	SUBROUTINE ffty( ar, direction )
578
579	!------------------------------------------------------------------------------!
580	! Fourier-transformation along y-direction Non parallel version
581	!------------------------------------------------------------------------------!
582
583	IMPLICIT NONE
584
585	CHARACTER (LEN=*) :: direction
586	INTEGER :: i, k
587	REAL :: ar(1:nz,0:ny,0:nx)
588
589	!
590	!-- Performing the fft with one of the methods implemented
591	DO k = 1, nz
592	DO i = 0, nx
593	CALL fft_y( ar(k,0:ny,i), direction )
594	ENDDO
595	ENDDO
596
597	END SUBROUTINE ffty
598	#endif
599
600	#if defined( __parallel )
601	SUBROUTINE ffty_tr_yx( f_in, work, f_out )
602
603	!------------------------------------------------------------------------------!
604	! Fourier-transformation along y with subsequent transposition y --> x for
605	! a 1d-decomposition along x
606	!
607	! ATTENTION: The performance of this routine is much faster on the NEC-SX6,
608	! if the first index of work_ffty_vec is odd. Otherwise
609	! memory bank conflicts may occur (especially if the index is a
610	! multiple of 128). That's why work_ffty_vec is dimensioned as
611	! 0:ny+1.
612	! Of course, this will not work if users are using an odd number
613	! of gridpoints along y.
614	!------------------------------------------------------------------------------!
615
616	USE control_parameters
617	USE cpulog
618	USE indices
619	USE interfaces
620	USE pegrid
621	USE transpose_indices
622
623	IMPLICIT NONE
624
625	INTEGER :: i, iend, iouter, ir, j, k
626	INTEGER, PARAMETER :: stridex = 4
627
628	REAL, DIMENSION(0:ny,stridex) :: work_ffty
629	#if defined( __nec )
630	REAL, DIMENSION(0:ny+1,1:nz,nxl:nxr) :: work_ffty_vec
631	#endif
632	REAL, DIMENSION(1:nza,0:nya,nxl:nxra) :: f_in
633	REAL, DIMENSION(nnx,1:nza,nys_x:nyn_xa,pdims(1)) :: f_out
634	REAL, DIMENSION(nxl:nxra,1:nza,0:nya) :: work
635
636	!
637	!-- Carry out the FFT along y, where all data are present due to the
638	!-- 1d-decomposition along x. Resort the data in a way that x becomes
639	!-- the first index.
640	CALL cpu_log( log_point_s(7), 'fft_y', 'start' )
641
642	IF ( host(1:3) == 'nec' ) THEN
643	#if defined( __nec )
644	!
645	!-- Code optimized for vector processors
646	!$OMP PARALLEL PRIVATE ( i, j, k )
647	!$OMP DO
648	DO i = nxl, nxr
649
650	DO j = 0, ny
651	DO k = 1, nz
652	work_ffty_vec(j,k,i) = f_in(k,j,i)
653	ENDDO
654	ENDDO
655
656	CALL fft_y_m( work_ffty_vec(:,:,i), ny+1, 'forward' )
657
658	ENDDO
659
660	!$OMP DO
661	DO k = 1, nz
662	DO j = 0, ny
663	DO i = nxl, nxr
664	work(i,k,j) = work_ffty_vec(j,k,i)
665	ENDDO
666	ENDDO
667	ENDDO
668	!$OMP END PARALLEL
669	#endif
670
671	ELSE
672
673	!
674	!-- Cache optimized code.
675	!-- The i-(x-)direction is split into a strided outer loop and an inner
676	!-- loop for better cache performance
677	!$OMP PARALLEL PRIVATE (i,iend,iouter,ir,j,k,work_ffty)
678	!$OMP DO
679	DO iouter = nxl, nxr, stridex
680
681	iend = MIN( iouter+stridex-1, nxr ) ! Upper bound for inner i loop
682
683	DO k = 1, nz
684
685	DO i = iouter, iend
686
687	ir = i-iouter+1 ! counter within a stride
688	DO j = 0, ny
689	work_ffty(j,ir) = f_in(k,j,i)
690	ENDDO
691	!
692	!-- FFT along y
693	CALL fft_y( work_ffty(:,ir), 'forward' )
694
695	ENDDO
696
697	!
698	!-- Resort
699	DO j = 0, ny
700	DO i = iouter, iend
701	work(i,k,j) = work_ffty(j,i-iouter+1)
702	ENDDO
703	ENDDO
704
705	ENDDO
706
707	ENDDO
708	!$OMP END PARALLEL
709
710	ENDIF
711	CALL cpu_log( log_point_s(7), 'fft_y', 'pause' )
712
713	!
714	!-- Transpose array
715	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
716	CALL MPI_ALLTOALL( work(nxl,1,0), sendrecvcount_xy, MPI_REAL, &
717	f_out(1,1,nys_x,1), sendrecvcount_xy, MPI_REAL, &
718	comm1dx, ierr )
719	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
720
721	END SUBROUTINE ffty_tr_yx
722
723
724	SUBROUTINE tr_xy_ffty( f_in, work, f_out )
725
726	!------------------------------------------------------------------------------!
727	! Transposition x --> y with a subsequent backward Fourier transformation for
728	! a 1d-decomposition along x
729	!------------------------------------------------------------------------------!
730
731	USE control_parameters
732	USE cpulog
733	USE indices
734	USE interfaces
735	USE pegrid
736	USE transpose_indices
737
738	IMPLICIT NONE
739
740	INTEGER :: i, iend, iouter, ir, j, k
741	INTEGER, PARAMETER :: stridex = 4
742
743	REAL, DIMENSION(0:ny,stridex) :: work_ffty
744	#if defined( __nec )
745	REAL, DIMENSION(0:ny+1,1:nz,nxl:nxr) :: work_ffty_vec
746	#endif
747	REAL, DIMENSION(nnx,1:nza,nys_x:nyn_xa,pdims(1)) :: f_in
748	REAL, DIMENSION(1:nza,0:nya,nxl:nxra) :: f_out
749	REAL, DIMENSION(nxl:nxra,1:nza,0:nya) :: work
750
751	!
752	!-- Transpose array
753	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
754	CALL MPI_ALLTOALL( f_in(1,1,nys_x,1), sendrecvcount_xy, MPI_REAL, &
755	work(nxl,1,0), sendrecvcount_xy, MPI_REAL, &
756	comm1dx, ierr )
757	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
758
759	!
760	!-- Resort the data in a way that y becomes the first index and carry out the
761	!-- backward fft along y.
762	CALL cpu_log( log_point_s(7), 'fft_y', 'continue' )
763
764	IF ( host(1:3) == 'nec' ) THEN
765	#if defined( __nec )
766	!
767	!-- Code optimized for vector processors
768	!$OMP PARALLEL PRIVATE ( i, j, k )
769	!$OMP DO
770	DO k = 1, nz
771	DO j = 0, ny
772	DO i = nxl, nxr
773	work_ffty_vec(j,k,i) = work(i,k,j)
774	ENDDO
775	ENDDO
776	ENDDO
777
778	!$OMP DO
779	DO i = nxl, nxr
780
781	CALL fft_y_m( work_ffty_vec(:,:,i), ny+1, 'backward' )
782
783	DO j = 0, ny
784	DO k = 1, nz
785	f_out(k,j,i) = work_ffty_vec(j,k,i)
786	ENDDO
787	ENDDO
788
789	ENDDO
790	!$OMP END PARALLEL
791	#endif
792
793	ELSE
794
795	!
796	!-- Cache optimized code.
797	!-- The i-(x-)direction is split into a strided outer loop and an inner
798	!-- loop for better cache performance
799	!$OMP PARALLEL PRIVATE ( i, iend, iouter, ir, j, k, work_ffty )
800	!$OMP DO
801	DO iouter = nxl, nxr, stridex
802
803	iend = MIN( iouter+stridex-1, nxr ) ! Upper bound for inner i loop
804
805	DO k = 1, nz
806	!
807	!-- Resort
808	DO j = 0, ny
809	DO i = iouter, iend
810	work_ffty(j,i-iouter+1) = work(i,k,j)
811	ENDDO
812	ENDDO
813
814	DO i = iouter, iend
815
816	!
817	!-- FFT along y
818	ir = i-iouter+1 ! counter within a stride
819	CALL fft_y( work_ffty(:,ir), 'backward' )
820
821	DO j = 0, ny
822	f_out(k,j,i) = work_ffty(j,ir)
823	ENDDO
824	ENDDO
825
826	ENDDO
827
828	ENDDO
829	!$OMP END PARALLEL
830
831	ENDIF
832
833	CALL cpu_log( log_point_s(7), 'fft_y', 'stop' )
834
835	END SUBROUTINE tr_xy_ffty
836
837
838	SUBROUTINE fftx_tri_fftx( ar )
839
840	!------------------------------------------------------------------------------!
841	! FFT along x, solution of the tridiagonal system and backward FFT for
842	! a 1d-decomposition along x
843	!
844	! WARNING: this subroutine may still not work for hybrid parallelization
845	! with OpenMP (for possible necessary changes see the original
846	! routine poisfft_hybrid, developed by Klaus Ketelsen, May 2002)
847	!------------------------------------------------------------------------------!
848
849	USE control_parameters
850	USE cpulog
851	USE grid_variables
852	USE indices
853	USE interfaces
854	USE pegrid
855	USE transpose_indices
856
857	IMPLICIT NONE
858
859	character(len=3) :: myth_char
860
861	INTEGER :: i, j, k, m, n, omp_get_thread_num, tn
862
863	REAL, DIMENSION(0:nx) :: work_fftx
864	REAL, DIMENSION(0:nx,1:nz) :: work_trix
865	REAL, DIMENSION(nnx,1:nza,nys_x:nyn_xa,pdims(1)) :: ar
866	REAL, DIMENSION(:,:,:,:), ALLOCATABLE :: tri
867
868
869	CALL cpu_log( log_point_s(33), 'fft_x + tridia', 'start' )
870
871	ALLOCATE( tri(5,0:nx,0:nz-1,0:threads_per_task-1) )
872
873	tn = 0 ! Default thread number in case of one thread
874	!$OMP PARALLEL DO PRIVATE ( i, j, k, m, n, tn, work_fftx, work_trix )
875	DO j = nys_x, nyn_x
876
877	!$ tn = omp_get_thread_num()
878
879	IF ( host(1:3) == 'nec' ) THEN
880	!
881	!-- Code optimized for vector processors
882	DO k = 1, nz
883
884	m = 0
885	DO n = 1, pdims(1)
886	DO i = 1, nnx_pe( n-1 ) ! WARN: pcoord(i) should be used!!
887	work_trix(m,k) = ar(i,k,j,n)
888	m = m + 1
889	ENDDO
890	ENDDO
891
892	ENDDO
893
894	CALL fft_x_m( work_trix, 'forward' )
895
896	ELSE
897	!
898	!-- Cache optimized code
899	DO k = 1, nz
900
901	m = 0
902	DO n = 1, pdims(1)
903	DO i = 1, nnx_pe( n-1 ) ! WARN: pcoord(i) should be used!!
904	work_fftx(m) = ar(i,k,j,n)
905	m = m + 1
906	ENDDO
907	ENDDO
908
909	CALL fft_x( work_fftx, 'forward' )
910
911	DO i = 0, nx
912	work_trix(i,k) = work_fftx(i)
913	ENDDO
914
915	ENDDO
916
917	ENDIF
918
919	!
920	!-- Solve the linear equation system
921	CALL tridia_1dd( ddx2, ddy2, nx, ny, j, work_trix, tri(:,:,:,tn) )
922
923	IF ( host(1:3) == 'nec' ) THEN
924	!
925	!-- Code optimized for vector processors
926	CALL fft_x_m( work_trix, 'backward' )
927
928	DO k = 1, nz
929
930	m = 0
931	DO n = 1, pdims(1)
932	DO i = 1, nnx_pe( n-1 ) ! WARN: pcoord(i) should be used!!
933	ar(i,k,j,n) = work_trix(m,k)
934	m = m + 1
935	ENDDO
936	ENDDO
937
938	ENDDO
939
940	ELSE
941	!
942	!-- Cache optimized code
943	DO k = 1, nz
944
945	DO i = 0, nx
946	work_fftx(i) = work_trix(i,k)
947	ENDDO
948
949	CALL fft_x( work_fftx, 'backward' )
950
951	m = 0
952	DO n = 1, pdims(1)
953	DO i = 1, nnx_pe( n-1 ) ! WARN: pcoord(i) should be used!!
954	ar(i,k,j,n) = work_fftx(m)
955	m = m + 1
956	ENDDO
957	ENDDO
958
959	ENDDO
960
961	ENDIF
962
963	ENDDO
964
965	DEALLOCATE( tri )
966
967	CALL cpu_log( log_point_s(33), 'fft_x + tridia', 'stop' )
968
969	END SUBROUTINE fftx_tri_fftx
970
971
972	SUBROUTINE fftx_tr_xy( f_in, work, f_out )
973
974	!------------------------------------------------------------------------------!
975	! Fourier-transformation along x with subsequent transposition x --> y for
976	! a 1d-decomposition along y
977	!
978	! ATTENTION: The NEC-branch of this routine may significantly profit from
979	! further optimizations. So far, performance is much worse than
980	! for routine ffty_tr_yx (more than three times slower).
981	!------------------------------------------------------------------------------!
982
983	USE control_parameters
984	USE cpulog
985	USE indices
986	USE interfaces
987	USE pegrid
988	USE transpose_indices
989
990	IMPLICIT NONE
991
992	INTEGER :: i, j, k
993
994	REAL, DIMENSION(0:nx,1:nz,nys:nyn) :: work_fftx
995	REAL, DIMENSION(1:nza,nys:nyna,0:nxa) :: f_in
996	REAL, DIMENSION(nny,1:nza,nxl_y:nxr_ya,pdims(2)) :: f_out
997	REAL, DIMENSION(nys:nyna,1:nza,0:nxa) :: work
998
999	!
1000	!-- Carry out the FFT along x, where all data are present due to the
1001	!-- 1d-decomposition along y. Resort the data in a way that y becomes
1002	!-- the first index.
1003	CALL cpu_log( log_point_s(4), 'fft_x', 'start' )
1004
1005	IF ( host(1:3) == 'nec' ) THEN
1006	!
1007	!-- Code for vector processors
1008	!$OMP PARALLEL PRIVATE ( i, j, k )
1009	!$OMP DO
1010	DO i = 0, nx
1011
1012	DO j = nys, nyn
1013	DO k = 1, nz
1014	work_fftx(i,k,j) = f_in(k,j,i)
1015	ENDDO
1016	ENDDO
1017
1018	ENDDO
1019
1020	!$OMP DO
1021	DO j = nys, nyn
1022
1023	CALL fft_x_m( work_fftx(:,:,j), 'forward' )
1024
1025	DO k = 1, nz
1026	DO i = 0, nx
1027	work(j,k,i) = work_fftx(i,k,j)
1028	ENDDO
1029	ENDDO
1030
1031	ENDDO
1032	!$OMP END PARALLEL
1033
1034	ELSE
1035
1036	!
1037	!-- Cache optimized code (there might be still a potential for better
1038	!-- optimization).
1039	!$OMP PARALLEL PRIVATE (i,j,k,work_fftx)
1040	!$OMP DO
1041	DO i = 0, nx
1042
1043	DO j = nys, nyn
1044	DO k = 1, nz
1045	work_fftx(i,k,j) = f_in(k,j,i)
1046	ENDDO
1047	ENDDO
1048
1049	ENDDO
1050
1051	!$OMP DO
1052	DO j = nys, nyn
1053	DO k = 1, nz
1054
1055	CALL fft_x( work_fftx(0:nx,k,j), 'forward' )
1056
1057	DO i = 0, nx
1058	work(j,k,i) = work_fftx(i,k,j)
1059	ENDDO
1060	ENDDO
1061
1062	ENDDO
1063	!$OMP END PARALLEL
1064
1065	ENDIF
1066	CALL cpu_log( log_point_s(4), 'fft_x', 'pause' )
1067
1068	!
1069	!-- Transpose array
1070	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
1071	CALL MPI_ALLTOALL( work(nys,1,0), sendrecvcount_xy, MPI_REAL, &
1072	f_out(1,1,nxl_y,1), sendrecvcount_xy, MPI_REAL, &
1073	comm1dy, ierr )
1074	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
1075
1076	END SUBROUTINE fftx_tr_xy
1077
1078
1079	SUBROUTINE tr_yx_fftx( f_in, work, f_out )
1080
1081	!------------------------------------------------------------------------------!
1082	! Transposition y --> x with a subsequent backward Fourier transformation for
1083	! a 1d-decomposition along x
1084	!------------------------------------------------------------------------------!
1085
1086	USE control_parameters
1087	USE cpulog
1088	USE indices
1089	USE interfaces
1090	USE pegrid
1091	USE transpose_indices
1092
1093	IMPLICIT NONE
1094
1095	INTEGER :: i, j, k
1096
1097	REAL, DIMENSION(0:nx,1:nz,nys:nyn) :: work_fftx
1098	REAL, DIMENSION(nny,1:nza,nxl_y:nxr_ya,pdims(2)) :: f_in
1099	REAL, DIMENSION(1:nza,nys:nyna,0:nxa) :: f_out
1100	REAL, DIMENSION(nys:nyna,1:nza,0:nxa) :: work
1101
1102	!
1103	!-- Transpose array
1104	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
1105	CALL MPI_ALLTOALL( f_in(1,1,nxl_y,1), sendrecvcount_xy, MPI_REAL, &
1106	work(nys,1,0), sendrecvcount_xy, MPI_REAL, &
1107	comm1dy, ierr )
1108	CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
1109
1110	!
1111	!-- Carry out the FFT along x, where all data are present due to the
1112	!-- 1d-decomposition along y. Resort the data in a way that y becomes
1113	!-- the first index.
1114	CALL cpu_log( log_point_s(4), 'fft_x', 'continue' )
1115
1116	IF ( host(1:3) == 'nec' ) THEN
1117	!
1118	!-- Code optimized for vector processors
1119	!$OMP PARALLEL PRIVATE ( i, j, k )
1120	!$OMP DO
1121	DO j = nys, nyn
1122
1123	DO k = 1, nz
1124	DO i = 0, nx
1125	work_fftx(i,k,j) = work(j,k,i)
1126	ENDDO
1127	ENDDO
1128
1129	CALL fft_x_m( work_fftx(:,:,j), 'backward' )
1130
1131	ENDDO
1132
1133	!$OMP DO
1134	DO i = 0, nx
1135	DO j = nys, nyn
1136	DO k = 1, nz
1137	f_out(k,j,i) = work_fftx(i,k,j)
1138	ENDDO
1139	ENDDO
1140	ENDDO
1141	!$OMP END PARALLEL
1142
1143	ELSE
1144
1145	!
1146	!-- Cache optimized code (there might be still a potential for better
1147	!-- optimization).
1148	!$OMP PARALLEL PRIVATE (i,j,k,work_fftx)
1149	!$OMP DO
1150	DO j = nys, nyn
1151	DO k = 1, nz
1152
1153	DO i = 0, nx
1154	work_fftx(i,k,j) = work(j,k,i)
1155	ENDDO
1156
1157	CALL fft_x( work_fftx(0:nx,k,j), 'backward' )
1158
1159	ENDDO
1160	ENDDO
1161
1162	!$OMP DO
1163	DO i = 0, nx
1164	DO j = nys, nyn
1165	DO k = 1, nz
1166	f_out(k,j,i) = work_fftx(i,k,j)
1167	ENDDO
1168	ENDDO
1169	ENDDO
1170	!$OMP END PARALLEL
1171
1172	ENDIF
1173	CALL cpu_log( log_point_s(4), 'fft_x', 'stop' )
1174
1175	END SUBROUTINE tr_yx_fftx
1176
1177
1178	SUBROUTINE ffty_tri_ffty( ar )
1179
1180	!------------------------------------------------------------------------------!
1181	! FFT along y, solution of the tridiagonal system and backward FFT for
1182	! a 1d-decomposition along y
1183	!
1184	! WARNING: this subroutine may still not work for hybrid parallelization
1185	! with OpenMP (for possible necessary changes see the original
1186	! routine poisfft_hybrid, developed by Klaus Ketelsen, May 2002)
1187	!------------------------------------------------------------------------------!
1188
1189	USE control_parameters
1190	USE cpulog
1191	USE grid_variables
1192	USE indices
1193	USE interfaces
1194	USE pegrid
1195	USE transpose_indices
1196
1197	IMPLICIT NONE
1198
1199	INTEGER :: i, j, k, m, n, omp_get_thread_num, tn
1200
1201	REAL, DIMENSION(0:ny) :: work_ffty
1202	REAL, DIMENSION(0:ny,1:nz) :: work_triy
1203	REAL, DIMENSION(nny,1:nza,nxl_y:nxr_ya,pdims(2)) :: ar
1204	REAL, DIMENSION(:,:,:,:), ALLOCATABLE :: tri
1205
1206
1207	CALL cpu_log( log_point_s(39), 'fft_y + tridia', 'start' )
1208
1209	ALLOCATE( tri(5,0:ny,0:nz-1,0:threads_per_task-1) )
1210
1211	tn = 0 ! Default thread number in case of one thread
1212	!$OMP PARALLEL PRIVATE ( i, j, k, m, n, tn, work_ffty, work_triy )
1213	!$OMP DO
1214	DO i = nxl_y, nxr_y
1215
1216	!$ tn = omp_get_thread_num()
1217
1218	IF ( host(1:3) == 'nec' ) THEN
1219	!
1220	!-- Code optimized for vector processors
1221	DO k = 1, nz
1222
1223	m = 0
1224	DO n = 1, pdims(2)
1225	DO j = 1, nny_pe( n-1 ) ! WARN: pcoord(j) should be used!!
1226	work_triy(m,k) = ar(j,k,i,n)
1227	m = m + 1
1228	ENDDO
1229	ENDDO
1230
1231	ENDDO
1232
1233	CALL fft_y_m( work_triy, ny, 'forward' )
1234
1235	ELSE
1236	!
1237	!-- Cache optimized code
1238	DO k = 1, nz
1239
1240	m = 0
1241	DO n = 1, pdims(2)
1242	DO j = 1, nny_pe( n-1 ) ! WARN: pcoord(j) should be used!!
1243	work_ffty(m) = ar(j,k,i,n)
1244	m = m + 1
1245	ENDDO
1246	ENDDO
1247
1248	CALL fft_y( work_ffty, 'forward' )
1249
1250	DO j = 0, ny
1251	work_triy(j,k) = work_ffty(j)
1252	ENDDO
1253
1254	ENDDO
1255
1256	ENDIF
1257
1258	!
1259	!-- Solve the linear equation system
1260	CALL tridia_1dd( ddy2, ddx2, ny, nx, i, work_triy, tri(:,:,:,tn) )
1261
1262	IF ( host(1:3) == 'nec' ) THEN
1263	!
1264	!-- Code optimized for vector processors
1265	CALL fft_y_m( work_triy, ny, 'backward' )
1266
1267	DO k = 1, nz
1268
1269	m = 0
1270	DO n = 1, pdims(2)
1271	DO j = 1, nny_pe( n-1 ) ! WARN: pcoord(j) should be used!!
1272	ar(j,k,i,n) = work_triy(m,k)
1273	m = m + 1
1274	ENDDO
1275	ENDDO
1276
1277	ENDDO
1278
1279	ELSE
1280	!
1281	!-- Cache optimized code
1282	DO k = 1, nz
1283
1284	DO j = 0, ny
1285	work_ffty(j) = work_triy(j,k)
1286	ENDDO
1287
1288	CALL fft_y( work_ffty, 'backward' )
1289
1290	m = 0
1291	DO n = 1, pdims(2)
1292	DO j = 1, nny_pe( n-1 ) ! WARN: pcoord(j) should be used!!
1293	ar(j,k,i,n) = work_ffty(m)
1294	m = m + 1
1295	ENDDO
1296	ENDDO
1297
1298	ENDDO
1299
1300	ENDIF
1301
1302	ENDDO
1303	!$OMP END PARALLEL
1304
1305	DEALLOCATE( tri )
1306
1307	CALL cpu_log( log_point_s(39), 'fft_y + tridia', 'stop' )
1308
1309	END SUBROUTINE ffty_tri_ffty
1310
1311
1312	SUBROUTINE tridia_1dd( ddx2, ddy2, nx, ny, j, ar, tri )
1313
1314	!------------------------------------------------------------------------------!
1315	! Solves the linear system of equations for a 1d-decomposition along x (see
1316	! tridia)
1317	!
1318	! Attention: when using the intel compiler, array tri must be passed as an
1319	! argument to the contained subroutines. Otherwise addres faults
1320	! will occur.
1321	! On NEC, tri should not be passed (except for routine substi_1dd)
1322	! because this causes very bad performance.
1323	!------------------------------------------------------------------------------!
1324
1325	USE arrays_3d
1326	USE control_parameters
1327
1328	USE pegrid
1329
1330	IMPLICIT NONE
1331
1332	INTEGER :: i, j, k, nnyh, nx, ny, omp_get_thread_num, tn
1333
1334	REAL :: ddx2, ddy2
1335
1336	REAL, DIMENSION(0:nx,1:nz) :: ar
1337	REAL, DIMENSION(0:nx,0:nz-1) :: ar1
1338	REAL, DIMENSION(5,0:nx,0:nz-1) :: tri
1339
1340
1341	nnyh = ( ny + 1 ) / 2
1342
1343	!
1344	!-- Define constant elements of the tridiagonal matrix.
1345	!-- The compiler on SX6 does loop exchange. If 0:nx is a high power of 2,
1346	!-- the exchanged loops create bank conflicts. The following directive
1347	!-- prohibits loop exchange and the loops perform much better.
1348	! tn = omp_get_thread_num()
1349	! WRITE( 120+tn, * ) '+++ id=',myid,' nx=',nx,' thread=', omp_get_thread_num()
1350	! CALL local_flush( 120+tn )
1351	!CDIR NOLOOPCHG
1352	DO k = 0, nz-1
1353	DO i = 0,nx
1354	tri(2,i,k) = ddzu(k+1) * ddzw(k+1)
1355	tri(3,i,k) = ddzu(k+2) * ddzw(k+1)
1356	ENDDO
1357	ENDDO
1358	! WRITE( 120+tn, * ) '+++ id=',myid,' end of first tridia loop thread=', omp_get_thread_num()
1359	! CALL local_flush( 120+tn )
1360
1361	IF ( j <= nnyh ) THEN
1362	#if defined( __lcmuk )
1363	CALL maketri_1dd( j, tri )
1364	#else
1365	CALL maketri_1dd( j )
1366	#endif
1367	ELSE
1368	#if defined( __lcmuk )
1369	CALL maketri_1dd( ny+1-j, tri )
1370	#else
1371	CALL maketri_1dd( ny+1-j )
1372	#endif
1373	ENDIF
1374	#if defined( __lcmuk )
1375	CALL split_1dd( tri )
1376	#else
1377	CALL split_1dd
1378	#endif
1379	CALL substi_1dd( ar, tri )
1380
1381	CONTAINS
1382
1383	#if defined( __lcmuk )
1384	SUBROUTINE maketri_1dd( j, tri )
1385	#else
1386	SUBROUTINE maketri_1dd( j )
1387	#endif
1388
1389	!------------------------------------------------------------------------------!
1390	! computes the i- and j-dependent component of the matrix
1391	!------------------------------------------------------------------------------!
1392
1393	USE constants
1394
1395	IMPLICIT NONE
1396
1397	INTEGER :: i, j, k, nnxh
1398	REAL :: a, c
1399
1400	REAL, DIMENSION(0:nx) :: l
1401
1402	#if defined( __lcmuk )
1403	REAL, DIMENSION(5,0:nx,0:nz-1) :: tri
1404	#endif
1405
1406
1407	nnxh = ( nx + 1 ) / 2
1408	!
1409	!-- Provide the tridiagonal matrix for solution of the Poisson equation in
1410	!-- Fourier space. The coefficients are computed following the method of
1411	!-- Schmidt et al. (DFVLR-Mitteilung 84-15), which departs from Stephan
1412	!-- Siano's original version by discretizing the Poisson equation,
1413	!-- before it is Fourier-transformed
1414	DO i = 0, nx
1415	IF ( i >= 0 .AND. i < nnxh ) THEN
1416	l(i) = 2.0 * ( 1.0 - COS( ( 2.0 * pi * i ) / &
1417	FLOAT( nx+1 ) ) ) * ddx2 + &
1418	2.0 * ( 1.0 - COS( ( 2.0 * pi * j ) / &
1419	FLOAT( ny+1 ) ) ) * ddy2
1420	ELSEIF ( i == nnxh ) THEN
1421	l(i) = 2.0 * ( 1.0 - COS( ( 2.0 * pi * ( nx+1-i ) ) / &
1422	FLOAT( nx+1 ) ) ) * ddx2 + &
1423	2.0 * ( 1.0 - COS( ( 2.0 * pi * j ) / &
1424	FLOAT( ny+1 ) ) ) * ddy2
1425	ELSE
1426	l(i) = 2.0 * ( 1.0 - COS( ( 2.0 * pi * ( nx+1-i ) ) / &
1427	FLOAT( nx+1 ) ) ) * ddx2 + &
1428	2.0 * ( 1.0 - COS( ( 2.0 * pi * j ) / &
1429	FLOAT( ny+1 ) ) ) * ddy2
1430	ENDIF
1431	ENDDO
1432
1433	DO k = 0, nz-1
1434	DO i = 0, nx
1435	a = -1.0 * ddzu(k+2) * ddzw(k+1)
1436	c = -1.0 * ddzu(k+1) * ddzw(k+1)
1437	tri(1,i,k) = a + c - l(i)
1438	ENDDO
1439	ENDDO
1440	IF ( ibc_p_b == 1 .OR. ibc_p_b == 2 ) THEN
1441	DO i = 0, nx
1442	tri(1,i,0) = tri(1,i,0) + tri(2,i,0)
1443	ENDDO
1444	ENDIF
1445	IF ( ibc_p_t == 1 ) THEN
1446	DO i = 0, nx
1447	tri(1,i,nz-1) = tri(1,i,nz-1) + tri(3,i,nz-1)
1448	ENDDO
1449	ENDIF
1450
1451	END SUBROUTINE maketri_1dd
1452
1453
1454	#if defined( __lcmuk )
1455	SUBROUTINE split_1dd( tri )
1456	#else
1457	SUBROUTINE split_1dd
1458	#endif
1459
1460	!------------------------------------------------------------------------------!
1461	! Splitting of the tridiagonal matrix (Thomas algorithm)
1462	!------------------------------------------------------------------------------!
1463
1464	IMPLICIT NONE
1465
1466	INTEGER :: i, k
1467
1468	#if defined( __lcmuk )
1469	REAL, DIMENSION(5,0:nx,0:nz-1) :: tri
1470	#endif
1471
1472
1473	!
1474	!-- Splitting
1475	DO i = 0, nx
1476	tri(4,i,0) = tri(1,i,0)
1477	ENDDO
1478	DO k = 1, nz-1
1479	DO i = 0, nx
1480	tri(5,i,k) = tri(2,i,k) / tri(4,i,k-1)
1481	tri(4,i,k) = tri(1,i,k) - tri(3,i,k-1) * tri(5,i,k)
1482	ENDDO
1483	ENDDO
1484
1485	END SUBROUTINE split_1dd
1486
1487
1488	SUBROUTINE substi_1dd( ar, tri )
1489
1490	!------------------------------------------------------------------------------!
1491	! Substitution (Forward and Backward) (Thomas algorithm)
1492	!------------------------------------------------------------------------------!
1493
1494	IMPLICIT NONE
1495
1496	INTEGER :: i, k
1497
1498	REAL, DIMENSION(0:nx,nz) :: ar
1499	REAL, DIMENSION(0:nx,0:nz-1) :: ar1
1500	REAL, DIMENSION(5,0:nx,0:nz-1) :: tri
1501
1502	!
1503	!-- Forward substitution
1504	DO i = 0, nx
1505	ar1(i,0) = ar(i,1)
1506	ENDDO
1507	DO k = 1, nz-1
1508	DO i = 0, nx
1509	ar1(i,k) = ar(i,k+1) - tri(5,i,k) * ar1(i,k-1)
1510	ENDDO
1511	ENDDO
1512
1513	!
1514	!-- Backward substitution
1515	DO i = 0, nx
1516	ar(i,nz) = ar1(i,nz-1) / tri(4,i,nz-1)
1517	ENDDO
1518	DO k = nz-2, 0, -1
1519	DO i = 0, nx
1520	ar(i,k+1) = ( ar1(i,k) - tri(3,i,k) * ar(i,k+2) ) &
1521	/ tri(4,i,k)
1522	ENDDO
1523	ENDDO
1524
1525	!
1526	!-- Indices i=0, j=0 correspond to horizontally averaged pressure.
1527	!-- The respective values of ar should be zero at all k-levels if
1528	!-- acceleration of horizontally averaged vertical velocity is zero.
1529	IF ( ibc_p_b == 1 .AND. ibc_p_t == 1 ) THEN
1530	IF ( j == 0 ) THEN
1531	DO k = 1, nz
1532	ar(0,k) = 0.0
1533	ENDDO
1534	ENDIF
1535	ENDIF
1536
1537	END SUBROUTINE substi_1dd
1538
1539	END SUBROUTINE tridia_1dd
1540
1541	#endif
1542
1543	END MODULE poisfft_mod

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: palm/trunk/SOURCE/poisfft.f90 @ 92

Download in other formats: