Home

Context Navigation

source: palm/trunk/SOURCE/tridia_solver.f90 @ 1216

Last change on this file since 1216 was 1216, checked in by raasch, 11 years ago
overlapping execution of fft and transpositions (MPI_ALLTOALL), but real overlapping is not activated so far, fftw implemented for 1D-decomposition resorting of arrays moved to separate routines resort_for_... bugfix in mbuild concerning Makefile_check
Property svn:keywords set to `Id`
File size: 18.8 KB

Line
1	MODULE tridia_solver
2
3	!--------------------------------------------------------------------------------!
4	! This file is part of PALM.
5	!
6	! PALM is free software: you can redistribute it and/or modify it under the terms
7	! of the GNU General Public License as published by the Free Software Foundation,
8	! either version 3 of the License, or (at your option) any later version.
9	!
10	! PALM is distributed in the hope that it will be useful, but WITHOUT ANY
11	! WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
12	! A PARTICULAR PURPOSE. See the GNU General Public License for more details.
13	!
14	! You should have received a copy of the GNU General Public License along with
15	! PALM. If not, see <http://www.gnu.org/licenses/>.
16	!
17	! Copyright 1997-2012 Leibniz University Hannover
18	!--------------------------------------------------------------------------------!
19	!
20	! Current revisions:
21	! ------------------
22	! +tridia_substi_overlap for handling overlapping fft / transposition
23	!
24	! Former revisions:
25	! -----------------
26	! $Id: tridia_solver.f90 1216 2013-08-26 09:31:42Z raasch $
27	!
28	! 1212 2013-08-15 08:46:27Z raasch
29	! Initial revision.
30	! Routines have been moved to seperate module from former file poisfft to here.
31	! The tridiagonal matrix coefficients of array tri are calculated only once at
32	! the beginning, i.e. routine split is called within tridia_init.
33	!
34	!
35	! Description:
36	! ------------
37	! solves the linear system of equations:
38	!
39	! -(4 pi^2(i^2/(dx^2nnx^2)+j^2/(dy^2nny^2))+
40	! 1/(dzu(k)dzw(k))+1/(dzu(k-1)dzw(k)))*p(i,j,k)+
41	! 1/(dzu(k)dzw(k))p(i,j,k+1)+1/(dzu(k-1)dzw(k))p(i,j,k-1)=d(i,j,k)
42	!
43	! by using the Thomas algorithm
44	!------------------------------------------------------------------------------!
45
46	USE indices
47	USE transpose_indices
48
49	IMPLICIT NONE
50
51	REAL, DIMENSION(:,:), ALLOCATABLE :: ddzuw
52
53	PRIVATE
54
55	INTERFACE tridia_substi
56	MODULE PROCEDURE tridia_substi
57	END INTERFACE tridia_substi
58
59	INTERFACE tridia_substi_overlap
60	MODULE PROCEDURE tridia_substi_overlap
61	END INTERFACE tridia_substi_overlap
62
63	PUBLIC tridia_substi, tridia_substi_overlap, tridia_init, tridia_1dd
64
65	CONTAINS
66
67
68	SUBROUTINE tridia_init
69
70	USE arrays_3d, ONLY: ddzu_pres, ddzw
71
72	IMPLICIT NONE
73
74	INTEGER :: k
75
76	ALLOCATE( ddzuw(0:nz-1,3) )
77
78	DO k = 0, nz-1
79	ddzuw(k,1) = ddzu_pres(k+1) * ddzw(k+1)
80	ddzuw(k,2) = ddzu_pres(k+2) * ddzw(k+1)
81	ddzuw(k,3) = -1.0 * &
82	( ddzu_pres(k+2) * ddzw(k+1) + ddzu_pres(k+1) * ddzw(k+1) )
83	ENDDO
84	!
85	!-- Calculate constant coefficients of the tridiagonal matrix
86	#if ! defined ( __check )
87	CALL maketri
88	CALL split
89	#endif
90
91	END SUBROUTINE tridia_init
92
93
94	SUBROUTINE maketri
95
96	!------------------------------------------------------------------------------!
97	! Computes the i- and j-dependent component of the matrix
98	!------------------------------------------------------------------------------!
99
100	USE arrays_3d, ONLY: tric
101	USE constants
102	USE control_parameters
103	USE grid_variables
104
105	IMPLICIT NONE
106
107	INTEGER :: i, j, k, nnxh, nnyh
108
109	!$acc declare create( ll )
110	REAL :: ll(nxl_z:nxr_z,nys_z:nyn_z)
111
112
113	nnxh = ( nx + 1 ) / 2
114	nnyh = ( ny + 1 ) / 2
115
116	!
117	!-- Provide the constant coefficients of the tridiagonal matrix for solution
118	!-- of the Poisson equation in Fourier space.
119	!-- The coefficients are computed following the method of
120	!-- Schmidt et al. (DFVLR-Mitteilung 84-15), which departs from Stephan
121	!-- Siano's original version by discretizing the Poisson equation,
122	!-- before it is Fourier-transformed.
123
124	!$acc kernels present( tric )
125	!$acc loop vector( 32 )
126	DO j = nys_z, nyn_z
127	DO i = nxl_z, nxr_z
128	IF ( j >= 0 .AND. j <= nnyh ) THEN
129	IF ( i >= 0 .AND. i <= nnxh ) THEN
130	ll(i,j) = 2.0 * ( 1.0 - COS( ( 2.0 * pi * i ) / &
131	REAL( nx+1 ) ) ) / ( dx * dx ) + &
132	2.0 * ( 1.0 - COS( ( 2.0 * pi * j ) / &
133	REAL( ny+1 ) ) ) / ( dy * dy )
134	ELSE
135	ll(i,j) = 2.0 * ( 1.0 - COS( ( 2.0 * pi * ( nx+1-i ) ) / &
136	REAL( nx+1 ) ) ) / ( dx * dx ) + &
137	2.0 * ( 1.0 - COS( ( 2.0 * pi * j ) / &
138	REAL( ny+1 ) ) ) / ( dy * dy )
139	ENDIF
140	ELSE
141	IF ( i >= 0 .AND. i <= nnxh ) THEN
142	ll(i,j) = 2.0 * ( 1.0 - COS( ( 2.0 * pi * i ) / &
143	REAL( nx+1 ) ) ) / ( dx * dx ) + &
144	2.0 * ( 1.0 - COS( ( 2.0 * pi * ( ny+1-j ) ) / &
145	REAL( ny+1 ) ) ) / ( dy * dy )
146	ELSE
147	ll(i,j) = 2.0 * ( 1.0 - COS( ( 2.0 * pi * ( nx+1-i ) ) / &
148	REAL( nx+1 ) ) ) / ( dx * dx ) + &
149	2.0 * ( 1.0 - COS( ( 2.0 * pi * ( ny+1-j ) ) / &
150	REAL( ny+1 ) ) ) / ( dy * dy )
151	ENDIF
152	ENDIF
153	ENDDO
154	ENDDO
155
156	!$acc loop
157	DO k = 0, nz-1
158	DO j = nys_z, nyn_z
159	!$acc loop vector( 32 )
160	DO i = nxl_z, nxr_z
161	tric(i,j,k) = ddzuw(k,3) - ll(i,j)
162	ENDDO
163	ENDDO
164	ENDDO
165	!$acc end kernels
166
167	IF ( ibc_p_b == 1 ) THEN
168	!$acc kernels present( tric )
169	!$acc loop
170	DO j = nys_z, nyn_z
171	DO i = nxl_z, nxr_z
172	tric(i,j,0) = tric(i,j,0) + ddzuw(0,1)
173	ENDDO
174	ENDDO
175	!$acc end kernels
176	ENDIF
177	IF ( ibc_p_t == 1 ) THEN
178	!$acc kernels present( tric )
179	!$acc loop
180	DO j = nys_z, nyn_z
181	DO i = nxl_z, nxr_z
182	tric(i,j,nz-1) = tric(i,j,nz-1) + ddzuw(nz-1,2)
183	ENDDO
184	ENDDO
185	!$acc end kernels
186	ENDIF
187
188	END SUBROUTINE maketri
189
190
191	SUBROUTINE tridia_substi( ar )
192
193	!------------------------------------------------------------------------------!
194	! Substitution (Forward and Backward) (Thomas algorithm)
195	!------------------------------------------------------------------------------!
196
197	USE arrays_3d, ONLY: tri
198	USE control_parameters
199
200	IMPLICIT NONE
201
202	INTEGER :: i, j, k
203
204	REAL :: ar(nxl_z:nxr_z,nys_z:nyn_z,1:nz)
205
206	!$acc declare create( ar1 )
207	REAL, DIMENSION(nxl_z:nxr_z,nys_z:nyn_z,0:nz-1) :: ar1
208
209	!
210	!-- Forward substitution
211	DO k = 0, nz - 1
212	!$acc kernels present( ar, tri )
213	!$acc loop
214	DO j = nys_z, nyn_z
215	DO i = nxl_z, nxr_z
216
217	IF ( k == 0 ) THEN
218	ar1(i,j,k) = ar(i,j,k+1)
219	ELSE
220	ar1(i,j,k) = ar(i,j,k+1) - tri(i,j,k,2) * ar1(i,j,k-1)
221	ENDIF
222
223	ENDDO
224	ENDDO
225	!$acc end kernels
226	ENDDO
227
228	!
229	!-- Backward substitution
230	!-- Note, the 1.0E-20 in the denominator is due to avoid divisions
231	!-- by zero appearing if the pressure bc is set to neumann at the top of
232	!-- the model domain.
233	DO k = nz-1, 0, -1
234	!$acc kernels present( ar, tri )
235	!$acc loop
236	DO j = nys_z, nyn_z
237	DO i = nxl_z, nxr_z
238
239	IF ( k == nz-1 ) THEN
240	ar(i,j,k+1) = ar1(i,j,k) / ( tri(i,j,k,1) + 1.0E-20 )
241	ELSE
242	ar(i,j,k+1) = ( ar1(i,j,k) - ddzuw(k,2) * ar(i,j,k+2) ) &
243	/ tri(i,j,k,1)
244	ENDIF
245	ENDDO
246	ENDDO
247	!$acc end kernels
248	ENDDO
249
250	!
251	!-- Indices i=0, j=0 correspond to horizontally averaged pressure.
252	!-- The respective values of ar should be zero at all k-levels if
253	!-- acceleration of horizontally averaged vertical velocity is zero.
254	IF ( ibc_p_b == 1 .AND. ibc_p_t == 1 ) THEN
255	IF ( nys_z == 0 .AND. nxl_z == 0 ) THEN
256	!$acc kernels loop present( ar )
257	DO k = 1, nz
258	ar(nxl_z,nys_z,k) = 0.0
259	ENDDO
260	ENDIF
261	ENDIF
262
263	END SUBROUTINE tridia_substi
264
265
266	SUBROUTINE tridia_substi_overlap( ar, jj )
267
268	!------------------------------------------------------------------------------!
269	! Substitution (Forward and Backward) (Thomas algorithm)
270	!------------------------------------------------------------------------------!
271
272	USE arrays_3d, ONLY: tri
273	USE control_parameters
274
275	IMPLICIT NONE
276
277	INTEGER :: i, j, jj, k
278
279	REAL :: ar(nxl_z:nxr_z,nys_z:nyn_z,1:nz)
280
281	!$acc declare create( ar1 )
282	REAL, DIMENSION(nxl_z:nxr_z,nys_z:nyn_z,0:nz-1) :: ar1
283
284	!
285	!-- Forward substitution
286	DO k = 0, nz - 1
287	!$acc kernels present( ar, tri )
288	!$acc loop
289	DO j = nys_z, nyn_z
290	DO i = nxl_z, nxr_z
291
292	IF ( k == 0 ) THEN
293	ar1(i,j,k) = ar(i,j,k+1)
294	ELSE
295	ar1(i,j,k) = ar(i,j,k+1) - tri(i,jj,k,2) * ar1(i,j,k-1)
296	ENDIF
297
298	ENDDO
299	ENDDO
300	!$acc end kernels
301	ENDDO
302
303	!
304	!-- Backward substitution
305	!-- Note, the 1.0E-20 in the denominator is due to avoid divisions
306	!-- by zero appearing if the pressure bc is set to neumann at the top of
307	!-- the model domain.
308	DO k = nz-1, 0, -1
309	!$acc kernels present( ar, tri )
310	!$acc loop
311	DO j = nys_z, nyn_z
312	DO i = nxl_z, nxr_z
313
314	IF ( k == nz-1 ) THEN
315	ar(i,j,k+1) = ar1(i,j,k) / ( tri(i,jj,k,1) + 1.0E-20 )
316	ELSE
317	ar(i,j,k+1) = ( ar1(i,j,k) - ddzuw(k,2) * ar(i,j,k+2) ) &
318	/ tri(i,jj,k,1)
319	ENDIF
320	ENDDO
321	ENDDO
322	!$acc end kernels
323	ENDDO
324
325	!
326	!-- Indices i=0, j=0 correspond to horizontally averaged pressure.
327	!-- The respective values of ar should be zero at all k-levels if
328	!-- acceleration of horizontally averaged vertical velocity is zero.
329	IF ( ibc_p_b == 1 .AND. ibc_p_t == 1 ) THEN
330	IF ( nys_z == 0 .AND. nxl_z == 0 ) THEN
331	!$acc kernels loop present( ar )
332	DO k = 1, nz
333	ar(nxl_z,nys_z,k) = 0.0
334	ENDDO
335	ENDIF
336	ENDIF
337
338	END SUBROUTINE tridia_substi_overlap
339
340
341	SUBROUTINE split
342
343	!------------------------------------------------------------------------------!
344	! Splitting of the tridiagonal matrix (Thomas algorithm)
345	!------------------------------------------------------------------------------!
346
347	USE arrays_3d, ONLY: tri, tric
348
349	IMPLICIT NONE
350
351	INTEGER :: i, j, k
352
353	!
354	!-- Splitting
355	!$acc kernels present( tri, tric )
356	!$acc loop
357	DO j = nys_z, nyn_z
358	!$acc loop vector( 32 )
359	DO i = nxl_z, nxr_z
360	tri(i,j,0,1) = tric(i,j,0)
361	ENDDO
362	ENDDO
363	!$acc end kernels
364
365	DO k = 1, nz-1
366	!$acc kernels present( tri, tric )
367	!$acc loop
368	DO j = nys_z, nyn_z
369	!$acc loop vector( 32 )
370	DO i = nxl_z, nxr_z
371	tri(i,j,k,2) = ddzuw(k,1) / tri(i,j,k-1,1)
372	tri(i,j,k,1) = tric(i,j,k) - ddzuw(k-1,2) * tri(i,j,k,2)
373	ENDDO
374	ENDDO
375	!$acc end kernels
376	ENDDO
377
378	END SUBROUTINE split
379
380
381	SUBROUTINE tridia_1dd( ddx2, ddy2, nx, ny, j, ar, tri )
382
383	!------------------------------------------------------------------------------!
384	! Solves the linear system of equations for a 1d-decomposition along x (see
385	! tridia)
386	!
387	! Attention: when using the intel compilers older than 12.0, array tri must
388	! be passed as an argument to the contained subroutines. Otherwise
389	! addres faults will occur. This feature can be activated with
390	! cpp-switch __intel11
391	! On NEC, tri should not be passed (except for routine substi_1dd)
392	! because this causes very bad performance.
393	!------------------------------------------------------------------------------!
394
395	USE arrays_3d
396	USE control_parameters
397
398	USE pegrid
399
400	IMPLICIT NONE
401
402	INTEGER :: i, j, k, nnyh, nx, ny, omp_get_thread_num, tn
403
404	REAL :: ddx2, ddy2
405
406	REAL, DIMENSION(0:nx,1:nz) :: ar
407	REAL, DIMENSION(5,0:nx,0:nz-1) :: tri
408
409
410	nnyh = ( ny + 1 ) / 2
411
412	!
413	!-- Define constant elements of the tridiagonal matrix.
414	!-- The compiler on SX6 does loop exchange. If 0:nx is a high power of 2,
415	!-- the exchanged loops create bank conflicts. The following directive
416	!-- prohibits loop exchange and the loops perform much better.
417	! tn = omp_get_thread_num()
418	! WRITE( 120+tn, * ) '+++ id=',myid,' nx=',nx,' thread=', omp_get_thread_num()
419	! CALL local_flush( 120+tn )
420	!CDIR NOLOOPCHG
421	DO k = 0, nz-1
422	DO i = 0,nx
423	tri(2,i,k) = ddzu_pres(k+1) * ddzw(k+1)
424	tri(3,i,k) = ddzu_pres(k+2) * ddzw(k+1)
425	ENDDO
426	ENDDO
427	! WRITE( 120+tn, * ) '+++ id=',myid,' end of first tridia loop thread=', omp_get_thread_num()
428	! CALL local_flush( 120+tn )
429
430	IF ( j <= nnyh ) THEN
431	#if defined( __intel11 )
432	CALL maketri_1dd( j, tri )
433	#else
434	CALL maketri_1dd( j )
435	#endif
436	ELSE
437	#if defined( __intel11 )
438	CALL maketri_1dd( ny+1-j, tri )
439	#else
440	CALL maketri_1dd( ny+1-j )
441	#endif
442	ENDIF
443	#if defined( __intel11 )
444	CALL split_1dd( tri )
445	#else
446	CALL split_1dd
447	#endif
448	CALL substi_1dd( ar, tri )
449
450	CONTAINS
451
452	#if defined( __intel11 )
453	SUBROUTINE maketri_1dd( j, tri )
454	#else
455	SUBROUTINE maketri_1dd( j )
456	#endif
457
458	!------------------------------------------------------------------------------!
459	! computes the i- and j-dependent component of the matrix
460	!------------------------------------------------------------------------------!
461
462	USE constants
463
464	IMPLICIT NONE
465
466	INTEGER :: i, j, k, nnxh
467	REAL :: a, c
468
469	REAL, DIMENSION(0:nx) :: l
470
471	#if defined( __intel11 )
472	REAL, DIMENSION(5,0:nx,0:nz-1) :: tri
473	#endif
474
475
476	nnxh = ( nx + 1 ) / 2
477	!
478	!-- Provide the tridiagonal matrix for solution of the Poisson equation in
479	!-- Fourier space. The coefficients are computed following the method of
480	!-- Schmidt et al. (DFVLR-Mitteilung 84-15), which departs from Stephan
481	!-- Siano's original version by discretizing the Poisson equation,
482	!-- before it is Fourier-transformed
483	DO i = 0, nx
484	IF ( i >= 0 .AND. i <= nnxh ) THEN
485	l(i) = 2.0 * ( 1.0 - COS( ( 2.0 * pi * i ) / &
486	REAL( nx+1 ) ) ) * ddx2 + &
487	2.0 * ( 1.0 - COS( ( 2.0 * pi * j ) / &
488	REAL( ny+1 ) ) ) * ddy2
489	ELSE
490	l(i) = 2.0 * ( 1.0 - COS( ( 2.0 * pi * ( nx+1-i ) ) / &
491	REAL( nx+1 ) ) ) * ddx2 + &
492	2.0 * ( 1.0 - COS( ( 2.0 * pi * j ) / &
493	REAL( ny+1 ) ) ) * ddy2
494	ENDIF
495	ENDDO
496
497	DO k = 0, nz-1
498	DO i = 0, nx
499	a = -1.0 * ddzu_pres(k+2) * ddzw(k+1)
500	c = -1.0 * ddzu_pres(k+1) * ddzw(k+1)
501	tri(1,i,k) = a + c - l(i)
502	ENDDO
503	ENDDO
504	IF ( ibc_p_b == 1 ) THEN
505	DO i = 0, nx
506	tri(1,i,0) = tri(1,i,0) + tri(2,i,0)
507	ENDDO
508	ENDIF
509	IF ( ibc_p_t == 1 ) THEN
510	DO i = 0, nx
511	tri(1,i,nz-1) = tri(1,i,nz-1) + tri(3,i,nz-1)
512	ENDDO
513	ENDIF
514
515	END SUBROUTINE maketri_1dd
516
517
518	#if defined( __intel11 )
519	SUBROUTINE split_1dd( tri )
520	#else
521	SUBROUTINE split_1dd
522	#endif
523
524	!------------------------------------------------------------------------------!
525	! Splitting of the tridiagonal matrix (Thomas algorithm)
526	!------------------------------------------------------------------------------!
527
528	IMPLICIT NONE
529
530	INTEGER :: i, k
531
532	#if defined( __intel11 )
533	REAL, DIMENSION(5,0:nx,0:nz-1) :: tri
534	#endif
535
536
537	!
538	!-- Splitting
539	DO i = 0, nx
540	tri(4,i,0) = tri(1,i,0)
541	ENDDO
542	DO k = 1, nz-1
543	DO i = 0, nx
544	tri(5,i,k) = tri(2,i,k) / tri(4,i,k-1)
545	tri(4,i,k) = tri(1,i,k) - tri(3,i,k-1) * tri(5,i,k)
546	ENDDO
547	ENDDO
548
549	END SUBROUTINE split_1dd
550
551
552	SUBROUTINE substi_1dd( ar, tri )
553
554	!------------------------------------------------------------------------------!
555	! Substitution (Forward and Backward) (Thomas algorithm)
556	!------------------------------------------------------------------------------!
557
558	IMPLICIT NONE
559
560	INTEGER :: i, k
561
562	REAL, DIMENSION(0:nx,nz) :: ar
563	REAL, DIMENSION(0:nx,0:nz-1) :: ar1
564	REAL, DIMENSION(5,0:nx,0:nz-1) :: tri
565
566	!
567	!-- Forward substitution
568	DO i = 0, nx
569	ar1(i,0) = ar(i,1)
570	ENDDO
571	DO k = 1, nz-1
572	DO i = 0, nx
573	ar1(i,k) = ar(i,k+1) - tri(5,i,k) * ar1(i,k-1)
574	ENDDO
575	ENDDO
576
577	!
578	!-- Backward substitution
579	!-- Note, the add of 1.0E-20 in the denominator is due to avoid divisions
580	!-- by zero appearing if the pressure bc is set to neumann at the top of
581	!-- the model domain.
582	DO i = 0, nx
583	ar(i,nz) = ar1(i,nz-1) / ( tri(4,i,nz-1) + 1.0E-20 )
584	ENDDO
585	DO k = nz-2, 0, -1
586	DO i = 0, nx
587	ar(i,k+1) = ( ar1(i,k) - tri(3,i,k) * ar(i,k+2) ) &
588	/ tri(4,i,k)
589	ENDDO
590	ENDDO
591
592	!
593	!-- Indices i=0, j=0 correspond to horizontally averaged pressure.
594	!-- The respective values of ar should be zero at all k-levels if
595	!-- acceleration of horizontally averaged vertical velocity is zero.
596	IF ( ibc_p_b == 1 .AND. ibc_p_t == 1 ) THEN
597	IF ( j == 0 ) THEN
598	DO k = 1, nz
599	ar(0,k) = 0.0
600	ENDDO
601	ENDIF
602	ENDIF
603
604	END SUBROUTINE substi_1dd
605
606	END SUBROUTINE tridia_1dd
607
608
609	END MODULE tridia_solver

Note: See TracBrowser for help on using the repository browser.

Download in other formats:

| Impressum | ©Leibniz Universität Hannover |