Home

Context Navigation

tridia_solver.f90 @ 1221

Last change on this file since 1221 was 1221, checked in by raasch, 11 years ago

New:

openACC porting of reduction operations
additional 3D-flag arrays for replacing the 2D-index arrays nzb_s_inner and nzb_diff_s_inner
(flow_statistics, init_grid, init_3d_model, modules, palm, pres, time_integration)

Changed:

for PGI/openACC performance reasons set default compile options for openACC to "-ta=nocache",
and set environment variable PGI_ACC_SYNCHRONOUS=1
(MAKE.inc.pgi.openacc, palm_simple_run)

wall_flags_0 changed to 32bit INTEGER, additional array wall_flags_00 introduced to hold
bits 32-63
(advec_ws, init_grid, modules, palm)

Errors:

dummy argument tri in 1d-routines replaced by tri_for_1d because of name
conflict with arry tri in module arrays_3d
(tridia_solver)

Property svn:keywords set to Id

File size: 19.2 KB

Line
1	MODULE tridia_solver
2
3	!--------------------------------------------------------------------------------!
4	! This file is part of PALM.
5	!
6	! PALM is free software: you can redistribute it and/or modify it under the terms
7	! of the GNU General Public License as published by the Free Software Foundation,
8	! either version 3 of the License, or (at your option) any later version.
9	!
10	! PALM is distributed in the hope that it will be useful, but WITHOUT ANY
11	! WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
12	! A PARTICULAR PURPOSE. See the GNU General Public License for more details.
13	!
14	! You should have received a copy of the GNU General Public License along with
15	! PALM. If not, see <http://www.gnu.org/licenses/>.
16	!
17	! Copyright 1997-2012 Leibniz University Hannover
18	!--------------------------------------------------------------------------------!
19	!
20	! Current revisions:
21	! ------------------
22	! dummy argument tri in 1d-routines replaced by tri_for_1d because of name
23	! conflict with arry tri in module arrays_3d
24	!
25	! Former revisions:
26	! -----------------
27	! $Id: tridia_solver.f90 1221 2013-09-10 08:59:13Z raasch $
28	!
29	! 1216 2013-08-26 09:31:42Z raasch
30	! +tridia_substi_overlap for handling overlapping fft / transposition
31	!
32	! 1212 2013-08-15 08:46:27Z raasch
33	! Initial revision.
34	! Routines have been moved to seperate module from former file poisfft to here.
35	! The tridiagonal matrix coefficients of array tri are calculated only once at
36	! the beginning, i.e. routine split is called within tridia_init.
37	!
38	!
39	! Description:
40	! ------------
41	! solves the linear system of equations:
42	!
43	! -(4 pi^2(i^2/(dx^2nnx^2)+j^2/(dy^2nny^2))+
44	! 1/(dzu(k)dzw(k))+1/(dzu(k-1)dzw(k)))*p(i,j,k)+
45	! 1/(dzu(k)dzw(k))p(i,j,k+1)+1/(dzu(k-1)dzw(k))p(i,j,k-1)=d(i,j,k)
46	!
47	! by using the Thomas algorithm
48	!------------------------------------------------------------------------------!
49
50	USE indices
51	USE transpose_indices
52
53	IMPLICIT NONE
54
55	REAL, DIMENSION(:,:), ALLOCATABLE :: ddzuw
56
57	PRIVATE
58
59	INTERFACE tridia_substi
60	MODULE PROCEDURE tridia_substi
61	END INTERFACE tridia_substi
62
63	INTERFACE tridia_substi_overlap
64	MODULE PROCEDURE tridia_substi_overlap
65	END INTERFACE tridia_substi_overlap
66
67	PUBLIC tridia_substi, tridia_substi_overlap, tridia_init, tridia_1dd
68
69	CONTAINS
70
71
72	SUBROUTINE tridia_init
73
74	USE arrays_3d, ONLY: ddzu_pres, ddzw
75
76	IMPLICIT NONE
77
78	INTEGER :: k
79
80	ALLOCATE( ddzuw(0:nz-1,3) )
81
82	DO k = 0, nz-1
83	ddzuw(k,1) = ddzu_pres(k+1) * ddzw(k+1)
84	ddzuw(k,2) = ddzu_pres(k+2) * ddzw(k+1)
85	ddzuw(k,3) = -1.0 * &
86	( ddzu_pres(k+2) * ddzw(k+1) + ddzu_pres(k+1) * ddzw(k+1) )
87	ENDDO
88	!
89	!-- Calculate constant coefficients of the tridiagonal matrix
90	#if ! defined ( __check )
91	CALL maketri
92	CALL split
93	#endif
94
95	END SUBROUTINE tridia_init
96
97
98	SUBROUTINE maketri
99
100	!------------------------------------------------------------------------------!
101	! Computes the i- and j-dependent component of the matrix
102	!------------------------------------------------------------------------------!
103
104	USE arrays_3d, ONLY: tric
105	USE constants
106	USE control_parameters
107	USE grid_variables
108
109	IMPLICIT NONE
110
111	INTEGER :: i, j, k, nnxh, nnyh
112
113	!$acc declare create( ll )
114	REAL :: ll(nxl_z:nxr_z,nys_z:nyn_z)
115
116
117	nnxh = ( nx + 1 ) / 2
118	nnyh = ( ny + 1 ) / 2
119
120	!
121	!-- Provide the constant coefficients of the tridiagonal matrix for solution
122	!-- of the Poisson equation in Fourier space.
123	!-- The coefficients are computed following the method of
124	!-- Schmidt et al. (DFVLR-Mitteilung 84-15), which departs from Stephan
125	!-- Siano's original version by discretizing the Poisson equation,
126	!-- before it is Fourier-transformed.
127
128	!$acc kernels present( tric )
129	!$acc loop vector( 32 )
130	DO j = nys_z, nyn_z
131	DO i = nxl_z, nxr_z
132	IF ( j >= 0 .AND. j <= nnyh ) THEN
133	IF ( i >= 0 .AND. i <= nnxh ) THEN
134	ll(i,j) = 2.0 * ( 1.0 - COS( ( 2.0 * pi * i ) / &
135	REAL( nx+1 ) ) ) / ( dx * dx ) + &
136	2.0 * ( 1.0 - COS( ( 2.0 * pi * j ) / &
137	REAL( ny+1 ) ) ) / ( dy * dy )
138	ELSE
139	ll(i,j) = 2.0 * ( 1.0 - COS( ( 2.0 * pi * ( nx+1-i ) ) / &
140	REAL( nx+1 ) ) ) / ( dx * dx ) + &
141	2.0 * ( 1.0 - COS( ( 2.0 * pi * j ) / &
142	REAL( ny+1 ) ) ) / ( dy * dy )
143	ENDIF
144	ELSE
145	IF ( i >= 0 .AND. i <= nnxh ) THEN
146	ll(i,j) = 2.0 * ( 1.0 - COS( ( 2.0 * pi * i ) / &
147	REAL( nx+1 ) ) ) / ( dx * dx ) + &
148	2.0 * ( 1.0 - COS( ( 2.0 * pi * ( ny+1-j ) ) / &
149	REAL( ny+1 ) ) ) / ( dy * dy )
150	ELSE
151	ll(i,j) = 2.0 * ( 1.0 - COS( ( 2.0 * pi * ( nx+1-i ) ) / &
152	REAL( nx+1 ) ) ) / ( dx * dx ) + &
153	2.0 * ( 1.0 - COS( ( 2.0 * pi * ( ny+1-j ) ) / &
154	REAL( ny+1 ) ) ) / ( dy * dy )
155	ENDIF
156	ENDIF
157	ENDDO
158	ENDDO
159
160	!$acc loop
161	DO k = 0, nz-1
162	DO j = nys_z, nyn_z
163	!$acc loop vector( 32 )
164	DO i = nxl_z, nxr_z
165	tric(i,j,k) = ddzuw(k,3) - ll(i,j)
166	ENDDO
167	ENDDO
168	ENDDO
169	!$acc end kernels
170
171	IF ( ibc_p_b == 1 ) THEN
172	!$acc kernels present( tric )
173	!$acc loop
174	DO j = nys_z, nyn_z
175	DO i = nxl_z, nxr_z
176	tric(i,j,0) = tric(i,j,0) + ddzuw(0,1)
177	ENDDO
178	ENDDO
179	!$acc end kernels
180	ENDIF
181	IF ( ibc_p_t == 1 ) THEN
182	!$acc kernels present( tric )
183	!$acc loop
184	DO j = nys_z, nyn_z
185	DO i = nxl_z, nxr_z
186	tric(i,j,nz-1) = tric(i,j,nz-1) + ddzuw(nz-1,2)
187	ENDDO
188	ENDDO
189	!$acc end kernels
190	ENDIF
191
192	END SUBROUTINE maketri
193
194
195	SUBROUTINE tridia_substi( ar )
196
197	!------------------------------------------------------------------------------!
198	! Substitution (Forward and Backward) (Thomas algorithm)
199	!------------------------------------------------------------------------------!
200
201	USE arrays_3d, ONLY: tri
202	USE control_parameters
203
204	IMPLICIT NONE
205
206	INTEGER :: i, j, k
207
208	REAL :: ar(nxl_z:nxr_z,nys_z:nyn_z,1:nz)
209
210	!$acc declare create( ar1 )
211	REAL, DIMENSION(nxl_z:nxr_z,nys_z:nyn_z,0:nz-1) :: ar1
212
213	!
214	!-- Forward substitution
215	DO k = 0, nz - 1
216	!$acc kernels present( ar, tri )
217	!$acc loop
218	DO j = nys_z, nyn_z
219	DO i = nxl_z, nxr_z
220
221	IF ( k == 0 ) THEN
222	ar1(i,j,k) = ar(i,j,k+1)
223	ELSE
224	ar1(i,j,k) = ar(i,j,k+1) - tri(i,j,k,2) * ar1(i,j,k-1)
225	ENDIF
226
227	ENDDO
228	ENDDO
229	!$acc end kernels
230	ENDDO
231
232	!
233	!-- Backward substitution
234	!-- Note, the 1.0E-20 in the denominator is due to avoid divisions
235	!-- by zero appearing if the pressure bc is set to neumann at the top of
236	!-- the model domain.
237	DO k = nz-1, 0, -1
238	!$acc kernels present( ar, tri )
239	!$acc loop
240	DO j = nys_z, nyn_z
241	DO i = nxl_z, nxr_z
242
243	IF ( k == nz-1 ) THEN
244	ar(i,j,k+1) = ar1(i,j,k) / ( tri(i,j,k,1) + 1.0E-20 )
245	ELSE
246	ar(i,j,k+1) = ( ar1(i,j,k) - ddzuw(k,2) * ar(i,j,k+2) ) &
247	/ tri(i,j,k,1)
248	ENDIF
249	ENDDO
250	ENDDO
251	!$acc end kernels
252	ENDDO
253
254	!
255	!-- Indices i=0, j=0 correspond to horizontally averaged pressure.
256	!-- The respective values of ar should be zero at all k-levels if
257	!-- acceleration of horizontally averaged vertical velocity is zero.
258	IF ( ibc_p_b == 1 .AND. ibc_p_t == 1 ) THEN
259	IF ( nys_z == 0 .AND. nxl_z == 0 ) THEN
260	!$acc kernels loop present( ar )
261	DO k = 1, nz
262	ar(nxl_z,nys_z,k) = 0.0
263	ENDDO
264	ENDIF
265	ENDIF
266
267	END SUBROUTINE tridia_substi
268
269
270	SUBROUTINE tridia_substi_overlap( ar, jj )
271
272	!------------------------------------------------------------------------------!
273	! Substitution (Forward and Backward) (Thomas algorithm)
274	!------------------------------------------------------------------------------!
275
276	USE arrays_3d, ONLY: tri
277	USE control_parameters
278
279	IMPLICIT NONE
280
281	INTEGER :: i, j, jj, k
282
283	REAL :: ar(nxl_z:nxr_z,nys_z:nyn_z,1:nz)
284
285	!$acc declare create( ar1 )
286	REAL, DIMENSION(nxl_z:nxr_z,nys_z:nyn_z,0:nz-1) :: ar1
287
288	!
289	!-- Forward substitution
290	DO k = 0, nz - 1
291	!$acc kernels present( ar, tri )
292	!$acc loop
293	DO j = nys_z, nyn_z
294	DO i = nxl_z, nxr_z
295
296	IF ( k == 0 ) THEN
297	ar1(i,j,k) = ar(i,j,k+1)
298	ELSE
299	ar1(i,j,k) = ar(i,j,k+1) - tri(i,jj,k,2) * ar1(i,j,k-1)
300	ENDIF
301
302	ENDDO
303	ENDDO
304	!$acc end kernels
305	ENDDO
306
307	!
308	!-- Backward substitution
309	!-- Note, the 1.0E-20 in the denominator is due to avoid divisions
310	!-- by zero appearing if the pressure bc is set to neumann at the top of
311	!-- the model domain.
312	DO k = nz-1, 0, -1
313	!$acc kernels present( ar, tri )
314	!$acc loop
315	DO j = nys_z, nyn_z
316	DO i = nxl_z, nxr_z
317
318	IF ( k == nz-1 ) THEN
319	ar(i,j,k+1) = ar1(i,j,k) / ( tri(i,jj,k,1) + 1.0E-20 )
320	ELSE
321	ar(i,j,k+1) = ( ar1(i,j,k) - ddzuw(k,2) * ar(i,j,k+2) ) &
322	/ tri(i,jj,k,1)
323	ENDIF
324	ENDDO
325	ENDDO
326	!$acc end kernels
327	ENDDO
328
329	!
330	!-- Indices i=0, j=0 correspond to horizontally averaged pressure.
331	!-- The respective values of ar should be zero at all k-levels if
332	!-- acceleration of horizontally averaged vertical velocity is zero.
333	IF ( ibc_p_b == 1 .AND. ibc_p_t == 1 ) THEN
334	IF ( nys_z == 0 .AND. nxl_z == 0 ) THEN
335	!$acc kernels loop present( ar )
336	DO k = 1, nz
337	ar(nxl_z,nys_z,k) = 0.0
338	ENDDO
339	ENDIF
340	ENDIF
341
342	END SUBROUTINE tridia_substi_overlap
343
344
345	SUBROUTINE split
346
347	!------------------------------------------------------------------------------!
348	! Splitting of the tridiagonal matrix (Thomas algorithm)
349	!------------------------------------------------------------------------------!
350
351	USE arrays_3d, ONLY: tri, tric
352
353	IMPLICIT NONE
354
355	INTEGER :: i, j, k
356
357	!
358	!-- Splitting
359	!$acc kernels present( tri, tric )
360	!$acc loop
361	DO j = nys_z, nyn_z
362	!$acc loop vector( 32 )
363	DO i = nxl_z, nxr_z
364	tri(i,j,0,1) = tric(i,j,0)
365	ENDDO
366	ENDDO
367	!$acc end kernels
368
369	DO k = 1, nz-1
370	!$acc kernels present( tri, tric )
371	!$acc loop
372	DO j = nys_z, nyn_z
373	!$acc loop vector( 32 )
374	DO i = nxl_z, nxr_z
375	tri(i,j,k,2) = ddzuw(k,1) / tri(i,j,k-1,1)
376	tri(i,j,k,1) = tric(i,j,k) - ddzuw(k-1,2) * tri(i,j,k,2)
377	ENDDO
378	ENDDO
379	!$acc end kernels
380	ENDDO
381
382	END SUBROUTINE split
383
384
385	SUBROUTINE tridia_1dd( ddx2, ddy2, nx, ny, j, ar, tri_for_1d )
386
387	!------------------------------------------------------------------------------!
388	! Solves the linear system of equations for a 1d-decomposition along x (see
389	! tridia)
390	!
391	! Attention: when using the intel compilers older than 12.0, array tri must
392	! be passed as an argument to the contained subroutines. Otherwise
393	! addres faults will occur. This feature can be activated with
394	! cpp-switch __intel11
395	! On NEC, tri should not be passed (except for routine substi_1dd)
396	! because this causes very bad performance.
397	!------------------------------------------------------------------------------!
398
399	USE arrays_3d
400	USE control_parameters
401
402	USE pegrid
403
404	IMPLICIT NONE
405
406	INTEGER :: i, j, k, nnyh, nx, ny, omp_get_thread_num, tn
407
408	REAL :: ddx2, ddy2
409
410	REAL, DIMENSION(0:nx,1:nz) :: ar
411	REAL, DIMENSION(5,0:nx,0:nz-1) :: tri_for_1d
412
413
414	nnyh = ( ny + 1 ) / 2
415
416	!
417	!-- Define constant elements of the tridiagonal matrix.
418	!-- The compiler on SX6 does loop exchange. If 0:nx is a high power of 2,
419	!-- the exchanged loops create bank conflicts. The following directive
420	!-- prohibits loop exchange and the loops perform much better.
421	! tn = omp_get_thread_num()
422	! WRITE( 120+tn, * ) '+++ id=',myid,' nx=',nx,' thread=', omp_get_thread_num()
423	! CALL local_flush( 120+tn )
424	!CDIR NOLOOPCHG
425	DO k = 0, nz-1
426	DO i = 0,nx
427	tri_for_1d(2,i,k) = ddzu_pres(k+1) * ddzw(k+1)
428	tri_for_1d(3,i,k) = ddzu_pres(k+2) * ddzw(k+1)
429	ENDDO
430	ENDDO
431	! WRITE( 120+tn, * ) '+++ id=',myid,' end of first tridia loop thread=', omp_get_thread_num()
432	! CALL local_flush( 120+tn )
433
434	IF ( j <= nnyh ) THEN
435	#if defined( __intel11 )
436	CALL maketri_1dd( j, tri_for_1d )
437	#else
438	CALL maketri_1dd( j )
439	#endif
440	ELSE
441	#if defined( __intel11 )
442	CALL maketri_1dd( ny+1-j, tri_for_1d )
443	#else
444	CALL maketri_1dd( ny+1-j )
445	#endif
446	ENDIF
447	#if defined( __intel11 )
448	CALL split_1dd( tri_for_1d )
449	#else
450	CALL split_1dd
451	#endif
452	CALL substi_1dd( ar, tri_for_1d )
453
454	CONTAINS
455
456	#if defined( __intel11 )
457	SUBROUTINE maketri_1dd( j, tri_for_1d )
458	#else
459	SUBROUTINE maketri_1dd( j )
460	#endif
461
462	!------------------------------------------------------------------------------!
463	! computes the i- and j-dependent component of the matrix
464	!------------------------------------------------------------------------------!
465
466	USE constants
467
468	IMPLICIT NONE
469
470	INTEGER :: i, j, k, nnxh
471	REAL :: a, c
472
473	REAL, DIMENSION(0:nx) :: l
474
475	#if defined( __intel11 )
476	REAL, DIMENSION(5,0:nx,0:nz-1) :: tri_for_1d
477	#endif
478
479
480	nnxh = ( nx + 1 ) / 2
481	!
482	!-- Provide the tridiagonal matrix for solution of the Poisson equation in
483	!-- Fourier space. The coefficients are computed following the method of
484	!-- Schmidt et al. (DFVLR-Mitteilung 84-15), which departs from Stephan
485	!-- Siano's original version by discretizing the Poisson equation,
486	!-- before it is Fourier-transformed
487	DO i = 0, nx
488	IF ( i >= 0 .AND. i <= nnxh ) THEN
489	l(i) = 2.0 * ( 1.0 - COS( ( 2.0 * pi * i ) / &
490	REAL( nx+1 ) ) ) * ddx2 + &
491	2.0 * ( 1.0 - COS( ( 2.0 * pi * j ) / &
492	REAL( ny+1 ) ) ) * ddy2
493	ELSE
494	l(i) = 2.0 * ( 1.0 - COS( ( 2.0 * pi * ( nx+1-i ) ) / &
495	REAL( nx+1 ) ) ) * ddx2 + &
496	2.0 * ( 1.0 - COS( ( 2.0 * pi * j ) / &
497	REAL( ny+1 ) ) ) * ddy2
498	ENDIF
499	ENDDO
500
501	DO k = 0, nz-1
502	DO i = 0, nx
503	a = -1.0 * ddzu_pres(k+2) * ddzw(k+1)
504	c = -1.0 * ddzu_pres(k+1) * ddzw(k+1)
505	tri_for_1d(1,i,k) = a + c - l(i)
506	ENDDO
507	ENDDO
508	IF ( ibc_p_b == 1 ) THEN
509	DO i = 0, nx
510	tri_for_1d(1,i,0) = tri_for_1d(1,i,0) + tri_for_1d(2,i,0)
511	ENDDO
512	ENDIF
513	IF ( ibc_p_t == 1 ) THEN
514	DO i = 0, nx
515	tri_for_1d(1,i,nz-1) = tri_for_1d(1,i,nz-1) + tri_for_1d(3,i,nz-1)
516	ENDDO
517	ENDIF
518
519	END SUBROUTINE maketri_1dd
520
521
522	#if defined( __intel11 )
523	SUBROUTINE split_1dd( tri_for_1d )
524	#else
525	SUBROUTINE split_1dd
526	#endif
527
528	!------------------------------------------------------------------------------!
529	! Splitting of the tridiagonal matrix (Thomas algorithm)
530	!------------------------------------------------------------------------------!
531
532	IMPLICIT NONE
533
534	INTEGER :: i, k
535
536	#if defined( __intel11 )
537	REAL, DIMENSION(5,0:nx,0:nz-1) :: tri_for_1d
538	#endif
539
540
541	!
542	!-- Splitting
543	DO i = 0, nx
544	tri_for_1d(4,i,0) = tri_for_1d(1,i,0)
545	ENDDO
546	DO k = 1, nz-1
547	DO i = 0, nx
548	tri_for_1d(5,i,k) = tri_for_1d(2,i,k) / tri_for_1d(4,i,k-1)
549	tri_for_1d(4,i,k) = tri_for_1d(1,i,k) - tri_for_1d(3,i,k-1) * tri_for_1d(5,i,k)
550	ENDDO
551	ENDDO
552
553	END SUBROUTINE split_1dd
554
555
556	SUBROUTINE substi_1dd( ar, tri_for_1d )
557
558	!------------------------------------------------------------------------------!
559	! Substitution (Forward and Backward) (Thomas algorithm)
560	!------------------------------------------------------------------------------!
561
562	IMPLICIT NONE
563
564	INTEGER :: i, k
565
566	REAL, DIMENSION(0:nx,nz) :: ar
567	REAL, DIMENSION(0:nx,0:nz-1) :: ar1
568	REAL, DIMENSION(5,0:nx,0:nz-1) :: tri_for_1d
569
570	!
571	!-- Forward substitution
572	DO i = 0, nx
573	ar1(i,0) = ar(i,1)
574	ENDDO
575	DO k = 1, nz-1
576	DO i = 0, nx
577	ar1(i,k) = ar(i,k+1) - tri_for_1d(5,i,k) * ar1(i,k-1)
578	ENDDO
579	ENDDO
580
581	!
582	!-- Backward substitution
583	!-- Note, the add of 1.0E-20 in the denominator is due to avoid divisions
584	!-- by zero appearing if the pressure bc is set to neumann at the top of
585	!-- the model domain.
586	DO i = 0, nx
587	ar(i,nz) = ar1(i,nz-1) / ( tri_for_1d(4,i,nz-1) + 1.0E-20 )
588	ENDDO
589	DO k = nz-2, 0, -1
590	DO i = 0, nx
591	ar(i,k+1) = ( ar1(i,k) - tri_for_1d(3,i,k) * ar(i,k+2) ) &
592	/ tri_for_1d(4,i,k)
593	ENDDO
594	ENDDO
595
596	!
597	!-- Indices i=0, j=0 correspond to horizontally averaged pressure.
598	!-- The respective values of ar should be zero at all k-levels if
599	!-- acceleration of horizontally averaged vertical velocity is zero.
600	IF ( ibc_p_b == 1 .AND. ibc_p_t == 1 ) THEN
601	IF ( j == 0 ) THEN
602	DO k = 1, nz
603	ar(0,k) = 0.0
604	ENDDO
605	ENDIF
606	ENDIF
607
608	END SUBROUTINE substi_1dd
609
610	END SUBROUTINE tridia_1dd
611
612
613	END MODULE tridia_solver

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: palm/trunk/SOURCE/tridia_solver.f90 @ 1221

Download in other formats: