source: palm/trunk/SOURCE/cuda_fft_interfaces.f90 @ 1111

Last change on this file since 1111 was 1111, checked in by raasch, 9 years ago

New:
---

GPU porting of pres, swap_timelevel. Adjustments of openACC directives.
Further porting of poisfft, which now runs completely on GPU without any
host/device data transfer for serial and parallel runs (but parallel runs
require data transfer before and after the MPI transpositions).
GPU-porting of tridiagonal solver:
tridiagonal routines split into external subroutines (instead of using CONTAINS),
no distinction between parallel/non-parallel in poisfft and tridia any more,
tridia routines moved to end of file because of probable bug in PGI compiler
(otherwise "invalid device function" is indicated during runtime).
(cuda_fft_interfaces, fft_xy, flow_statistics, init_3d_model, palm, poisfft, pres, prognostic_equations, swap_timelevel, time_integration, transpose)
output of accelerator board information. (header)

optimization of tridia routines: constant elements and coefficients of tri are
stored in separate arrays ddzuw and tric, last dimension of tri reduced from 5 to 2,
(init_grid, init_3d_model, modules, palm, poisfft)

poisfft_init is now called internally from poisfft,
(Makefile, Makefile_check, init_pegrid, poisfft, poisfft_hybrid)

CPU-time per grid point and timestep is output to CPU_MEASURES file
(cpu_statistics, modules, time_integration)

Changed:


resorting from/to array work changed, work now has 4 dimensions instead of 1 (transpose)
array diss allocated only if required (init_3d_model)

pressure boundary condition "Neumann+inhomo" removed from the code
(check_parameters, header, poisfft, poisfft_hybrid, pres)

Errors:


bugfix: dependency added for cuda_fft_interfaces (Makefile)
bugfix: CUDA fft plans adjusted for domain decomposition (before they always
used total domain) (fft_xy)

  • Property svn:keywords set to Id
File size: 3.3 KB
Line 
MODULE cuda_fft_interfaces

!--------------------------------------------------------------------------------!
! This file is part of PALM.
!
! PALM is free software: you can redistribute it and/or modify it under the terms
! of the GNU General Public License as published by the Free Software Foundation,
! either version 3 of the License, or (at your option) any later version.
!
! PALM is distributed in the hope that it will be useful, but WITHOUT ANY
! WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
! A PARTICULAR PURPOSE.  See the GNU General Public License for more details.
!
! You should have received a copy of the GNU General Public License along with
! PALM. If not, see <http://www.gnu.org/licenses/>.
!
! Copyright 1997-2012  Leibniz University Hannover
!--------------------------------------------------------------------------------!
!
! Current revisions:
! -----------------
! idata and odata changed from 1d- to 3d-arrays
!
! Former revisions:
! -----------------
! $Id: cuda_fft_interfaces.f90 1111 2013-03-08 23:54:10Z raasch $
!
! 1106 2013-03-04 05:31:38Z raasch
! Initial revision
!
! Description:
! ------------
! FORTRAN interfaces for the CUDA fft
! Routines for the fft along x and y (forward/backward) using the CUDA fft
!
! The whole module body is only compiled if the preprocessor macro __cuda_fft
! is defined; otherwise the module is empty.
!
! NOTE(review): the cuFFT C routines bound below return a cufftResult status
! code (see the cuFFT API reference). Because they are declared here as
! SUBROUTINEs, that status is discarded and failures go unnoticed. Turning them
! into INTEGER(C_INT) FUNCTIONs would change the calling convention for all
! existing callers, so this is left as is.
!--------------------------------------------------------------------------------!

#if defined ( __cuda_fft )

    IMPLICIT NONE

    PUBLIC

!
!-- Transform directions and transform types of the cuFFT library.
!-- The values are declared as named constants (PARAMETER) so that they cannot
!-- be modified accidentally by a program unit using this module. Decimal
!-- values are used instead of BOZ literals, because a BOZ literal in an
!-- initialization expression is a compiler extension; the hexadecimal value is
!-- given in the comment of each transform type.
    INTEGER, PARAMETER ::  CUFFT_FORWARD = -1,   &
                           CUFFT_INVERSE =  1,   &
                           CUFFT_R2C =  42,      &  ! Z'2a' Real to Complex (interleaved)
                           CUFFT_C2R =  44,      &  ! Z'2c' Complex (interleaved) to Real
                           CUFFT_C2C =  41,      &  ! Z'29' Complex to Complex, interleaved
                           CUFFT_D2Z = 106,      &  ! Z'6a' Double to Double-Complex
                           CUFFT_Z2D = 108,      &  ! Z'6c' Double-Complex to Double
                           CUFFT_Z2Z = 105          ! Z'69' Double-Complex to Double-Complex


!
!-- cufftPlan1d( cufftHandle *plan, int nx, cufftType type, int batch )
!-- Binding to the C routine cufftPlan1d.
!--    plan  : passed by reference (corresponds to the pointer argument of the
!--            C prototype above)
!--    nx    : passed by value
!--    type  : passed by value, one of the CUFFT_xxx transform types from above
!--    batch : passed by value
    INTERFACE CUFFTPLAN1D

       SUBROUTINE CUFFTPLAN1D( plan, nx, type, batch ) bind( C, name='cufftPlan1d' )

          USE ISO_C_BINDING

          INTEGER(C_INT) ::  plan
          INTEGER(C_INT), value ::  batch, nx, type

       END SUBROUTINE CUFFTPLAN1D

    END INTERFACE CUFFTPLAN1D

!
!-- cufftDestroy( cufftHandle plan )  !!! remove later if not really needed !!!
!-- Binding to the C routine cufftDestroy. The plan handle is passed by value.
    INTERFACE CUFFTDESTROY

       SUBROUTINE CUFFTDESTROY( plan ) bind( C, name='cufftDestroy' )

          USE ISO_C_BINDING

          INTEGER(C_INT), value ::  plan

       END SUBROUTINE CUFFTDESTROY

    END INTERFACE CUFFTDESTROY


!
!-- Binding to the C routine cufftExecZ2D (double-complex to double).
!--    plan  : plan handle, passed by value
!--    idata : complex input, rank-3 array with device attribute
!--    odata : real output, rank-3 array with device attribute
!-- The kind dpk is taken from module precision_kind.
!-- NOTE(review): idata and odata are assumed-shape dummy arguments in a
!-- BIND(C) interface. This presumably relies on the compiler passing the base
!-- address of the device array to the C routine - confirm before building
!-- with a different compiler.
    INTERFACE CUFFTEXECZ2D

       SUBROUTINE CUFFTEXECZ2D( plan, idata, odata ) bind( C, name='cufftExecZ2D' )

          USE ISO_C_BINDING
          USE precision_kind

          INTEGER(C_INT), value ::  plan
          COMPLEX(dpk), device  ::  idata(:,:,:)
          REAL(dpk), device     ::  odata(:,:,:)

       END SUBROUTINE CUFFTEXECZ2D

    END INTERFACE CUFFTEXECZ2D


!
!-- Binding to the C routine cufftExecD2Z (double to double-complex).
!--    plan  : plan handle, passed by value
!--    idata : real input, rank-3 array with device attribute
!--    odata : complex output, rank-3 array with device attribute
!-- The kind dpk is taken from module precision_kind.
!-- NOTE(review): idata and odata are assumed-shape dummy arguments in a
!-- BIND(C) interface. This presumably relies on the compiler passing the base
!-- address of the device array to the C routine - confirm before building
!-- with a different compiler.
    INTERFACE CUFFTEXECD2Z

       SUBROUTINE CUFFTEXECD2Z( plan, idata, odata ) bind( C, name='cufftExecD2Z' )

          USE ISO_C_BINDING
          USE precision_kind

          INTEGER(C_INT), value ::  plan
          REAL(dpk), device     ::  idata(:,:,:)
          COMPLEX(dpk), device  ::  odata(:,:,:)

       END SUBROUTINE CUFFTEXECD2Z

    END INTERFACE CUFFTEXECD2Z

#endif
END MODULE cuda_fft_interfaces
Note: See TracBrowser for help on using the repository browser.