Changeset 1106 for palm/trunk/SOURCE
- Timestamp:
- Mar 4, 2013 5:31:38 AM (12 years ago)
- Location:
- palm/trunk/SOURCE
- Files:
-
- 1 added
- 14 edited
Legend:
- Unmodified
- Added
- Removed
-
palm/trunk/SOURCE/Makefile
r1055 r1106 20 20 # Current revisions: 21 21 # ------------------ 22 # +cuda_fft_interfaces 22 23 # 23 24 # Former revisions: … … 116 117 calc_radiation.f90 calc_spectra.f90 check_for_restart.f90 \ 117 118 check_open.f90 check_parameters.f90 close_file.f90 compute_vpt.f90 \ 118 coriolis.f90 cpu_log.f90 cpu_statistics.f90 data_log.f90 \ 119 coriolis.f90 cpu_log.f90 cpu_statistics.f90 cuda_fft_interfaces.f90 \ 120 data_log.f90 \ 119 121 data_output_dvrp.f90 data_output_mask.f90 data_output_profiles.f90 \ 120 122 data_output_ptseries.f90 data_output_spectra.f90 \ … … 140 142 lpm_set_attributes.f90 lpm_sort_arrays.f90 \ 141 143 lpm_write_exchange_statistics.f90 lpm_write_restart_file.f90 \ 142 message.f90 m odules.f90 netcdf.f90 package_parin.f90 palm.f90 \143 pa rin.f90 plant_canopy_model.f90 poisfft.f90 \144 message.f90 microphysics.f90 modules.f90 netcdf.f90 package_parin.f90 \ 145 palm.f90 parin.f90 plant_canopy_model.f90 poisfft.f90 \ 144 146 poisfft_hybrid.f90 poismg.f90 prandtl_fluxes.f90 pres.f90 print_1d.f90 \ 145 147 production_e.f90 prognostic_equations.f90 random_function.f90 \ … … 161 163 user_parin.f90 user_read_restart_data.f90 \ 162 164 user_spectra.f90 user_statistics.f90 wall_fluxes.f90 \ 163 write_3d_binary.f90 write_compressed.f90 write_var_list.f90 \ 164 microphysics.f90 165 write_3d_binary.f90 write_compressed.f90 write_var_list.f90 165 166 166 167 OBJS = advec_s_bc.o advec_s_pw.o advec_s_up.o advec_u_pw.o advec_u_up.o \ … … 170 171 calc_spectra.o check_for_restart.o check_open.o check_parameters.o \ 171 172 close_file.o compute_vpt.o coriolis.o cpu_log.o cpu_statistics.o \ 172 data_log.o data_output_dvrp.o data_output_mask.o \173 cuda_fft_interfaces.o data_log.o data_output_dvrp.o data_output_mask.o \ 173 174 data_output_profiles.o data_output_ptseries.o \ 174 175 data_output_spectra.o data_output_tseries.o data_output_2d.o \ … … 190 191 lpm_pack_arrays.o lpm_read_restart_file.o lpm_release_set.o \ 191 192 lpm_set_attributes.o lpm_sort_arrays.o lpm_write_exchange_statistics.o \ 192 lpm_write_restart_file.o message.o m odules.o netcdf.o package_parin.o \193 pa lm.o parin.o plant_canopy_model.o poisfft.o \193 lpm_write_restart_file.o message.o microphysics.o modules.o netcdf.o \ 194 package_parin.o palm.o parin.o plant_canopy_model.o poisfft.o \ 194 195 poisfft_hybrid.o poismg.o prandtl_fluxes.o pres.o print_1d.o \ 195 196 production_e.o prognostic_equations.o random_function.o random_gauss.o \ … … 208 209 user_lpm_init.o user_lpm_set_attributes.o user_module.o user_parin.o \ 209 210 user_read_restart_data.o user_spectra.o user_statistics.o \ 210 wall_fluxes.o write_3d_binary.o write_compressed.o write_var_list.o \ 211 microphysics.o 211 wall_fluxes.o write_3d_binary.o write_compressed.o write_var_list.o 212 212 213 213 CC = cc … … 264 264 cpu_log.o: modules.o 265 265 cpu_statistics.o: modules.o 266 cuda_fft_interfaces.o: cuda_fft_interfaces.f90 266 267 data_log.o: modules.o 267 268 data_output_dvrp.o: modules.o … … 284 285 exchange_horiz.o: modules.o 285 286 exchange_horiz_2d.o: modules.o 286 fft_xy.o: modules.o singleton.o temperton_fft.o287 fft_xy.o: cuda_fft_interfaces.o modules.o singleton.o temperton_fft.o 287 288 flow_statistics.o: modules.o 288 289 global_min_max.o: modules.o … … 330 331 lpm_write_restart_file.o: modules.o 331 332 message.o: modules.o 333 microphysics.o: modules.o 332 334 modules.o: modules.f90 333 335 netcdf.o: modules.o … … 399 401 write_compressed.o: modules.o 400 402 write_var_list.o: modules.o 401 microphysics.o: modules.o -
palm/trunk/SOURCE/Makefile_check
r1037 r1106 20 20 # Current revisions: 21 21 # ------------------ 22 # 22 # +cuda_fft_interfaces 23 23 # 24 24 # Former revisions: … … 56 56 57 57 RCS = check_open.f90 check_namelist_files.f90 check_parameters.f90 \ 58 close_file.f90 cpu_log.f90 exchange_horiz.f90 exchange_horiz_2d.f90 \59 fft_xy.f90 init_grid.f90 init_masks.f90 init_cloud_physics.f90 \60 init_ pegrid.f90 local_flush.f90 local_stop.f90 local_system.f90 \61 message.f90 modules.f90 package_parin.f90 parin.f90 poisfft.f90 \62 poisfft _hybrid.f90 random_function.f90 singleton.f90 subsidence.f90 \63 temperton_fft.f90 \58 close_file.f90 cpu_log.f90 cuda_fft_interfaces.f90 exchange_horiz.f90 \ 59 exchange_horiz_2d.f90 fft_xy.f90 init_grid.f90 init_masks.f90 \ 60 init_cloud_physics.f90 init_pegrid.f90 local_flush.f90 local_stop.f90 \ 61 local_system.f90 message.f90 modules.f90 package_parin.f90 parin.f90 \ 62 poisfft.f90 poisfft_hybrid.f90 random_function.f90 singleton.f90 \ 63 subsidence.f90 temperton_fft.f90 \ 64 64 user_3d_data_averaging.f90 user_actions.f90 \ 65 65 user_additional_routines.f90 user_check_data_output.f90 \ … … 78 78 79 79 OBJS = check_open.o check_namelist_files.o check_parameters.o close_file.o \ 80 cpu_log.o exchange_horiz.o exchange_horiz_2d.o fft_xy.o init_grid.o \81 init_masks.o init_pegrid.o init_cloud_physics.o\82 local_flush.o local_stop.o local_system.o message.o \83 modules.o package_parin.o parin.o poisfft.o \84 poisfft_hybrid.o random_function.o singleton.o subsidence.o temperton_fft.o \85 user_3d_data_averaging.o user_actions.o user_additional_routines.o \86 user_check_data_output.o user_check_data_output_pr.o \87 user_check_parameters.o user_data_output_2d.o user_data_output_3d.o \88 user_data_output_mask.o user_data_output_dvrp.o \89 user_define_netcdf_grid.o user_dvrp_coltab.o user_header.o \90 user_init.o user_init_3d_model.o user_init_grid.o \91 user_init_plant_canopy.o user_last_actions.o user_lpm_advec.o \92 user_lpm_init.o user_lpm_set_attributes.o user_module.o user_parin.o \93 user_read_restart_data.o user_spectra.o user_statistics.o \80 cpu_log.o cuda_fft_interfaces.o exchange_horiz.o exchange_horiz_2d.o \ 81 fft_xy.o init_grid.o init_masks.o init_pegrid.o init_cloud_physics.o\ 82 local_flush.o local_stop.o local_system.o message.o \ 83 modules.o package_parin.o parin.o poisfft.o \ 84 poisfft_hybrid.o random_function.o singleton.o subsidence.o temperton_fft.o \ 85 user_3d_data_averaging.o user_actions.o user_additional_routines.o \ 86 user_check_data_output.o user_check_data_output_pr.o \ 87 user_check_parameters.o user_data_output_2d.o user_data_output_3d.o \ 88 user_data_output_mask.o user_data_output_dvrp.o \ 89 user_define_netcdf_grid.o user_dvrp_coltab.o user_header.o \ 90 user_init.o user_init_3d_model.o user_init_grid.o \ 91 user_init_plant_canopy.o user_last_actions.o user_lpm_advec.o \ 92 user_lpm_init.o user_lpm_set_attributes.o user_module.o user_parin.o \ 93 user_read_restart_data.o user_spectra.o user_statistics.o \ 94 94 95 95 CC = cc … … 124 124 close_file.o: modules.o 125 125 cpu_log.o: modules.o 126 cuda_fft_interfaces.o: cuda_fft_interfaces.f90 126 127 exchange_horiz.o: modules.o 127 128 exchange_horiz_2d.o: modules.o 128 fft_xy.o: modules.o singleton.o temperton_fft.o129 fft_xy.o: cuda_fft_interfaces.o modules.o singleton.o temperton_fft.o 129 130 init_cloud_physics.o: modules.o 130 131 init_grid.o: modules.o -
palm/trunk/SOURCE/check_open.f90
r1093 r1106 20 20 ! Current revisions: 21 21 ! ----------------- 22 ! 22 ! array_kind renamed precision_kind 23 23 ! 24 24 ! Former revisions: … … 109 109 !------------------------------------------------------------------------------! 110 110 111 USE array_kind112 111 USE arrays_3d 113 112 USE control_parameters … … 117 116 USE particle_attributes 118 117 USE pegrid 118 USE precision_kind 119 119 USE profil_parameter 120 120 USE statistics -
palm/trunk/SOURCE/data_output_3d.f90
r1077 r1106 19 19 ! 20 20 ! Current revisions: 21 ! ----------------- 21 ! ------------------ 22 ! array_kind renamed precision_kind 22 23 ! 23 24 ! Former revisions: … … 105 106 !------------------------------------------------------------------------------! 106 107 107 USE array_kind108 108 USE arrays_3d 109 109 USE averaging … … 116 116 USE particle_attributes 117 117 USE pegrid 118 USE precision_kind 118 119 119 120 IMPLICIT NONE -
palm/trunk/SOURCE/data_output_profiles.f90
r1093 r1106 20 20 ! Current revisions: 21 21 ! ----------------- 22 ! 22 ! bugfix: initial time for preruns of coupled runs is output as -coupling_start_time 23 23 ! 24 24 ! Former revisions: … … 126 126 #if defined( __netcdf ) 127 127 ! 128 !-- Store initial time (t=0) to time axis, but only if an output 129 !-- is required for at least one of the profiles 128 !-- Store initial time to time axis, but only if an output 129 !-- is required for at least one of the profiles. The initial time 130 !-- is either 0, or, in case of a prerun for coupled atmosphere-ocean 131 !-- runs, has a negative value 130 132 DO i = 1, dopr_n 131 133 IF ( dopr_initial_index(i) /= 0 ) THEN 132 134 nc_stat = NF90_PUT_VAR( id_set_pr, id_var_time_pr, & 133 (/ 0.0 /), start = (/ 1/), &134 135 (/ -coupling_start_time /), & 136 start = (/ 1 /), count = (/ 1 /) ) 135 137 CALL handle_netcdf_error( 'data_output_profiles', 329 ) 136 138 output_for_t0 = .TRUE. -
palm/trunk/SOURCE/fft_xy.f90
r1093 r1106 20 20 ! Current revisions: 21 21 ! ----------------- 22 ! 22 ! CUDA fft added 23 ! array_kind renamed precision_kind, 3D- instead of 1D-loops in fft_x and fft_y 24 ! old fft_x, fft_y become fft_x_1d, fft_y_1d and are used for 1D-decomposition 23 25 ! 24 26 ! Former revisions: … … 62 64 !------------------------------------------------------------------------------! 63 65 64 USE array_kind65 66 USE control_parameters 66 67 USE indices 68 USE precision_kind 67 69 USE singleton 68 70 USE temperton_fft 71 USE transpose_indices 69 72 70 73 IMPLICIT NONE 71 74 72 75 PRIVATE 73 PUBLIC fft_x, fft_ y, fft_init, fft_x_m, fft_y_m76 PUBLIC fft_x, fft_x_1d, fft_y, fft_y_1d, fft_init, fft_x_m, fft_y_m 74 77 75 78 INTEGER, DIMENSION(:), ALLOCATABLE, SAVE :: ifax_x, ifax_y … … 77 80 LOGICAL, SAVE :: init_fft = .FALSE. 78 81 79 REAL, SAVE :: sqr_nx, sqr_ny82 REAL, SAVE :: dnx, dny, sqr_dnx, sqr_dny 80 83 REAL, DIMENSION(:), ALLOCATABLE, SAVE :: trigs_x, trigs_y 81 84 … … 90 93 REAL, DIMENSION(:), ALLOCATABLE, SAVE :: trig_xb, trig_xf, trig_yb, & 91 94 trig_yf 95 #elif defined( __cuda_fft ) 96 INTEGER, SAVE :: plan_xf, plan_xi, plan_yf, plan_yi, total_points_x_transpo, & 97 total_points_y_transpo 92 98 #endif 93 99 … … 102 108 END INTERFACE fft_x 103 109 110 INTERFACE fft_x_1d 111 MODULE PROCEDURE fft_x_1d 112 END INTERFACE fft_x_1d 113 104 114 INTERFACE fft_y 105 115 MODULE PROCEDURE fft_y 106 116 END INTERFACE fft_y 107 117 118 INTERFACE fft_y_1d 119 MODULE PROCEDURE fft_y_1d 120 END INTERFACE fft_y_1d 121 108 122 INTERFACE fft_x_m 109 123 MODULE PROCEDURE fft_x_m … … 118 132 119 133 SUBROUTINE fft_init 134 135 USE cuda_fft_interfaces 120 136 121 137 IMPLICIT NONE … … 145 161 IF ( fft_method == 'system-specific' ) THEN 146 162 147 sqr_nx = SQRT( 1.0 / ( nx + 1.0 ) ) 148 sqr_ny = SQRT( 1.0 / ( ny + 1.0 ) ) 163 dnx = 1.0 / ( nx + 1.0 ) 164 dny = 1.0 / ( ny + 1.0 ) 165 sqr_dnx = SQRT( dnx ) 166 sqr_dny = SQRT( dny ) 149 167 #if defined( __ibm ) && ! defined( __ibmy_special ) 150 168 ! 151 169 !-- Initialize tables for fft along x 152 CALL DRCFT( 1, workx, 1, workx, 1, nx+1, 1, 1, sqr_ nx, aux1, nau1, &170 CALL DRCFT( 1, workx, 1, workx, 1, nx+1, 1, 1, sqr_dnx, aux1, nau1, & 153 171 aux2, nau2 ) 154 CALL DCRFT( 1, workx, 1, workx, 1, nx+1, 1, -1, sqr_ nx, aux3, nau1, &172 CALL DCRFT( 1, workx, 1, workx, 1, nx+1, 1, -1, sqr_dnx, aux3, nau1, & 155 173 aux4, nau2 ) 156 174 ! 157 175 !-- Initialize tables for fft along y 158 CALL DRCFT( 1, worky, 1, worky, 1, ny+1, 1, 1, sqr_ ny, auy1, nau1, &176 CALL DRCFT( 1, worky, 1, worky, 1, ny+1, 1, 1, sqr_dny, auy1, nau1, & 159 177 auy2, nau2 ) 160 CALL DCRFT( 1, worky, 1, worky, 1, ny+1, 1, -1, sqr_ ny, auy3, nau1, &178 CALL DCRFT( 1, worky, 1, worky, 1, ny+1, 1, -1, sqr_dny, auy3, nau1, & 161 179 auy4, nau2 ) 162 180 #elif defined( __nec ) … … 175 193 ! 176 194 !-- Initialize tables for fft along x (non-vector and vector case (M)) 177 CALL DZFFT( 0, nx+1, sqr_ nx, work_x, work_x, trig_xf, workx, 0 )178 CALL ZDFFT( 0, nx+1, sqr_ nx, work_x, work_x, trig_xb, workx, 0 )179 CALL DZFFTM( 0, nx+1, nz1, sqr_ nx, work_x, nx+4, work_x, nx+4, &195 CALL DZFFT( 0, nx+1, sqr_dnx, work_x, work_x, trig_xf, workx, 0 ) 196 CALL ZDFFT( 0, nx+1, sqr_dnx, work_x, work_x, trig_xb, workx, 0 ) 197 CALL DZFFTM( 0, nx+1, nz1, sqr_dnx, work_x, nx+4, work_x, nx+4, & 180 198 trig_xf, workx, 0 ) 181 CALL ZDFFTM( 0, nx+1, nz1, sqr_ nx, work_x, nx+4, work_x, nx+4, &199 CALL ZDFFTM( 0, nx+1, nz1, sqr_dnx, work_x, nx+4, work_x, nx+4, & 182 200 trig_xb, workx, 0 ) 183 201 ! 184 202 !-- Initialize tables for fft along y (non-vector and vector case (M)) 185 CALL DZFFT( 0, ny+1, sqr_ ny, work_y, work_y, trig_yf, worky, 0 )186 CALL ZDFFT( 0, ny+1, sqr_ ny, work_y, work_y, trig_yb, worky, 0 )187 CALL DZFFTM( 0, ny+1, nz1, sqr_ ny, work_y, ny+4, work_y, ny+4, &203 CALL DZFFT( 0, ny+1, sqr_dny, work_y, work_y, trig_yf, worky, 0 ) 204 CALL ZDFFT( 0, ny+1, sqr_dny, work_y, work_y, trig_yb, worky, 0 ) 205 CALL DZFFTM( 0, ny+1, nz1, sqr_dny, work_y, ny+4, work_y, ny+4, & 188 206 trig_yf, worky, 0 ) 189 CALL ZDFFTM( 0, ny+1, nz1, sqr_ ny, work_y, ny+4, work_y, ny+4, &207 CALL ZDFFTM( 0, ny+1, nz1, sqr_dny, work_y, ny+4, work_y, ny+4, & 190 208 trig_yb, worky, 0 ) 209 #elif defined( __cuda_fft ) 210 total_points_x_transpo = (nx+1) * (nyn_x-nys_x+1) * (nzt_x-nzb_x+1) 211 total_points_y_transpo = (ny+1) * (nxr_y-nxl_y+1) * (nzt_y-nzb_y+1) 212 CALL CUFFTPLAN1D( plan_xf, nx+1, CUFFT_D2Z, (ny+1)*nz ) 213 CALL CUFFTPLAN1D( plan_xi, nx+1, CUFFT_Z2D, (ny+1)*nz ) 214 CALL CUFFTPLAN1D( plan_yf, ny+1, CUFFT_D2Z, (nx+1)*nz ) 215 CALL CUFFTPLAN1D( plan_yi, ny+1, CUFFT_Z2D, (nx+1)*nz ) 191 216 #else 192 217 message_string = 'no system-specific fft-call available' … … 222 247 ! ! 223 248 ! Fourier-transformation along x-direction ! 249 ! Version for 2D-decomposition ! 224 250 ! ! 225 251 ! fft_x uses internal algorithms (Singleton or Temperton) or ! … … 227 253 !----------------------------------------------------------------------! 228 254 255 USE cuda_fft_interfaces 256 257 IMPLICIT NONE 258 259 CHARACTER (LEN=*) :: direction 260 INTEGER :: i, ishape(1), j, k, m 261 262 LOGICAL :: forward_fft 263 264 REAL, DIMENSION(0:nx+2) :: work 265 REAL, DIMENSION(nx+2) :: work1 266 COMPLEX, DIMENSION(:), ALLOCATABLE :: cwork 267 #if defined( __ibm ) 268 REAL, DIMENSION(nau2) :: aux2, aux4 269 #elif defined( __nec ) 270 REAL, DIMENSION(6*(nx+1)) :: work2 271 #elif defined( __cuda_fft ) 272 REAL(dpk), DEVICE, DIMENSION(:), ALLOCATABLE :: cuda_a_device 273 COMPLEX(dpk), DEVICE, DIMENSION(:), ALLOCATABLE :: cuda_b_device 274 COMPLEX(dpk), DIMENSION(:), ALLOCATABLE :: cuda_host 275 #endif 276 REAL, DIMENSION(0:nx,nys_x:nyn_x,nzb_x:nzt_x) :: ar 277 278 IF ( direction == 'forward' ) THEN 279 forward_fft = .TRUE. 280 ELSE 281 forward_fft = .FALSE. 282 ENDIF 283 284 IF ( fft_method == 'singleton-algorithm' ) THEN 285 286 ! 287 !-- Performing the fft with singleton's software works on every system, 288 !-- since it is part of the model 289 ALLOCATE( cwork(0:nx) ) 290 291 IF ( forward_fft ) then 292 293 !$OMP PARALLEL PRIVATE ( cwork, i, ishape, j, k ) 294 !$OMP DO 295 DO k = nzb_x, nzt_x 296 DO j = nys_x, nyn_x 297 298 DO i = 0, nx 299 cwork(i) = CMPLX( ar(i,j,k) ) 300 ENDDO 301 302 ishape = SHAPE( cwork ) 303 CALL FFTN( cwork, ishape ) 304 305 DO i = 0, (nx+1)/2 306 ar(i,j,k) = REAL( cwork(i) ) 307 ENDDO 308 DO i = 1, (nx+1)/2 - 1 309 ar(nx+1-i,j,k) = -AIMAG( cwork(i) ) 310 ENDDO 311 312 ENDDO 313 ENDDO 314 !$OMP END PARALLEL 315 316 ELSE 317 318 !$OMP PARALLEL PRIVATE ( cwork, i, ishape, j, k ) 319 !$OMP DO 320 DO k = nzb_x, nzt_x 321 DO j = nys_x, nyn_x 322 323 cwork(0) = CMPLX( ar(0,j,k), 0.0 ) 324 DO i = 1, (nx+1)/2 - 1 325 cwork(i) = CMPLX( ar(i,j,k), -ar(nx+1-i,j,k) ) 326 cwork(nx+1-i) = CMPLX( ar(i,j,k), ar(nx+1-i,j,k) ) 327 ENDDO 328 cwork((nx+1)/2) = CMPLX( ar((nx+1)/2,j,k), 0.0 ) 329 330 ishape = SHAPE( cwork ) 331 CALL FFTN( cwork, ishape, inv = .TRUE. ) 332 333 DO i = 0, nx 334 ar(i,j,k) = REAL( cwork(i) ) 335 ENDDO 336 337 ENDDO 338 ENDDO 339 !$OMP END PARALLEL 340 341 ENDIF 342 343 DEALLOCATE( cwork ) 344 345 ELSEIF ( fft_method == 'temperton-algorithm' ) THEN 346 347 ! 348 !-- Performing the fft with Temperton's software works on every system, 349 !-- since it is part of the model 350 IF ( forward_fft ) THEN 351 352 !$OMP PARALLEL PRIVATE ( work, i, j, k ) 353 !$OMP DO 354 DO k = nzb_x, nzt_x 355 DO j = nys_x, nyn_x 356 357 work(0:nx) = ar(0:nx,j,k) 358 CALL fft991cy( work, work1, trigs_x, ifax_x, 1, nx+1, nx+1, 1, -1 ) 359 360 DO i = 0, (nx+1)/2 361 ar(i,j,k) = work(2*i) 362 ENDDO 363 DO i = 1, (nx+1)/2 - 1 364 ar(nx+1-i,j,k) = work(2*i+1) 365 ENDDO 366 367 ENDDO 368 ENDDO 369 !$OMP END PARALLEL 370 371 ELSE 372 373 !$OMP PARALLEL PRIVATE ( work, i, j, k ) 374 !$OMP DO 375 DO k = nzb_x, nzt_x 376 DO j = nys_x, nyn_x 377 378 DO i = 0, (nx+1)/2 379 work(2*i) = ar(i,j,k) 380 ENDDO 381 DO i = 1, (nx+1)/2 - 1 382 work(2*i+1) = ar(nx+1-i,j,k) 383 ENDDO 384 work(1) = 0.0 385 work(nx+2) = 0.0 386 387 CALL fft991cy( work, work1, trigs_x, ifax_x, 1, nx+1, nx+1, 1, 1 ) 388 ar(0:nx,j,k) = work(0:nx) 389 390 ENDDO 391 ENDDO 392 !$OMP END PARALLEL 393 394 ENDIF 395 396 ELSEIF ( fft_method == 'system-specific' ) THEN 397 398 #if defined( __ibm ) && ! defined( __ibmy_special ) 399 IF ( forward_fft ) THEN 400 401 !$OMP PARALLEL PRIVATE ( work, i, j, k ) 402 !$OMP DO 403 DO k = nzb_x, nzt_x 404 DO j = nys_x, nyn_x 405 406 CALL DRCFT( 0, ar, 1, work, 1, nx+1, 1, 1, sqr_dnx, aux1, nau1, & 407 aux2, nau2 ) 408 409 DO i = 0, (nx+1)/2 410 ar(i,j,k) = work(2*i) 411 ENDDO 412 DO i = 1, (nx+1)/2 - 1 413 ar(nx+1-i,j,k) = work(2*i+1) 414 ENDDO 415 416 ENDDO 417 ENDDO 418 !$OMP END PARALLEL 419 420 ELSE 421 422 !$OMP PARALLEL PRIVATE ( work, i, j, k ) 423 !$OMP DO 424 DO k = nzb_x, nzt_x 425 DO j = nys_x, nyn_x 426 427 DO i = 0, (nx+1)/2 428 work(2*i) = ar(i,j,k) 429 ENDDO 430 DO i = 1, (nx+1)/2 - 1 431 work(2*i+1) = ar(nx+1-i,j,k) 432 ENDDO 433 work(1) = 0.0 434 work(nx+2) = 0.0 435 436 CALL DCRFT( 0, work, 1, work, 1, nx+1, 1, -1, sqr_dnx, aux3, nau1, & 437 aux4, nau2 ) 438 439 DO i = 0, nx 440 ar(i,j,k) = work(i) 441 ENDDO 442 443 ENDDO 444 ENDDO 445 !$OMP END PARALLEL 446 447 ENDIF 448 449 #elif defined( __nec ) 450 451 IF ( forward_fft ) THEN 452 453 !$OMP PARALLEL PRIVATE ( work, i, j, k ) 454 !$OMP DO 455 DO k = nzb_x, nzt_x 456 DO j = nys_x, nyn_x 457 458 work(0:nx) = ar(0:nx,j,k) 459 460 CALL DZFFT( 1, nx+1, sqr_dnx, work, work, trig_xf, work2, 0 ) 461 462 DO i = 0, (nx+1)/2 463 ar(i,j,k) = work(2*i) 464 ENDDO 465 DO i = 1, (nx+1)/2 - 1 466 ar(nx+1-i,j,k) = work(2*i+1) 467 ENDDO 468 469 ENDDO 470 ENDDO 471 !$END OMP PARALLEL 472 473 ELSE 474 475 !$OMP PARALLEL PRIVATE ( work, i, j, k ) 476 !$OMP DO 477 DO k = nzb_x, nzt_x 478 DO j = nys_x, nyn_x 479 480 DO i = 0, (nx+1)/2 481 work(2*i) = ar(i,j,k) 482 ENDDO 483 DO i = 1, (nx+1)/2 - 1 484 work(2*i+1) = ar(nx+1-i,j,k) 485 ENDDO 486 work(1) = 0.0 487 work(nx+2) = 0.0 488 489 CALL ZDFFT( -1, nx+1, sqr_dnx, work, work, trig_xb, work2, 0 ) 490 491 ar(0:nx,j,k) = work(0:nx) 492 493 ENDDO 494 ENDDO 495 !$OMP END PARALLEL 496 497 ENDIF 498 499 #elif defined( __cuda_fft ) 500 501 ALLOCATE( cuda_a_device(0:total_points_x_transpo-1) ) 502 ALLOCATE( cuda_b_device(0:((nx+1)/2+1) * (nyn_x-nys_x+1) * (nzt_x-nzb_x+1) - 1) ) 503 ALLOCATE( cuda_host(0:((nx+1)/2+1) * (nyn_x-nys_x+1) * (nzt_x-nzb_x+1) - 1) ) 504 505 m = 0 506 507 IF ( forward_fft ) THEN 508 509 cuda_a_device = ar(0:total_points_x_transpo-1,nys_x,nzb_x) 510 511 CALL CUFFTEXECD2Z( plan_xf, cuda_a_device, cuda_b_device ) 512 cuda_host = cuda_b_device 513 514 DO k = nzb_x, nzt_x 515 DO j = nys_x, nyn_x 516 517 DO i = 0, (nx+1)/2 518 ar(i,j,k) = REAL( cuda_host(m+i) ) * dnx 519 ENDDO 520 521 DO i = 1, (nx+1)/2 - 1 522 ar(nx+1-i,j,k) = AIMAG( cuda_host(m+i) ) * dnx 523 ENDDO 524 525 m = m + (nx+1)/2 + 1 526 527 ENDDO 528 ENDDO 529 530 ELSE 531 532 DO k = nzb_x, nzt_x 533 DO j = nys_x, nyn_x 534 535 cuda_host(m) = CMPLX( ar(0,j,k), 0.0 ) 536 537 DO i = 1, (nx+1)/2 - 1 538 cuda_host(m+i) = CMPLX( ar(i,j,k), ar(nx+1-i,j,k) ) 539 ENDDO 540 cuda_host(m+(nx+1)/2) = CMPLX( ar((nx+1)/2,j,k), 0.0 ) 541 542 m = m + (nx+1)/2 + 1 543 544 ENDDO 545 ENDDO 546 547 cuda_b_device = cuda_host 548 CALL CUFFTEXECZ2D( plan_xi, cuda_b_device, cuda_a_device ) 549 550 ar(0:total_points_x_transpo-1,nys_x,nzb_x) = cuda_a_device 551 552 ENDIF 553 554 DEALLOCATE( cuda_a_device, cuda_b_device, cuda_host ) 555 556 #else 557 message_string = 'no system-specific fft-call available' 558 CALL message( 'fft_x', 'PA0188', 1, 2, 0, 6, 0 ) 559 #endif 560 561 ELSE 562 563 message_string = 'fft method "' // TRIM( fft_method) // & 564 '" not available' 565 CALL message( 'fft_x', 'PA0189', 1, 2, 0, 6, 0 ) 566 567 ENDIF 568 569 END SUBROUTINE fft_x 570 571 SUBROUTINE fft_x_1d( ar, direction ) 572 573 !----------------------------------------------------------------------! 574 ! fft_x_1d ! 575 ! ! 576 ! Fourier-transformation along x-direction ! 577 ! Version for 1D-decomposition ! 578 ! ! 579 ! fft_x uses internal algorithms (Singleton or Temperton) or ! 580 ! system-specific routines, if they are available ! 581 !----------------------------------------------------------------------! 582 229 583 IMPLICIT NONE 230 584 … … 232 586 INTEGER :: i, ishape(1) 233 587 234 !kk REAL, DIMENSION(:) :: ar !kk Does NOT work (Bug??) 588 LOGICAL :: forward_fft 589 235 590 REAL, DIMENSION(0:nx) :: ar 236 591 REAL, DIMENSION(0:nx+2) :: work … … 243 598 #endif 244 599 600 IF ( direction == 'forward' ) THEN 601 forward_fft = .TRUE. 602 ELSE 603 forward_fft = .FALSE. 604 ENDIF 605 245 606 IF ( fft_method == 'singleton-algorithm' ) THEN 246 607 … … 250 611 ALLOCATE( cwork(0:nx) ) 251 612 252 IF ( direction == 'forward') then613 IF ( forward_fft ) then 253 614 254 615 DO i = 0, nx … … 257 618 ishape = SHAPE( cwork ) 258 619 CALL FFTN( cwork, ishape ) 259 260 620 DO i = 0, (nx+1)/2 261 621 ar(i) = REAL( cwork(i) ) … … 290 650 !-- Performing the fft with Temperton's software works on every system, 291 651 !-- since it is part of the model 292 IF ( direction == 'forward') THEN652 IF ( forward_fft ) THEN 293 653 294 654 work(0:nx) = ar … … 321 681 322 682 #if defined( __ibm ) && ! defined( __ibmy_special ) 323 IF ( direction == 'forward') THEN324 325 CALL DRCFT( 0, ar, 1, work, 1, nx+1, 1, 1, sqr_ nx, aux1, nau1, &683 IF ( forward_fft ) THEN 684 685 CALL DRCFT( 0, ar, 1, work, 1, nx+1, 1, 1, sqr_dnx, aux1, nau1, & 326 686 aux2, nau2 ) 327 687 … … 344 704 work(nx+2) = 0.0 345 705 346 CALL DCRFT( 0, work, 1, work, 1, nx+1, 1, -1, sqr_ nx, aux3, nau1, &706 CALL DCRFT( 0, work, 1, work, 1, nx+1, 1, -1, sqr_dnx, aux3, nau1, & 347 707 aux4, nau2 ) 348 708 … … 353 713 ENDIF 354 714 #elif defined( __nec ) 355 IF ( direction == 'forward') THEN715 IF ( forward_fft ) THEN 356 716 357 717 work(0:nx) = ar(0:nx) 358 718 359 CALL DZFFT( 1, nx+1, sqr_ nx, work, work, trig_xf, work2, 0 )360 719 CALL DZFFT( 1, nx+1, sqr_dnx, work, work, trig_xf, work2, 0 ) 720 361 721 DO i = 0, (nx+1)/2 362 722 ar(i) = work(2*i) … … 377 737 work(nx+2) = 0.0 378 738 379 CALL ZDFFT( -1, nx+1, sqr_ nx, work, work, trig_xb, work2, 0 )739 CALL ZDFFT( -1, nx+1, sqr_dnx, work, work, trig_xb, work2, 0 ) 380 740 381 741 ar(0:nx) = work(0:nx) … … 384 744 #else 385 745 message_string = 'no system-specific fft-call available' 386 CALL message( 'fft_x ', 'PA0188', 1, 2, 0, 6, 0 )746 CALL message( 'fft_x_1d', 'PA0188', 1, 2, 0, 6, 0 ) 387 747 #endif 388 748 ELSE 389 749 message_string = 'fft method "' // TRIM( fft_method) // & 390 750 '" not available' 391 CALL message( 'fft_x ', 'PA0189', 1, 2, 0, 6, 0 )751 CALL message( 'fft_x_1d', 'PA0189', 1, 2, 0, 6, 0 ) 392 752 393 753 ENDIF 394 754 395 END SUBROUTINE fft_x 755 END SUBROUTINE fft_x_1d 396 756 397 757 SUBROUTINE fft_y( ar, direction ) … … 401 761 ! ! 402 762 ! Fourier-transformation along y-direction ! 763 ! Version for 2D-decomposition ! 403 764 ! ! 404 765 ! fft_y uses internal algorithms (Singleton or Temperton) or ! … … 406 767 !----------------------------------------------------------------------! 407 768 769 USE cuda_fft_interfaces 770 771 IMPLICIT NONE 772 773 CHARACTER (LEN=*) :: direction 774 INTEGER :: i, j, jshape(1), k, m 775 776 LOGICAL :: forward_fft 777 778 REAL, DIMENSION(0:ny+2) :: work 779 REAL, DIMENSION(ny+2) :: work1 780 COMPLEX, DIMENSION(:), ALLOCATABLE :: cwork 781 #if defined( __ibm ) 782 REAL, DIMENSION(nau2) :: auy2, auy4 783 #elif defined( __nec ) 784 REAL, DIMENSION(6*(ny+1)) :: work2 785 #elif defined( __cuda_fft ) 786 REAL(dpk), DEVICE, DIMENSION(:), ALLOCATABLE :: cuda_a_device 787 COMPLEX(dpk), DEVICE, DIMENSION(:), ALLOCATABLE :: cuda_b_device 788 COMPLEX(dpk), DIMENSION(:), ALLOCATABLE :: cuda_host 789 #endif 790 REAL, DIMENSION(0:ny,nxl_y:nxr_y,nzb_y:nzt_y) :: ar 791 792 IF ( direction == 'forward' ) THEN 793 forward_fft = .TRUE. 794 ELSE 795 forward_fft = .FALSE. 796 ENDIF 797 798 IF ( fft_method == 'singleton-algorithm' ) THEN 799 800 ! 801 !-- Performing the fft with singleton's software works on every system, 802 !-- since it is part of the model 803 ALLOCATE( cwork(0:ny) ) 804 805 IF ( forward_fft ) then 806 807 !$OMP PARALLEL PRIVATE ( cwork, i, jshape, j, k ) 808 !$OMP DO 809 DO k = nzb_y, nzt_y 810 DO i = nxl_y, nxr_y 811 812 DO j = 0, ny 813 cwork(j) = CMPLX( ar(j,i,k) ) 814 ENDDO 815 816 jshape = SHAPE( cwork ) 817 CALL FFTN( cwork, jshape ) 818 819 DO j = 0, (ny+1)/2 820 ar(j,i,k) = REAL( cwork(j) ) 821 ENDDO 822 DO j = 1, (ny+1)/2 - 1 823 ar(ny+1-j,i,k) = -AIMAG( cwork(j) ) 824 ENDDO 825 826 ENDDO 827 ENDDO 828 !$OMP END PARALLEL 829 830 ELSE 831 832 !$OMP PARALLEL PRIVATE ( cwork, i, jshape, j, k ) 833 !$OMP DO 834 DO k = nzb_y, nzt_y 835 DO i = nxl_y, nxr_y 836 837 cwork(0) = CMPLX( ar(0,i,k), 0.0 ) 838 DO j = 1, (ny+1)/2 - 1 839 cwork(j) = CMPLX( ar(j,i,k), -ar(ny+1-j,i,k) ) 840 cwork(ny+1-j) = CMPLX( ar(j,i,k), ar(ny+1-j,i,k) ) 841 ENDDO 842 cwork((ny+1)/2) = CMPLX( ar((ny+1)/2,i,k), 0.0 ) 843 844 jshape = SHAPE( cwork ) 845 CALL FFTN( cwork, jshape, inv = .TRUE. ) 846 847 DO j = 0, ny 848 ar(j,i,k) = REAL( cwork(j) ) 849 ENDDO 850 851 ENDDO 852 ENDDO 853 !$OMP END PARALLEL 854 855 ENDIF 856 857 DEALLOCATE( cwork ) 858 859 ELSEIF ( fft_method == 'temperton-algorithm' ) THEN 860 861 ! 862 !-- Performing the fft with Temperton's software works on every system, 863 !-- since it is part of the model 864 IF ( forward_fft ) THEN 865 866 !$OMP PARALLEL PRIVATE ( work, i, j, k ) 867 !$OMP DO 868 DO k = nzb_y, nzt_y 869 DO i = nxl_y, nxr_y 870 871 work(0:ny) = ar(0:ny,i,k) 872 CALL fft991cy( work, work1, trigs_y, ifax_y, 1, ny+1, ny+1, 1, -1 ) 873 874 DO j = 0, (ny+1)/2 875 ar(j,i,k) = work(2*j) 876 ENDDO 877 DO j = 1, (ny+1)/2 - 1 878 ar(ny+1-j,i,k) = work(2*j+1) 879 ENDDO 880 881 ENDDO 882 ENDDO 883 !$OMP END PARALLEL 884 885 ELSE 886 887 !$OMP PARALLEL PRIVATE ( work, i, j, k ) 888 !$OMP DO 889 DO k = nzb_y, nzt_y 890 DO i = nxl_y, nxr_y 891 892 DO j = 0, (ny+1)/2 893 work(2*j) = ar(j,i,k) 894 ENDDO 895 DO j = 1, (ny+1)/2 - 1 896 work(2*j+1) = ar(ny+1-j,i,k) 897 ENDDO 898 work(1) = 0.0 899 work(ny+2) = 0.0 900 901 CALL fft991cy( work, work1, trigs_y, ifax_y, 1, ny+1, ny+1, 1, 1 ) 902 ar(0:ny,i,k) = work(0:ny) 903 904 ENDDO 905 ENDDO 906 !$OMP END PARALLEL 907 908 ENDIF 909 910 ELSEIF ( fft_method == 'system-specific' ) THEN 911 912 #if defined( __ibm ) && ! defined( __ibmy_special ) 913 IF ( forward_fft) THEN 914 915 !$OMP PARALLEL PRIVATE ( work, i, j, k ) 916 !$OMP DO 917 DO k = nzb_y, nzt_y 918 DO i = nxl_y, nxr_y 919 920 CALL DRCFT( 0, ar, 1, work, 1, ny+1, 1, 1, sqr_dny, auy1, nau1, & 921 auy2, nau2 ) 922 923 DO j = 0, (ny+1)/2 924 ar(j,i,k) = work(2*j) 925 ENDDO 926 DO j = 1, (ny+1)/2 - 1 927 ar(ny+1-j,i,k) = work(2*j+1) 928 ENDDO 929 930 ENDDO 931 ENDDO 932 !$OMP END PARALLEL 933 934 ELSE 935 936 !$OMP PARALLEL PRIVATE ( work, i, j, k ) 937 !$OMP DO 938 DO k = nzb_y, nzt_y 939 DO i = nxl_y, nxr_y 940 941 DO j = 0, (ny+1)/2 942 work(2*j) = ar(j,i,k) 943 ENDDO 944 DO j = 1, (ny+1)/2 - 1 945 work(2*j+1) = ar(ny+1-j,i,k) 946 ENDDO 947 work(1) = 0.0 948 work(ny+2) = 0.0 949 950 CALL DCRFT( 0, work, 1, work, 1, ny+1, 1, -1, sqr_dny, auy3, nau1, & 951 auy4, nau2 ) 952 953 DO j = 0, ny 954 ar(j,i,k) = work(j) 955 ENDDO 956 957 ENDDO 958 ENDDO 959 !$OMP END PARALLEL 960 961 ENDIF 962 #elif defined( __nec ) 963 IF ( forward_fft ) THEN 964 965 !$OMP PARALLEL PRIVATE ( work, i, j, k ) 966 !$OMP DO 967 DO k = nzb_y, nzt_y 968 DO i = nxl_y, nxr_y 969 970 work(0:ny) = ar(0:ny,i,k) 971 972 CALL DZFFT( 1, ny+1, sqr_dny, work, work, trig_yf, work2, 0 ) 973 974 DO j = 0, (ny+1)/2 975 ar(j,i,k) = work(2*j) 976 ENDDO 977 DO j = 1, (ny+1)/2 - 1 978 ar(ny+1-j,i,k) = work(2*j+1) 979 ENDDO 980 981 ENDDO 982 ENDDO 983 !$END OMP PARALLEL 984 985 ELSE 986 987 !$OMP PARALLEL PRIVATE ( work, i, j, k ) 988 !$OMP DO 989 DO k = nzb_y, nzt_y 990 DO i = nxl_y, nxr_y 991 992 DO j = 0, (ny+1)/2 993 work(2*j) = ar(j,i,k) 994 ENDDO 995 DO j = 1, (ny+1)/2 - 1 996 work(2*j+1) = ar(ny+1-j,i,k) 997 ENDDO 998 work(1) = 0.0 999 work(ny+2) = 0.0 1000 1001 CALL ZDFFT( -1, ny+1, sqr_dny, work, work, trig_yb, work2, 0 ) 1002 1003 ar(0:ny,i,k) = work(0:ny) 1004 1005 ENDDO 1006 ENDDO 1007 !$OMP END PARALLEL 1008 1009 ENDIF 1010 #elif defined( __cuda_fft ) 1011 1012 ALLOCATE( cuda_a_device(0:total_points_y_transpo-1) ) 1013 ALLOCATE( cuda_b_device(0:((ny+1)/2+1) * (nxr_y-nxl_y+1) * (nzt_y-nzb_y+1) - 1) ) 1014 ALLOCATE( cuda_host(0:((ny+1)/2+1) * (nxr_y-nxl_y+1) * (nzt_y-nzb_y+1) - 1) ) 1015 1016 m = 0 1017 1018 IF ( forward_fft ) THEN 1019 1020 cuda_a_device = ar(0:total_points_y_transpo-1,nxl_y,nzb_y) 1021 1022 CALL CUFFTEXECD2Z( plan_yf, cuda_a_device, cuda_b_device ) 1023 cuda_host = cuda_b_device 1024 1025 DO k = nzb_y, nzt_y 1026 DO i = nxl_y, nxr_y 1027 1028 DO j = 0, (ny+1)/2 1029 ar(j,i,k) = REAL( cuda_host(m+j) ) * dny 1030 ENDDO 1031 1032 DO j = 1, (ny+1)/2 - 1 1033 ar(ny+1-j,i,k) = AIMAG( cuda_host(m+j) ) * dny 1034 ENDDO 1035 1036 m = m + (ny+1)/2 + 1 1037 1038 ENDDO 1039 ENDDO 1040 1041 ELSE 1042 1043 DO k = nzb_y, nzt_y 1044 DO i = nxl_y, nxr_y 1045 1046 cuda_host(m) = CMPLX( ar(0,i,k), 0.0 ) 1047 1048 DO j = 1, (ny+1)/2 - 1 1049 cuda_host(m+j) = CMPLX( ar(j,i,k), ar(ny+1-j,i,k) ) 1050 ENDDO 1051 cuda_host(m+(ny+1)/2) = CMPLX( ar((ny+1)/2,i,k), 0.0 ) 1052 1053 m = m + (ny+1)/2 + 1 1054 1055 ENDDO 1056 ENDDO 1057 1058 cuda_b_device = cuda_host 1059 CALL CUFFTEXECZ2D( plan_yi, cuda_b_device, cuda_a_device ) 1060 1061 ar(0:total_points_y_transpo-1,nxl_y,nzb_y) = cuda_a_device 1062 1063 ENDIF 1064 1065 DEALLOCATE( cuda_a_device, cuda_b_device, cuda_host ) 1066 1067 #else 1068 message_string = 'no system-specific fft-call available' 1069 CALL message( 'fft_y', 'PA0188', 1, 2, 0, 6, 0 ) 1070 #endif 1071 1072 ELSE 1073 1074 message_string = 'fft method "' // TRIM( fft_method) // & 1075 '" not available' 1076 CALL message( 'fft_y', 'PA0189', 1, 2, 0, 6, 0 ) 1077 1078 ENDIF 1079 1080 END SUBROUTINE fft_y 1081 1082 SUBROUTINE fft_y_1d( ar, direction ) 1083 1084 !----------------------------------------------------------------------! 1085 ! fft_y_1d ! 1086 ! ! 1087 ! Fourier-transformation along y-direction ! 1088 ! Version for 1D-decomposition ! 1089 ! ! 1090 ! fft_y uses internal algorithms (Singleton or Temperton) or ! 1091 ! system-specific routines, if they are available ! 1092 !----------------------------------------------------------------------! 1093 408 1094 IMPLICIT NONE 409 1095 … … 411 1097 INTEGER :: j, jshape(1) 412 1098 413 !kk REAL, DIMENSION(:) :: ar !kk Does NOT work (Bug??) 1099 LOGICAL :: forward_fft 1100 414 1101 REAL, DIMENSION(0:ny) :: ar 415 1102 REAL, DIMENSION(0:ny+2) :: work … … 422 1109 #endif 423 1110 1111 IF ( direction == 'forward' ) THEN 1112 forward_fft = .TRUE. 1113 ELSE 1114 forward_fft = .FALSE. 1115 ENDIF 1116 424 1117 IF ( fft_method == 'singleton-algorithm' ) THEN 425 1118 … … 429 1122 ALLOCATE( cwork(0:ny) ) 430 1123 431 IF ( direction == 'forward') THEN1124 IF ( forward_fft ) THEN 432 1125 433 1126 DO j = 0, ny … … 470 1163 !-- Performing the fft with Temperton's software works on every system, 471 1164 !-- since it is part of the model 472 IF ( direction == 'forward') THEN1165 IF ( forward_fft ) THEN 473 1166 474 1167 work(0:ny) = ar … … 501 1194 502 1195 #if defined( __ibm ) && ! defined( __ibmy_special ) 503 IF ( direction == 'forward') THEN504 505 CALL DRCFT( 0, ar, 1, work, 1, ny+1, 1, 1, sqr_ ny, auy1, nau1, &1196 IF ( forward_fft ) THEN 1197 1198 CALL DRCFT( 0, ar, 1, work, 1, ny+1, 1, 1, sqr_dny, auy1, nau1, & 506 1199 auy2, nau2 ) 507 1200 … … 524 1217 work(ny+2) = 0.0 525 1218 526 CALL DCRFT( 0, work, 1, work, 1, ny+1, 1, -1, sqr_ ny, auy3, nau1, &1219 CALL DCRFT( 0, work, 1, work, 1, ny+1, 1, -1, sqr_dny, auy3, nau1, & 527 1220 auy4, nau2 ) 528 1221 … … 533 1226 ENDIF 534 1227 #elif defined( __nec ) 535 IF ( direction == 'forward') THEN1228 IF ( forward_fft ) THEN 536 1229 537 1230 work(0:ny) = ar(0:ny) 538 1231 539 CALL DZFFT( 1, ny+1, sqr_ ny, work, work, trig_yf, work2, 0 )1232 CALL DZFFT( 1, ny+1, sqr_dny, work, work, trig_yf, work2, 0 ) 540 1233 541 1234 DO j = 0, (ny+1)/2 … … 557 1250 work(ny+2) = 0.0 558 1251 559 CALL ZDFFT( -1, ny+1, sqr_ ny, work, work, trig_yb, work2, 0 )1252 CALL ZDFFT( -1, ny+1, sqr_dny, work, work, trig_yb, work2, 0 ) 560 1253 561 1254 ar(0:ny) = work(0:ny) … … 564 1257 #else 565 1258 message_string = 'no system-specific fft-call available' 566 CALL message( 'fft_y ', 'PA0188', 1, 2, 0, 6, 0 )1259 CALL message( 'fft_y_1d', 'PA0188', 1, 2, 0, 6, 0 ) 567 1260 568 1261 #endif … … 572 1265 message_string = 'fft method "' // TRIM( fft_method) // & 573 1266 '" not available' 574 CALL message( 'fft_y ', 'PA0189', 1, 2, 0, 6, 0 )1267 CALL message( 'fft_y_1d', 'PA0189', 1, 2, 0, 6, 0 ) 575 1268 576 1269 ENDIF 577 1270 578 END SUBROUTINE fft_y 1271 END SUBROUTINE fft_y_1d 579 1272 580 1273 SUBROUTINE fft_x_m( ar, direction ) … … 654 1347 !-- Tables are initialized once more. This call should not be 655 1348 !-- necessary, but otherwise program aborts in asymmetric case 656 CALL DZFFTM( 0, nx+1, nz1, sqr_ nx, work, nx+4, work, nx+4, &1349 CALL DZFFTM( 0, nx+1, nz1, sqr_dnx, work, nx+4, work, nx+4, & 657 1350 trig_xf, work1, 0 ) 658 1351 … … 662 1355 ENDIF 663 1356 664 CALL DZFFTM( 1, nx+1, nz1, sqr_ nx, ai, siza, work, sizw, &1357 CALL DZFFTM( 1, nx+1, nz1, sqr_dnx, ai, siza, work, sizw, & 665 1358 trig_xf, work1, 0 ) 666 1359 … … 679 1372 !-- Tables are initialized once more. This call should not be 680 1373 !-- necessary, but otherwise program aborts in asymmetric case 681 CALL ZDFFTM( 0, nx+1, nz1, sqr_ nx, work, nx+4, work, nx+4, &1374 CALL ZDFFTM( 0, nx+1, nz1, sqr_dnx, work, nx+4, work, nx+4, & 682 1375 trig_xb, work1, 0 ) 683 1376 … … 693 1386 ENDDO 694 1387 695 CALL ZDFFTM( -1, nx+1, nz1, sqr_ nx, work, sizw, ai, siza, &1388 CALL ZDFFTM( -1, nx+1, nz1, sqr_dnx, work, sizw, ai, siza, & 696 1389 trig_xb, work1, 0 ) 697 1390 … … 791 1484 !-- Tables are initialized once more. This call should not be 792 1485 !-- necessary, but otherwise program aborts in asymmetric case 793 CALL DZFFTM( 0, ny+1, nz1, sqr_ ny, work, ny+4, work, ny+4, &1486 CALL DZFFTM( 0, ny+1, nz1, sqr_dny, work, ny+4, work, ny+4, & 794 1487 trig_yf, work1, 0 ) 795 1488 … … 799 1492 ENDIF 800 1493 801 CALL DZFFTM( 1, ny+1, nz1, sqr_ ny, ai, siza, work, sizw, &1494 CALL DZFFTM( 1, ny+1, nz1, sqr_dny, ai, siza, work, sizw, & 802 1495 trig_yf, work1, 0 ) 803 1496 … … 816 1509 !-- Tables are initialized once more. This call should not be 817 1510 !-- necessary, but otherwise program aborts in asymmetric case 818 CALL ZDFFTM( 0, ny+1, nz1, sqr_ ny, work, ny+4, work, ny+4, &1511 CALL ZDFFTM( 0, ny+1, nz1, sqr_dny, work, ny+4, work, ny+4, & 819 1512 trig_yb, work1, 0 ) 820 1513 … … 830 1523 ENDDO 831 1524 832 CALL ZDFFTM( -1, ny+1, nz1, sqr_ ny, work, sizw, ai, siza, &1525 CALL ZDFFTM( -1, ny+1, nz1, sqr_dny, work, sizw, ai, siza, & 833 1526 trig_yb, work1, 0 ) 834 1527 … … 852 1545 END SUBROUTINE fft_y_m 853 1546 1547 854 1548 END MODULE fft_xy -
palm/trunk/SOURCE/header.f90
r1093 r1106 20 20 ! Current revisions: 21 21 ! ----------------- 22 ! 22 ! some format changes for coupled runs 23 23 ! 24 24 ! Former revisions: … … 192 192 CHARACTER (LEN=10) :: coor_chr, host_chr 193 193 CHARACTER (LEN=16) :: begin_chr 194 CHARACTER (LEN=2 3) :: ver_rev194 CHARACTER (LEN=26) :: ver_rev 195 195 CHARACTER (LEN=40) :: output_format 196 196 CHARACTER (LEN=70) :: char1, char2, dopr_chr, & … … 258 258 WRITE ( io, 101 ) mpi_type, coupling_mode 259 259 ENDIF 260 IF ( coupling_start_time /= 0.0 ) THEN 261 IF ( coupling_start_time > simulated_time_at_begin ) THEN 262 WRITE ( io, 109 ) 263 ELSE 264 WRITE ( io, 114 ) 265 ENDIF 266 ENDIF 260 267 WRITE ( io, 102 ) run_date, run_identifier, run_time, runnr, & 261 268 ADJUSTR( host_chr ) … … 426 433 IF ( time_restart /= 9999999.9 .AND. time_restart < end_time ) THEN 427 434 IF ( dt_restart == 9999999.9 ) THEN 428 WRITE ( io, 204 ) ' Next restart at: ',time_restart435 WRITE ( io, 204 ) ' Next restart at: ',time_restart 429 436 ELSE 430 WRITE ( io, 205 ) ' Next restart at: ',time_restart, dt_restart437 WRITE ( io, 205 ) ' Next restart at: ',time_restart, dt_restart 431 438 ENDIF 432 439 ENDIF … … 435 442 ! 436 443 !-- Start time for coupled runs, if independent precursor runs for atmosphere 437 !-- and ocean are used . In this case, coupling_start_time defines thetime438 !-- when the coupling is switched on.444 !-- and ocean are used or have been used. In this case, coupling_start_time 445 !-- defines the time when the coupling is switched on. 439 446 IF ( coupling_start_time /= 0.0 ) THEN 440 IF ( coupling_start_time >= simulated_time_at_begin ) THEN 441 char1 = 'Precursor run for a coupled atmosphere-ocean run' 442 ELSE 443 char1 = 'Coupled atmosphere-ocean run following independent ' // & 444 'precursor runs' 445 ENDIF 446 WRITE ( io, 207 ) char1, coupling_start_time 447 WRITE ( io, 207 ) coupling_start_time 447 448 ENDIF 448 449 … … 1581 1582 1582 1583 99 FORMAT (1X,78('-')) 1583 100 FORMAT (/1X,'*************************** ',9X,42('-')/ &1584 1X,'* ',A,' *', 9X,A/ &1585 1X,'*************************** ',9X,42('-'))1584 100 FORMAT (/1X,'******************************',6X,42('-')/ & 1585 1X,'* ',A,' *',6X,A/ & 1586 1X,'******************************',6X,42('-')) 1586 1587 101 FORMAT (37X,'coupled run using MPI-',I1,': ',A/ & 1587 1588 37X,42('-')) 1588 102 FORMAT (/' Date: ',A8,9X,'Run: ',A20/ &1589 ' Time: ',A8,9X,'Run-No.: ',I2.2/ &1590 ' Run on host: ',A10)1589 102 FORMAT (/' Date: ',A8,6X,'Run: ',A20/ & 1590 ' Time: ',A8,6X,'Run-No.: ',I2.2/ & 1591 ' Run on host: ',A10) 1591 1592 #if defined( __parallel ) 1592 103 FORMAT (' Number of PEs:', 8X,I5,9X,'Processor grid (x,y): (',I3,',',I3, &1593 103 FORMAT (' Number of PEs:',10X,I6,6X,'Processor grid (x,y): (',I3,',',I3, & 1593 1594 ')',1X,A) 1594 1595 104 FORMAT (' Number of PEs:',8X,I5,9X,'Tasks:',I4,' threads per task:',I4/ & … … 1599 1600 107 FORMAT (37X,'A 1d-decomposition along ',A,' is used') 1600 1601 108 FORMAT (37X,'Max. # of parallel I/O streams is ',I5) 1602 109 FORMAT (37X,'Precursor run for coupled atmos-ocean run'/ & 1603 37X,42('-')) 1604 114 FORMAT (37X,'Coupled atmosphere-ocean run following'/ & 1605 37X,'independent precursor runs'/ & 1606 37X,42('-')) 1601 1607 #endif 1602 1608 110 FORMAT (/' Numerical Schemes:'/ & … … 1610 1616 ' or Upstream') 1611 1617 118 FORMAT (' --> Scalar advection via Bott-Chlond-Scheme') 1612 119 FORMAT (' --> Galilei-Transform applied to horizontal advection ',&1613 ' Translation velocity = ',A/ &1618 119 FORMAT (' --> Galilei-Transform applied to horizontal advection:'/ & 1619 ' translation velocity = ',A/ & 1614 1620 ' distance advected ',A,': ',F8.3,' km(x) ',F8.3,' km(y)') 1615 1621 122 FORMAT (' --> Time differencing scheme: ',A) … … 1656 1662 200 FORMAT (//' Run time and time step information:'/ & 1657 1663 ' ----------------------------------'/) 1658 201 FORMAT ( ' Timestep: variable maximum value: ',F6.3,' s', &1664 201 FORMAT ( ' Timestep: variable maximum value: ',F6.3,' s', & 1659 1665 ' CFL-factor: ',F4.2) 1660 202 FORMAT ( ' Timestep: dt = ',F6.3,' s'/)1661 203 FORMAT ( ' Start time: ',F9.3,' s'/ &1662 ' End time: ',F9.3,' s')1666 202 FORMAT ( ' Timestep: dt = ',F6.3,' s'/) 1667 203 FORMAT ( ' Start time: ',F9.3,' s'/ & 1668 ' End time: ',F9.3,' s') 1663 1669 204 FORMAT ( A,F9.3,' s') 1664 1670 205 FORMAT ( A,F9.3,' s',5X,'restart every',17X,F9.3,' s') 1665 206 FORMAT (/' Time reached: ',F9.3,' s'/ &1666 ' CPU-time used: ',F9.3,' s per timestep: ', &1667 ' ',F9.3,' s'/ &1668 ' per second of simulated tim', &1671 206 FORMAT (/' Time reached: ',F9.3,' s'/ & 1672 ' CPU-time used: ',F9.3,' s per timestep: ', & 1673 ' ',F9.3,' s'/ & 1674 ' per second of simulated tim', & 1669 1675 'e: ',F9.3,' s') 1670 207 FORMAT ( A/' Coupling start time:',F9.3,' s')1676 207 FORMAT ( ' Coupling start time: ',F9.3,' s') 1671 1677 250 FORMAT (//' Computational grid and domain size:'/ & 1672 1678 ' ----------------------------------'// & -
palm/trunk/SOURCE/microphysics.f90
r1093 r1106 20 20 ! Current revisions: 21 21 ! ------------------ 22 ! 22 ! small changes in code formatting 23 23 ! 24 24 ! Former revisions: … … 111 111 END SUBROUTINE dsd_properties 112 112 113 113 114 SUBROUTINE autoconversion 114 115 … … 133 134 END SUBROUTINE autoconversion 134 135 136 135 137 SUBROUTINE accretion 136 138 … … 155 157 END SUBROUTINE accretion 156 158 159 157 160 SUBROUTINE selfcollection_breakup 158 161 … … 177 180 END SUBROUTINE selfcollection_breakup 178 181 182 179 183 SUBROUTINE evaporation_rain 180 184 … … 199 203 END SUBROUTINE evaporation_rain 200 204 205 201 206 SUBROUTINE sedimentation_cloud 202 207 … … 221 226 END SUBROUTINE sedimentation_cloud 222 227 228 223 229 SUBROUTINE sedimentation_rain 224 230 … … 240 246 ENDDO 241 247 ENDDO 242 243 248 244 249 END SUBROUTINE sedimentation_rain … … 293 298 END SUBROUTINE dsd_properties_ij 294 299 300 295 301 SUBROUTINE autoconversion_ij( i, j ) 296 302 … … 308 314 REAL :: k_au, autocon, phi_au, tau_cloud, xc, nu_c, rc, & 309 315 l_mix, re_lambda, alpha_cc, r_cc, sigma_cc, epsilon 316 310 317 311 318 k_au = k_cc / ( 20.0 * x0 ) … … 370 377 !-- Tendencies for q, qr, nr, pt: 371 378 tend_qr(k,j,i) = tend_qr(k,j,i) + autocon 372 tend_q(k,j,i) = tend_q(k,j,i) - autocon379 tend_q(k,j,i) = tend_q(k,j,i) - autocon 373 380 tend_nr(k,j,i) = tend_nr(k,j,i) + autocon / x0 * hyrho(k) 374 tend_pt(k,j,i) = tend_pt(k,j,i) + autocon * l_d_cp * pt_d_t(k) 381 tend_pt(k,j,i) = tend_pt(k,j,i) + autocon * l_d_cp * pt_d_t(k) 382 375 383 ENDIF 376 384 … … 378 386 379 387 END SUBROUTINE autoconversion_ij 388 380 389 381 390 SUBROUTINE accretion_ij( i, j ) … … 394 403 395 404 DO k = nzb_2d(j,i)+1, nzt 405 396 406 IF ( ( ql(k,j,i) > 0.0 ) .AND. ( qr(k,j,i) > eps_sb ) ) THEN 397 407 ! … … 422 432 !-- Tendencies for q, qr, pt: 423 433 tend_qr(k,j,i) = tend_qr(k,j,i) + accr 424 tend_q(k,j,i) = tend_q(k,j,i) - accr434 tend_q(k,j,i) = tend_q(k,j,i) - accr 425 435 tend_pt(k,j,i) = tend_pt(k,j,i) + accr * l_d_cp * pt_d_t(k) 436 426 437 ENDIF 438 427 439 ENDDO 428 440 … … 444 456 REAL :: selfcoll, breakup, phi_br, phi_sc 445 457 458 446 459 DO k = nzb_2d(j,i)+1, nzt 460 447 461 IF ( qr(k,j,i) > eps_sb ) THEN 448 462 ! … … 469 483 !-- Tendency for nr: 470 484 tend_nr(k,j,i) = tend_nr(k,j,i) + selfcoll 485 471 486 ENDIF 487 472 488 ENDDO 473 489 474 490 END SUBROUTINE selfcollection_breakup_ij 491 475 492 476 493 SUBROUTINE evaporation_rain_ij( i, j ) … … 492 509 mu_r_2, mu_r_5d2, nr_0 493 510 511 494 512 DO k = nzb_2d(j,i)+1, nzt 513 495 514 IF ( qr(k,j,i) > eps_sb ) THEN 496 515 ! … … 506 525 q_s = q_s * ( 1.0 + alpha * q(k,j,i) ) / ( 1.0 + alpha * q_s ) 507 526 ! 508 !-- Oversaturation:527 !-- Supersaturation: 509 528 sat = MIN( 0.0, ( q(k,j,i) - ql(k,j,i) ) / q_s - 1.0 ) 510 529 ! … … 550 569 evap = 2.0 * pi * nr_0 * g_evap * f_vent * sat / & 551 570 hyrho(k) 552 evap = MAX( evap, -qr(k,j,i) / ( dt_3d * 571 evap = MAX( evap, -qr(k,j,i) / ( dt_3d * & 553 572 weight_substep(intermediate_timestep_count) ) ) 554 573 ! 555 574 !-- Tendencies for q, qr, nr, pt: 556 575 tend_qr(k,j,i) = tend_qr(k,j,i) + evap 557 tend_q(k,j,i) = tend_q(k,j,i) - evap576 tend_q(k,j,i) = tend_q(k,j,i) - evap 558 577 tend_nr(k,j,i) = tend_nr(k,j,i) + c_evap * evap / xr(k) * hyrho(k) 559 578 tend_pt(k,j,i) = tend_pt(k,j,i) + evap * l_d_cp * pt_d_t(k) 579 560 580 ENDIF 581 561 582 ENDDO 562 583 563 584 END SUBROUTINE evaporation_rain_ij 585 564 586 565 587 SUBROUTINE sedimentation_cloud_ij( i, j ) … … 575 597 INTEGER :: i, j, k 576 598 REAL :: sed_q_const, sigma_gc = 1.3, k_st = 1.2E8 599 577 600 ! 578 601 !-- Sedimentation of cloud droplets (Heus et al., 2010): … … 589 612 ENDDO 590 613 ! 591 !-- Tendency for q, pt:614 !-- Tendency for q, pt: 592 615 DO k = nzb_2d(j,i)+1, nzt 593 616 tend_q(k,j,i) = tend_q(k,j,i) + ( sed_q(k+1) - sed_q(k) ) * & … … 599 622 END SUBROUTINE sedimentation_cloud_ij 600 623 624 601 625 SUBROUTINE sedimentation_rain_ij( i, j ) 602 626 … … 618 642 !-- Computation of sedimentation flux. Implementation according to Stevens 619 643 !-- and Seifert (2008). 620 621 644 IF ( intermediate_timestep_count == 1 ) prr(:,j,i) = 0.0 622 645 … … 678 701 679 702 ELSE 703 680 704 nr_slope = 0.0 681 705 qr_slope = 0.0 706 682 707 ENDIF 683 708 ! … … 715 740 k_run = k 716 741 c_run = MIN( 1.0, c_qr(k) ) 742 717 743 DO WHILE ( c_run > 0.0 .AND. k_run <= nzt-1 ) 744 718 745 flux = flux + hyrho(k_run) * & 719 746 ( qr(k_run,j,i) + qr_slope(k_run) * ( 1.0 - c_run ) * & … … 722 749 k_run = k_run + 1 723 750 c_run = MIN( 1.0, c_qr(k_run) - z_run * ddzu(k_run) ) 751 724 752 ENDDO 725 753 ! … … 748 776 749 777 END SUBROUTINE sedimentation_rain_ij 778 750 779 751 780 ! … … 761 790 REAL :: gamm, ser, tmp, x_gamm, xx, y_gamm 762 791 INTEGER :: j 792 763 793 764 794 x_gamm = xx … … 767 797 tmp = ( x_gamm + 0.5 ) * LOG( tmp ) - tmp 768 798 ser = 1.000000000190015 769 do j = 1, 6 799 800 DO j = 1, 6 770 801 y_gamm = y_gamm + 1.0 771 802 ser = ser + cof( j ) / y_gamm 772 enddo 803 ENDDO 804 773 805 ! 774 806 !-- Until this point the algorithm computes the logarithm of the gamma … … 776 808 ! gamm = EXP( tmp + LOG( stp * ser / x_gamm ) ) 777 809 gamm = EXP( tmp ) * stp * ser / x_gamm 810 778 811 RETURN 779 812 -
palm/trunk/SOURCE/modules.f90
r1096 r1106 20 20 ! Current revisions: 21 21 ! ------------------ 22 ! test: different dpk definition for CUDA FFT 22 ! test: different dpk definition for CUDA FFT removed!!!! delete this comment 23 ! before next check in 24 ! array_kind renamed precision_kind, pdims defined in serial code 25 ! bugfix: default value assigned to coupling_start_time 23 26 ! 24 27 ! Former revisions: … … 366 369 367 370 368 MODULE array_kind371 MODULE precision_kind 369 372 370 373 !------------------------------------------------------------------------------! … … 378 381 spk = SELECTED_REAL_KIND( 6 ) 379 382 380 !-- test for CUDA FFT 381 ! INTEGER, PARAMETER :: single_kind = KIND( 0.0 ), & 382 ! double_kind = KIND( 0.0D0 ) 383 384 ! INTEGER, PARAMETER :: dpk = double_kind 385 386 SAVE 387 388 END MODULE array_kind 383 SAVE 384 385 END MODULE precision_kind 389 386 390 387 … … 399 396 !------------------------------------------------------------------------------! 400 397 401 USE array_kind398 USE precision_kind 402 399 403 400 REAL, DIMENSION(:), ALLOCATABLE :: & … … 781 778 canyon_wall_left = 9999999.9, canyon_wall_south = 9999999.9, & 782 779 cthf = 0.0, cfl_factor = -1.0, cos_alpha_surface, & 783 coupling_start_time , disturbance_amplitude = 0.25, &780 coupling_start_time = 0.0, disturbance_amplitude = 0.25, & 784 781 disturbance_energy_limit = 0.01, & 785 782 disturbance_level_b = -9999999.9, & … … 1367 1364 !------------------------------------------------------------------------------! 1368 1365 1369 USE array_kind1366 USE precision_kind 1370 1367 1371 1368 CHARACTER (LEN=15) :: bc_par_lr = 'cyclic', bc_par_ns = 'cyclic', & … … 1475 1472 #endif 1476 1473 #endif 1477 CHARACTER(LEN=5) :: myid_char = '' 1478 INTEGER :: acc_rank, id_inflow = 0, id_recycling = 0, & 1479 myid = 0, num_acc_per_node = 0, & 1480 target_id, npex = -1, npey = -1, numprocs = 1, & 1481 numprocs_previous_run = -1, & 1482 tasks_per_node = -9999, threads_per_task = 1 1474 CHARACTER(LEN=5) :: myid_char = '' 1475 INTEGER :: acc_rank, id_inflow = 0, id_recycling = 0, & 1476 myid = 0, num_acc_per_node = 0, & 1477 target_id, npex = -1, npey = -1, numprocs = 1, & 1478 numprocs_previous_run = -1, & 1479 tasks_per_node = -9999, threads_per_task = 1 1480 1481 INTEGER :: pdims(2) = 1 1483 1482 1484 1483 INTEGER, DIMENSION(:,:), ALLOCATABLE :: hor_index_bounds, & … … 1498 1497 type_x, type_x_int, type_xy, type_y, type_y_int 1499 1498 1500 INTEGER :: ibuf(12), pcoord(2) , pdims(2)1499 INTEGER :: ibuf(12), pcoord(2) 1501 1500 1502 1501 #if ! defined ( __check ) -
palm/trunk/SOURCE/poisfft.f90
r1104 r1106 20 20 ! Current revisions: 21 21 ! ----------------- 22 ! 22 ! routines fftx, ffty, fftxp, fftyp removed, calls replaced by fft_x, fft_y, 23 ! in the 1D-decomposition routines fft_x, ffty are replaced by fft_x_1d, 24 ! fft_y_1d 23 25 ! 24 26 ! Former revisions: … … 237 239 238 240 CALL cpu_log( log_point_s(4), 'fft_x', 'start' ) 239 CALL fft xp( ar, 'forward' )241 CALL fft_x( ar, 'forward' ) 240 242 CALL cpu_log( log_point_s(4), 'fft_x', 'pause' ) 241 243 … … 247 249 248 250 CALL cpu_log( log_point_s(7), 'fft_y', 'start' ) 249 CALL fft yp( ar, 'forward' )251 CALL fft_y( ar, 'forward' ) 250 252 CALL cpu_log( log_point_s(7), 'fft_y', 'pause' ) 251 253 … … 257 259 258 260 ! 259 !-- Solve the Poisson equation in z-direction in cartesian space.261 !-- Solve the tridiagonal equation system along z 260 262 CALL cpu_log( log_point_s(6), 'tridia', 'start' ) 261 263 CALL tridia( ar ) … … 270 272 271 273 CALL cpu_log( log_point_s(7), 'fft_y', 'continue' ) 272 CALL fft yp( ar, 'backward' )274 CALL fft_y( ar, 'backward' ) 273 275 CALL cpu_log( log_point_s(7), 'fft_y', 'stop' ) 274 276 … … 280 282 281 283 CALL cpu_log( log_point_s(4), 'fft_x', 'continue' ) 282 CALL fft xp( ar, 'backward' )284 CALL fft_x( ar, 'backward' ) 283 285 CALL cpu_log( log_point_s(4), 'fft_x', 'stop' ) 284 286 … … 295 297 ! 296 298 !-- Two-dimensional Fourier Transformation along x- and y-direction. 299 CALL cpu_log( log_point_s(5), 'transpo forward', 'start' ) 300 !$acc data copyin( ar, work ) 301 CALL transpose_zx( ar, work, ar ) 302 !$acc update host( ar ) 303 CALL cpu_log( log_point_s(5), 'transpo forward', 'pause' ) 304 305 297 306 CALL cpu_log( log_point_s(4), 'fft_x', 'start' ) 298 CALL fft x( ar, 'forward' )307 CALL fft_x( ar, 'forward' ) 299 308 CALL cpu_log( log_point_s(4), 'fft_x', 'pause' ) 309 310 CALL cpu_log( log_point_s(5), 'transpo forward', 'continue' ) 311 CALL transpose_xy( ar, work, ar ) 312 CALL cpu_log( log_point_s(5), 'transpo forward', 'pause' ) 313 300 314 CALL cpu_log( log_point_s(7), 'fft_y', 'start' ) 301 CALL fft y( ar, 'forward' )315 CALL fft_y( ar, 'forward' ) 302 316 CALL cpu_log( log_point_s(7), 'fft_y', 'pause' ) 303 317 304 318 ! 305 !-- Solve the Poisson equation in z-direction in cartesian space. 319 !-- Solve the tridiagonal equation system along z 320 CALL cpu_log( log_point_s(5), 'transpo forward', 'continue' ) 321 CALL transpose_yz( ar, work, ar ) 322 CALL cpu_log( log_point_s(5), 'transpo forward', 'stop' ) 323 306 324 CALL cpu_log( log_point_s(6), 'tridia', 'start' ) 307 325 CALL tridia( ar ) 308 326 CALL cpu_log( log_point_s(6), 'tridia', 'stop' ) 309 327 328 CALL cpu_log( log_point_s(8), 'transpo invers', 'start' ) 329 CALL transpose_zy( ar, work, ar ) 330 CALL cpu_log( log_point_s(8), 'transpo invers', 'pause' ) 331 310 332 ! 311 333 !-- Inverse Fourier Transformation. 312 334 CALL cpu_log( log_point_s(7), 'fft_y', 'continue' ) 313 CALL fft y( ar, 'backward' )335 CALL fft_y( ar, 'backward' ) 314 336 CALL cpu_log( log_point_s(7), 'fft_y', 'stop' ) 337 338 CALL cpu_log( log_point_s(8), 'transpo invers', 'continue' ) 339 CALL transpose_yx( ar, work, ar ) 340 CALL cpu_log( log_point_s(8), 'transpo invers', 'pause' ) 341 315 342 CALL cpu_log( log_point_s(4), 'fft_x', 'continue' ) 316 CALL fft x( ar, 'backward' )343 CALL fft_x( ar, 'backward' ) 317 344 CALL cpu_log( log_point_s(4), 'fft_x', 'stop' ) 345 346 CALL cpu_log( log_point_s(8), 'transpo invers', 'continue' ) 347 CALL transpose_xz( ar, work, ar ) 348 CALL cpu_log( log_point_s(8), 'transpo invers', 'stop' ) 349 350 !$acc end data 318 351 319 352 #endif … … 346 379 REAL, DIMENSION(5,nxl_z:nxr_z,0:nz-1) :: tri 347 380 348 #if defined( __parallel )349 381 REAL :: ar(nxl_z:nxr_z,nys_z:nyn_z,1:nz) 350 #else351 REAL :: ar(1:nz,nys_z:nyn_z,nxl_z:nxr_z)352 #endif353 382 354 383 … … 494 523 !-- Forward substitution. 495 524 DO i = nxl_z, nxr_z 496 #if defined( __parallel )497 525 ar1(i,0) = ar(i,j,1) 498 #else499 ar1(i,0) = ar(1,j,i)500 #endif501 526 ENDDO 502 527 DO k = 1, nz - 1 503 528 DO i = nxl_z, nxr_z 504 #if defined( __parallel )505 529 ar1(i,k) = ar(i,j,k+1) - tri(5,i,k) * ar1(i,k-1) 506 #else507 ar1(i,k) = ar(k+1,j,i) - tri(5,i,k) * ar1(i,k-1)508 #endif509 530 ENDDO 510 531 ENDDO … … 516 537 !-- the model domain. 517 538 DO i = nxl_z, nxr_z 518 #if defined( __parallel )519 539 ar(i,j,nz) = ar1(i,nz-1) / ( tri(4,i,nz-1) + 1.0E-20 ) 520 #else521 ar(nz,j,i) = ar1(i,nz-1) / ( tri(4,i,nz-1) + 1.0E-20 )522 #endif523 540 ENDDO 524 541 DO k = nz-2, 0, -1 525 542 DO i = nxl_z, nxr_z 526 #if defined( __parallel )527 543 ar(i,j,k+1) = ( ar1(i,k) - tri(3,i,k) * ar(i,j,k+2) ) & 528 544 / tri(4,i,k) 529 #else530 ar(k+1,j,i) = ( ar1(i,k) - tri(3,i,k) * ar(k+2,j,i) ) &531 / tri(4,i,k)532 #endif533 545 ENDDO 534 546 ENDDO … … 540 552 IF ( ibc_p_b == 1 .AND. ibc_p_t == 1 ) THEN 541 553 IF ( j == 0 .AND. nxl_z == 0 ) THEN 542 #if defined( __parallel )543 554 DO k = 1, nz 544 555 ar(nxl_z,j,k) = 0.0 545 556 ENDDO 546 #else547 DO k = 1, nz548 ar(k,j,nxl_z) = 0.0549 ENDDO550 #endif551 557 ENDIF 552 558 ENDIF … … 581 587 END SUBROUTINE tridia 582 588 583 584 #if defined( __parallel )585 SUBROUTINE fftxp( ar, direction )586 587 !------------------------------------------------------------------------------!588 ! Fourier-transformation along x-direction Parallelized version589 !------------------------------------------------------------------------------!590 591 IMPLICIT NONE592 593 CHARACTER (LEN=*) :: direction594 INTEGER :: j, k595 REAL :: ar(0:nx,nys_x:nyn_x,nzb_x:nzt_x)596 597 !598 !-- Performing the fft with one of the methods implemented599 !$OMP PARALLEL PRIVATE ( j, k )600 !$OMP DO601 DO k = nzb_x, nzt_x602 DO j = nys_x, nyn_x603 CALL fft_x( ar(0:nx,j,k), direction )604 ENDDO605 ENDDO606 !$OMP END PARALLEL607 608 END SUBROUTINE fftxp609 610 #else611 SUBROUTINE fftx( ar, direction )612 613 !------------------------------------------------------------------------------!614 ! Fourier-transformation along x-direction Non parallel version615 !------------------------------------------------------------------------------!616 617 IMPLICIT NONE618 619 CHARACTER (LEN=*) :: direction620 INTEGER :: i, j, k621 REAL :: ar(1:nz,0:ny,0:nx)622 623 !624 !-- Performing the fft with one of the methods implemented625 !$OMP PARALLEL PRIVATE ( j, k )626 !$OMP DO627 DO k = 1, nz628 DO j = 0, ny629 CALL fft_x( ar(k,j,0:nx), direction )630 ENDDO631 ENDDO632 !$OMP END PARALLEL633 634 END SUBROUTINE fftx635 #endif636 637 638 #if defined( __parallel )639 SUBROUTINE fftyp( ar, direction )640 641 !------------------------------------------------------------------------------!642 ! Fourier-transformation along y-direction Parallelized version643 !------------------------------------------------------------------------------!644 645 IMPLICIT NONE646 647 CHARACTER (LEN=*) :: direction648 INTEGER :: i, k649 REAL :: ar(0:ny,nxl_y:nxr_y,nzb_y:nzt_y)650 651 !652 !-- Performing the fft with one of the methods implemented653 !$OMP PARALLEL PRIVATE ( i, k )654 !$OMP DO655 DO k = nzb_y, nzt_y656 DO i = nxl_y, nxr_y657 CALL fft_y( ar(0:ny,i,k), direction )658 ENDDO659 ENDDO660 !$OMP END PARALLEL661 662 END SUBROUTINE fftyp663 664 #else665 SUBROUTINE ffty( ar, direction )666 667 !------------------------------------------------------------------------------!668 ! Fourier-transformation along y-direction Non parallel version669 !------------------------------------------------------------------------------!670 671 IMPLICIT NONE672 673 CHARACTER (LEN=*) :: direction674 INTEGER :: i, k675 REAL :: ar(1:nz,0:ny,0:nx)676 677 !678 !-- Performing the fft with one of the methods implemented679 !$OMP PARALLEL PRIVATE ( i, k )680 !$OMP DO681 DO k = 1, nz682 DO i = 0, nx683 CALL fft_y( ar(k,0:ny,i), direction )684 ENDDO685 ENDDO686 !$OMP END PARALLEL687 688 END SUBROUTINE ffty689 #endif690 589 691 590 #if defined( __parallel ) … … 729 628 !-- 1d-decomposition along x. Resort the data in a way that x becomes 730 629 !-- the first index. 731 CALL cpu_log( log_point_s(7), 'fft_y ', 'start' )630 CALL cpu_log( log_point_s(7), 'fft_y_1d', 'start' ) 732 631 733 632 IF ( host(1:3) == 'nec' ) THEN … … 782 681 ! 783 682 !-- FFT along y 784 CALL fft_y ( work_ffty(:,ir), 'forward' )683 CALL fft_y_1d( work_ffty(:,ir), 'forward' ) 785 684 786 685 ENDDO … … 800 699 801 700 ENDIF 802 CALL cpu_log( log_point_s(7), 'fft_y ', 'pause' )701 CALL cpu_log( log_point_s(7), 'fft_y_1d', 'pause' ) 803 702 804 703 ! … … 853 752 !-- Resort the data in a way that y becomes the first index and carry out the 854 753 !-- backward fft along y. 855 CALL cpu_log( log_point_s(7), 'fft_y ', 'continue' )754 CALL cpu_log( log_point_s(7), 'fft_y_1d', 'continue' ) 856 755 857 756 IF ( host(1:3) == 'nec' ) THEN … … 910 809 !-- FFT along y 911 810 ir = i-iouter+1 ! counter within a stride 912 CALL fft_y ( work_ffty(:,ir), 'backward' )811 CALL fft_y_1d( work_ffty(:,ir), 'backward' ) 913 812 914 813 DO j = 0, ny … … 924 823 ENDIF 925 824 926 CALL cpu_log( log_point_s(7), 'fft_y ', 'stop' )825 CALL cpu_log( log_point_s(7), 'fft_y_1d', 'stop' ) 927 826 928 827 END SUBROUTINE tr_xy_ffty … … 958 857 959 858 960 CALL cpu_log( log_point_s(33), 'fft_x + tridia', 'start' )859 CALL cpu_log( log_point_s(33), 'fft_x_1d + tridia', 'start' ) 961 860 962 861 ALLOCATE( tri(5,0:nx,0:nz-1,0:threads_per_task-1) ) … … 998 897 ENDDO 999 898 1000 CALL fft_x ( work_fftx, 'forward' )899 CALL fft_x_1d( work_fftx, 'forward' ) 1001 900 1002 901 DO i = 0, nx … … 1038 937 ENDDO 1039 938 1040 CALL fft_x ( work_fftx, 'backward' )939 CALL fft_x_1d( work_fftx, 'backward' ) 1041 940 1042 941 m = 0 … … 1056 955 DEALLOCATE( tri ) 1057 956 1058 CALL cpu_log( log_point_s(33), 'fft_x + tridia', 'stop' )957 CALL cpu_log( log_point_s(33), 'fft_x_1d + tridia', 'stop' ) 1059 958 1060 959 END SUBROUTINE fftx_tri_fftx … … 1092 991 !-- 1d-decomposition along y. Resort the data in a way that y becomes 1093 992 !-- the first index. 1094 CALL cpu_log( log_point_s(4), 'fft_x ', 'start' )993 CALL cpu_log( log_point_s(4), 'fft_x_1d', 'start' ) 1095 994 1096 995 IF ( host(1:3) == 'nec' ) THEN … … 1144 1043 DO k = 1, nz 1145 1044 1146 CALL fft_x ( work_fftx(0:nx,k,j), 'forward' )1045 CALL fft_x_1d( work_fftx(0:nx,k,j), 'forward' ) 1147 1046 1148 1047 DO i = 0, nx … … 1155 1054 1156 1055 ENDIF 1157 CALL cpu_log( log_point_s(4), 'fft_x ', 'pause' )1056 CALL cpu_log( log_point_s(4), 'fft_x_1d', 'pause' ) 1158 1057 1159 1058 ! … … 1205 1104 !-- 1d-decomposition along y. Resort the data in a way that y becomes 1206 1105 !-- the first index. 1207 CALL cpu_log( log_point_s(4), 'fft_x ', 'continue' )1106 CALL cpu_log( log_point_s(4), 'fft_x_1d', 'continue' ) 1208 1107 1209 1108 IF ( host(1:3) == 'nec' ) THEN … … 1248 1147 ENDDO 1249 1148 1250 CALL fft_x ( work_fftx(0:nx,k,j), 'backward' )1149 CALL fft_x_1d( work_fftx(0:nx,k,j), 'backward' ) 1251 1150 1252 1151 ENDDO … … 1264 1163 1265 1164 ENDIF 1266 CALL cpu_log( log_point_s(4), 'fft_x ', 'stop' )1165 CALL cpu_log( log_point_s(4), 'fft_x_1d', 'stop' ) 1267 1166 1268 1167 END SUBROUTINE tr_yx_fftx … … 1298 1197 1299 1198 1300 CALL cpu_log( log_point_s(39), 'fft_y + tridia', 'start' )1199 CALL cpu_log( log_point_s(39), 'fft_y_1d + tridia', 'start' ) 1301 1200 1302 1201 ALLOCATE( tri(5,0:ny,0:nz-1,0:threads_per_task-1) ) … … 1338 1237 ENDDO 1339 1238 1340 CALL fft_y ( work_ffty, 'forward' )1239 CALL fft_y_1d( work_ffty, 'forward' ) 1341 1240 1342 1241 DO j = 0, ny … … 1378 1277 ENDDO 1379 1278 1380 CALL fft_y ( work_ffty, 'backward' )1279 CALL fft_y_1d( work_ffty, 'backward' ) 1381 1280 1382 1281 m = 0 … … 1396 1295 DEALLOCATE( tri ) 1397 1296 1398 CALL cpu_log( log_point_s(39), 'fft_y + tridia', 'stop' )1297 CALL cpu_log( log_point_s(39), 'fft_y_1d + tridia', 'stop' ) 1399 1298 1400 1299 END SUBROUTINE ffty_tri_ffty -
palm/trunk/SOURCE/poisfft_hybrid.f90
r1037 r1106 20 20 ! Current revisions: 21 21 ! ----------------- 22 ! 22 ! calls of fft_x, fft_y replaced by fft_x_1d, fft_y_1d 23 23 ! 24 24 ! Former revisions: … … 334 334 CALL cpu_log( log_point_s(30), 'poisfft_hybrid_omp', 'start' ) 335 335 336 CALL cpu_log( log_point_s(7), 'fft_y ', 'start' )336 CALL cpu_log( log_point_s(7), 'fft_y_1d', 'start' ) 337 337 338 338 !$OMP PARALLEL PRIVATE (i,iouter,ii,ir,iei,j,k,m,n,ffty_ar) … … 353 353 ENDDO 354 354 355 CALL fft_y ( ffty_ar(:,ir), 'forward' )355 CALL fft_y_1d( ffty_ar(:,ir), 'forward' ) 356 356 ENDDO 357 357 … … 371 371 !$OMP END PARALLEL 372 372 373 CALL cpu_log( log_point_s(7), 'fft_y ', 'pause' )373 CALL cpu_log( log_point_s(7), 'fft_y_1d', 'pause' ) 374 374 375 375 #if defined( __parallel ) … … 385 385 #endif 386 386 387 CALL cpu_log( log_point_s(33), 'fft_x + tridia', 'start' )387 CALL cpu_log( log_point_s(33), 'fft_x_1d + tridia', 'start' ) 388 388 389 389 #if defined( __KKMP ) … … 406 406 ENDDO 407 407 408 CALL fft_x ( fftx_ar, 'forward' )408 CALL fft_x_1d( fftx_ar, 'forward' ) 409 409 410 410 DO i = nxl_a, nxr_a … … 422 422 ENDDO 423 423 424 CALL fft_x ( fftx_ar, 'backward' )424 CALL fft_x_1d( fftx_ar, 'backward' ) 425 425 426 426 m = nxl_a … … 438 438 #endif 439 439 440 CALL cpu_log( log_point_s(33), 'fft_x + tridia', 'stop' )440 CALL cpu_log( log_point_s(33), 'fft_x_1d + tridia', 'stop' ) 441 441 442 442 #if defined( __parallel ) … … 453 453 #endif 454 454 455 CALL cpu_log( log_point_s(7), 'fft_y ', 'continue' )455 CALL cpu_log( log_point_s(7), 'fft_y_1d', 'continue' ) 456 456 457 457 !$OMP PARALLEL PRIVATE (i,iouter,ii,ir,iei,j,k,m,n,ffty_ar) … … 475 475 ii = nxl + i 476 476 ir = i - iouter + 1 477 CALL fft_y ( ffty_ar(:,ir), 'backward' )477 CALL fft_y_1d( ffty_ar(:,ir), 'backward' ) 478 478 479 479 DO j = nys_a, nyn_a … … 486 486 !$OMP END PARALLEL 487 487 488 CALL cpu_log( log_point_s(7), 'fft_y ', 'stop' )488 CALL cpu_log( log_point_s(7), 'fft_y_1d', 'stop' ) 489 489 490 490 CALL cpu_log( log_point_s(30), 'poisfft_hybrid_omp', 'stop' ) … … 702 702 CALL cpu_log( log_point_s(30), 'poisfft_hybrid_nodes', 'start' ) 703 703 704 CALL cpu_log( log_point_s(7), 'fft_y ', 'start' )704 CALL cpu_log( log_point_s(7), 'fft_y_1d', 'start' ) 705 705 706 706 ! … … 719 719 ENDDO 720 720 721 CALL fft_y ( ffty_ar(:,ir), 'forward' )721 CALL fft_y_1d( ffty_ar(:,ir), 'forward' ) 722 722 ENDDO 723 723 … … 738 738 ENDDO 739 739 740 CALL cpu_log( log_point_s(7), 'fft_y ', 'pause' )740 CALL cpu_log( log_point_s(7), 'fft_y_1d', 'pause' ) 741 741 742 742 CALL cpu_log( log_point_s(32), 'alltoall_task', 'start' ) … … 767 767 CALL cascade( 2, j, nys_p, nyn_p ) 768 768 769 CALL cpu_log( log_point_s(33), 'fft_x + tridia', 'start' )769 CALL cpu_log( log_point_s(33), 'fft_x_1d + tridia', 'start' ) 770 770 DO k = 1, nz 771 771 … … 780 780 ENDDO 781 781 782 CALL fft_x ( fftx_ar, 'forward' )782 CALL fft_x_1d( fftx_ar, 'forward' ) 783 783 784 784 DO i = nxl_a, nxr_a … … 796 796 ENDDO 797 797 798 CALL fft_x ( fftx_ar, 'backward' )798 CALL fft_x_1d( fftx_ar, 'backward' ) 799 799 800 800 m = nxl_a … … 809 809 ENDDO 810 810 811 CALL cpu_log( log_point_s(33), 'fft_x + tridia', 'stop' )811 CALL cpu_log( log_point_s(33), 'fft_x_1d + tridia', 'stop' ) 812 812 nw2 = nw1 * SIZE( work1, 3 ) 813 813 CALL cpu_log( log_point_s(37), 'alltoall_node', 'continue' ) … … 833 833 CALL cpu_log( log_point_s(32), 'alltoall_task', 'stop' ) 834 834 835 CALL cpu_log( log_point_s(7), 'fft_y ', 'continue' )835 CALL cpu_log( log_point_s(7), 'fft_y_1d', 'continue' ) 836 836 837 837 DO iouter = nxl_p, nxr_p, istride … … 855 855 ii = nxl + i 856 856 ir = i - iouter + 1 857 CALL fft_y ( ffty_ar(:,ir), 'backward' )857 CALL fft_y_1d( ffty_ar(:,ir), 'backward' ) 858 858 859 859 DO j = nys_a, nyn_a … … 865 865 ENDDO 866 866 867 CALL cpu_log( log_point_s(7), 'fft_y ', 'stop' )867 CALL cpu_log( log_point_s(7), 'fft_y_1d', 'stop' ) 868 868 869 869 CALL cpu_log( log_point_s(30), 'poisfft_hybrid_nodes', 'stop' ) -
palm/trunk/SOURCE/prognostic_equations.f90
r1093 r1106 20 20 ! Current revisions: 21 21 ! ------------------ 22 ! 22 ! small changes in code formatting 23 23 ! 24 24 ! Former revisions: … … 418 418 419 419 ! 420 !-- If required, calculate tendencies for total water content, rain water 421 !-- content, rain drop concentration and liquid temperature 422 423 IF ( cloud_physics .AND. icloud_scheme == 0 ) THEN 424 425 tend_q(:,j,i) = 0.0 426 tend_qr(:,j,i) = 0.0 427 tend_nr(:,j,i) = 0.0 428 tend_pt(:,j,i) = 0.0 429 ! 430 !-- Droplet size distribution (dsd) properties are needed for the 431 !-- computation of selfcollection, breakup, evaporation and 432 !-- sedimentation of rain. 433 IF ( precipitation ) THEN 434 CALL dsd_properties( i,j ) 435 CALL autoconversion( i,j ) 436 CALL accretion( i,j ) 437 CALL selfcollection_breakup( i,j ) 438 CALL evaporation_rain( i,j ) 439 CALL sedimentation_rain( i,j ) 440 ENDIF 441 442 IF ( drizzle ) CALL sedimentation_cloud( i,j ) 443 444 ENDIF 420 !-- If required, calculate tendencies for total water content, rain water 421 !-- content, rain drop concentration and liquid temperature 422 IF ( cloud_physics .AND. icloud_scheme == 0 ) THEN 423 424 tend_q(:,j,i) = 0.0 425 tend_qr(:,j,i) = 0.0 426 tend_nr(:,j,i) = 0.0 427 tend_pt(:,j,i) = 0.0 428 ! 429 !-- Droplet size distribution (dsd) properties are needed for the 430 !-- computation of selfcollection, breakup, evaporation and 431 !-- sedimentation of rain 432 IF ( precipitation ) THEN 433 CALL dsd_properties( i,j ) 434 CALL autoconversion( i,j ) 435 CALL accretion( i,j ) 436 CALL selfcollection_breakup( i,j ) 437 CALL evaporation_rain( i,j ) 438 CALL sedimentation_rain( i,j ) 439 ENDIF 440 441 IF ( drizzle ) CALL sedimentation_cloud( i,j ) 442 443 ENDIF 445 444 446 445 ! … … 469 468 ENDIF 470 469 470 ! 471 471 !-- Using microphysical tendencies (latent heat) 472 472 IF ( cloud_physics ) THEN 473 473 IF ( icloud_scheme == 0 ) THEN 474 474 tend(:,j,i) = tend(:,j,i) + tend_pt(:,j,i) 475 ELSEIF ( icloud_scheme == 1 .AND. precipitation) THEN475 ELSEIF ( icloud_scheme == 1 .AND. precipitation ) THEN 476 476 CALL impact_of_latent_heat( i, j ) 477 477 ENDIF … … 480 480 ! 481 481 !-- Consideration of heat sources within the plant canopy 482 IF ( plant_canopy .AND. ( cthf /= 0.0 ) )THEN482 IF ( plant_canopy .AND. cthf /= 0.0 ) THEN 483 483 CALL plant_canopy_model( i, j, 4 ) 484 484 ENDIF 485 485 486 486 ! 487 !-- If required, compute influenceof large-scale subsidence/ascent487 !-- If required, compute effect of large-scale subsidence/ascent 488 488 IF ( large_scale_subsidence ) THEN 489 489 CALL subsidence( i, j, tend, pt, pt_init ) 490 490 ENDIF 491 492 491 493 492 CALL user_actions( i, j, 'pt-tendency' ) … … 600 599 IF ( icloud_scheme == 0 ) THEN 601 600 tend(:,j,i) = tend(:,j,i) + tend_q(:,j,i) 602 ELSEIF ( icloud_scheme == 1 .AND.precipitation ) THEN601 ELSEIF ( icloud_scheme == 1 .AND. precipitation ) THEN 603 602 CALL calc_precipitation( i, j ) 604 603 ENDIF … … 606 605 ! 607 606 !-- Sink or source of scalar concentration due to canopy elements 608 IF ( plant_canopy ) CALL plant_canopy_model( i, j, 5 )607 IF ( plant_canopy ) CALL plant_canopy_model( i, j, 5 ) 609 608 610 609 ! … … 645 644 !-- If required, calculate prognostic equations for rain water content 646 645 !-- and rain drop concentration 647 IF ( cloud_physics .AND.icloud_scheme == 0 ) THEN646 IF ( cloud_physics .AND. icloud_scheme == 0 ) THEN 648 647 ! 649 648 !-- Calculate prognostic equation for rain water content … … 706 705 IF ( timestep_scheme(1:5) == 'runge' ) THEN 707 706 IF ( ws_scheme_sca ) THEN 708 CALL advec_s_ws( i, j, nr, 'nr', flux_s_nr, &709 diss_s_nr, flux_l_nr, diss_l_nr, &710 i_omp_start, tn )707 CALL advec_s_ws( i, j, nr, 'nr', flux_s_nr, & 708 diss_s_nr, flux_l_nr, diss_l_nr, & 709 i_omp_start, tn ) 711 710 ELSE 712 711 CALL advec_s_pw( i, j, nr ) -
palm/trunk/SOURCE/transpose.f90
r1093 r1106 20 20 ! Current revisions: 21 21 ! ----------------- 22 ! 22 ! preprocessor lines rearranged so that routines can also be used in serial 23 ! (non-parallel) mode 23 24 ! 24 25 ! Former revisions: … … 84 85 work(nnx*nny*nnz) 85 86 86 #if defined( __parallel )87 88 87 ! 89 88 !-- Rearrange indices of input array in order to make data to be send … … 100 99 !$OMP END PARALLEL 101 100 102 ! 103 !-- Transpose array 104 CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' ) 105 IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr ) 106 CALL MPI_ALLTOALL( f_inv(nys_x,nzb_x,0), sendrecvcount_xy, MPI_REAL, & 107 work(1), sendrecvcount_xy, MPI_REAL, & 108 comm1dy, ierr ) 109 CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' ) 110 111 ! 112 !-- Reorder transposed array 101 IF ( numprocs /= 1 ) THEN 102 103 #if defined( __parallel ) 104 ! 105 !-- Transpose array 106 CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' ) 107 IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr ) 108 CALL MPI_ALLTOALL( f_inv(nys_x,nzb_x,0), sendrecvcount_xy, MPI_REAL, & 109 work(1), sendrecvcount_xy, MPI_REAL, & 110 comm1dy, ierr ) 111 CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' ) 112 113 ! 114 !-- Reorder transposed array 113 115 !$OMP PARALLEL PRIVATE ( i, j, k, l, m, ys ) 114 116 !$OMP DO 115 DO l = 0, pdims(2) - 1 116 m = l * ( nxr_y - nxl_y + 1 ) * ( nzt_y - nzb_y + 1 ) * & 117 ( nyn_x - nys_x + 1 ) 118 ys = 0 + l * ( nyn_x - nys_x + 1 ) 119 DO i = nxl_y, nxr_y 120 DO k = nzb_y, nzt_y 121 DO j = ys, ys + nyn_x - nys_x 122 m = m + 1 123 f_out(j,i,k) = work(m) 124 ENDDO 125 ENDDO 126 ENDDO 127 ENDDO 128 !$OMP END PARALLEL 129 117 DO l = 0, pdims(2) - 1 118 m = l * ( nxr_y - nxl_y + 1 ) * ( nzt_y - nzb_y + 1 ) * & 119 ( nyn_x - nys_x + 1 ) 120 ys = 0 + l * ( nyn_x - nys_x + 1 ) 121 DO i = nxl_y, nxr_y 122 DO k = nzb_y, nzt_y 123 DO j = ys, ys + nyn_x - nys_x 124 m = m + 1 125 f_out(j,i,k) = work(m) 126 ENDDO 127 ENDDO 128 ENDDO 129 ENDDO 130 !$OMP END PARALLEL 130 131 #endif 132 133 ELSE 134 135 ! 136 !-- Reorder transposed array 137 !$OMP PARALLEL PRIVATE ( i, j, k ) 138 !$OMP DO 139 DO k = nzb_y, nzt_y 140 DO i = nxl_y, nxr_y 141 DO j = 0, ny 142 f_out(j,i,k) = f_inv(j,k,i) 143 ENDDO 144 ENDDO 145 ENDDO 146 !$OMP END PARALLEL 147 148 ENDIF 131 149 132 150 END SUBROUTINE transpose_xy … … 158 176 work(nnx*nny*nnz) 159 177 160 #if defined( __parallel )161 178 162 179 ! … … 164 181 !-- reordered locally and therefore no transposition has to be done. 165 182 IF ( pdims(1) /= 1 ) THEN 183 184 #if defined( __parallel ) 166 185 ! 167 186 !-- Reorder input array for transposition … … 203 222 ENDDO 204 223 !$OMP END PARALLEL 224 #endif 225 205 226 ELSE 227 206 228 ! 207 229 !-- Reorder the array in a way that the z index is in first position … … 229 251 230 252 ENDIF 231 232 233 #endif234 253 235 254 END SUBROUTINE transpose_xz … … 261 280 work(nnx*nny*nnz) 262 281 282 IF ( numprocs /= 1 ) THEN 283 263 284 #if defined( __parallel ) 264 265 ! 266 !-- Reorder input array for transposition 285 ! 286 !-- Reorder input array for transposition 267 287 !$OMP PARALLEL PRIVATE ( i, j, k, l, m, ys ) 268 288 !$OMP DO 269 DO l = 0, pdims(2) - 1 270 m = l * ( nxr_y - nxl_y + 1 ) * ( nzt_y - nzb_y + 1 ) * & 271 ( nyn_x - nys_x + 1 ) 272 ys = 0 + l * ( nyn_x - nys_x + 1 ) 289 DO l = 0, pdims(2) - 1 290 m = l * ( nxr_y - nxl_y + 1 ) * ( nzt_y - nzb_y + 1 ) * & 291 ( nyn_x - nys_x + 1 ) 292 ys = 0 + l * ( nyn_x - nys_x + 1 ) 293 DO i = nxl_y, nxr_y 294 DO k = nzb_y, nzt_y 295 DO j = ys, ys + nyn_x - nys_x 296 m = m + 1 297 work(m) = f_in(j,i,k) 298 ENDDO 299 ENDDO 300 ENDDO 301 ENDDO 302 !$OMP END PARALLEL 303 304 ! 305 !-- Transpose array 306 CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' ) 307 IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr ) 308 CALL MPI_ALLTOALL( work(1), sendrecvcount_xy, MPI_REAL, & 309 f_inv(nys_x,nzb_x,0), sendrecvcount_xy, MPI_REAL, & 310 comm1dy, ierr ) 311 CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' ) 312 #endif 313 314 ELSE 315 316 ! 317 !-- Reorder array f_in the same way as ALLTOALL did it 318 !$OMP PARALLEL PRIVATE ( i, j, k ) 319 !$OMP DO 273 320 DO i = nxl_y, nxr_y 274 321 DO k = nzb_y, nzt_y 275 DO j = ys, ys + nyn_x - nys_x 276 m = m + 1 277 work(m) = f_in(j,i,k) 278 ENDDO 279 ENDDO 280 ENDDO 281 ENDDO 282 !$OMP END PARALLEL 283 284 ! 285 !-- Transpose array 286 CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' ) 287 IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr ) 288 CALL MPI_ALLTOALL( work(1), sendrecvcount_xy, MPI_REAL, & 289 f_inv(nys_x,nzb_x,0), sendrecvcount_xy, MPI_REAL, & 290 comm1dy, ierr ) 291 CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' ) 322 DO j = 0, ny 323 f_inv(j,k,i) = f_in(j,i,k) 324 ENDDO 325 ENDDO 326 ENDDO 327 !$OMP END PARALLEL 328 329 ENDIF 292 330 293 331 ! … … 303 341 ENDDO 304 342 !$OMP END PARALLEL 305 306 #endif307 343 308 344 END SUBROUTINE transpose_yx … … 402 438 work(nnx*nny*nnz) 403 439 404 #if defined( __parallel )405 406 440 ! 407 441 !-- Rearrange indices of input array in order to make data to be send … … 424 458 !-- of the data is necessary and no transposition has to be done. 425 459 IF ( pdims(1) == 1 ) THEN 460 426 461 !$OMP PARALLEL PRIVATE ( i, j, k ) 427 462 !$OMP DO … … 434 469 ENDDO 435 470 !$OMP END PARALLEL 436 RETURN 437 ENDIF 438 439 ! 440 !-- Transpose array 441 CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' ) 442 IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr ) 443 CALL MPI_ALLTOALL( f_inv(nxl_y,nzb_y,0), sendrecvcount_yz, MPI_REAL, & 444 work(1), sendrecvcount_yz, MPI_REAL, & 445 comm1dx, ierr ) 446 CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' ) 447 448 ! 449 !-- Reorder transposed array 471 472 ELSE 473 474 #if defined( __parallel ) 475 ! 476 !-- Transpose array 477 CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' ) 478 IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr ) 479 CALL MPI_ALLTOALL( f_inv(nxl_y,nzb_y,0), sendrecvcount_yz, MPI_REAL, & 480 work(1), sendrecvcount_yz, MPI_REAL, & 481 comm1dx, ierr ) 482 CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' ) 483 484 ! 485 !-- Reorder transposed array 450 486 !$OMP PARALLEL PRIVATE ( i, j, k, l, m, zs ) 451 487 !$OMP DO 452 DO l = 0, pdims(1) - 1 453 m = l * ( nyn_z - nys_z + 1 ) * ( nzt_y - nzb_y + 1 ) * & 454 ( nxr_z - nxl_z + 1 ) 455 zs = 1 + l * ( nzt_y - nzb_y + 1 ) 456 DO j = nys_z, nyn_z 457 DO k = zs, zs + nzt_y - nzb_y 458 DO i = nxl_z, nxr_z 459 m = m + 1 460 f_out(i,j,k) = work(m) 461 ENDDO 462 ENDDO 463 ENDDO 464 ENDDO 465 !$OMP END PARALLEL 466 488 DO l = 0, pdims(1) - 1 489 m = l * ( nyn_z - nys_z + 1 ) * ( nzt_y - nzb_y + 1 ) * & 490 ( nxr_z - nxl_z + 1 ) 491 zs = 1 + l * ( nzt_y - nzb_y + 1 ) 492 DO j = nys_z, nyn_z 493 DO k = zs, zs + nzt_y - nzb_y 494 DO i = nxl_z, nxr_z 495 m = m + 1 496 f_out(i,j,k) = work(m) 497 ENDDO 498 ENDDO 499 ENDDO 500 ENDDO 501 !$OMP END PARALLEL 467 502 #endif 503 504 ENDIF 468 505 469 506 END SUBROUTINE transpose_yz … … 490 527 INTEGER :: i, j, k, l, m, xs 491 528 492 REAL :: f_in(1:nz,nys:nyn,nxl:nxr), f_inv(nys:nyn,nxl:nxr,1:nz), & 493 f_out(0:nx,nys_x:nyn_x,nzb_x:nzt_x), & 529 REAL :: f_in(1:nz,nys:nyn,nxl:nxr), f_out(0:nx,nys_x:nyn_x,nzb_x:nzt_x), & 494 530 work(nnx*nny*nnz) 495 531 496 #if defined( __parallel ) 532 !$acc declare create ( f_inv ) 533 REAL :: f_inv(nys:nyn,nxl:nxr,1:nz) 534 497 535 498 536 ! … … 501 539 !$OMP PARALLEL PRIVATE ( i, j, k ) 502 540 !$OMP DO 541 !$acc kernels present( f_in ) 542 !$acc loop 503 543 DO k = 1,nz 504 544 DO i = nxl, nxr 545 !$acc loop vector( 32 ) 505 546 DO j = nys, nyn 506 547 f_inv(j,i,k) = f_in(k,j,i) … … 516 557 !-- of the data is necessary and no transposition has to be done. 517 558 IF ( pdims(1) == 1 ) THEN 518 !$OMP PARALLEL PRIVATE ( i, j, k ) 519 !$OMP DO 559 560 !$OMP PARALLEL PRIVATE ( i, j, k ) 561 !$OMP DO 562 !$acc kernels present( f_out ) 563 !$acc loop 520 564 DO k = 1, nz 521 565 DO i = nxl, nxr 566 !$acc loop vector( 32 ) 522 567 DO j = nys, nyn 523 568 f_out(i,j,k) = f_inv(j,i,k) … … 526 571 ENDDO 527 572 !$OMP END PARALLEL 528 RETURN 573 574 ELSE 575 576 #if defined( __parallel ) 577 ! 578 !-- Transpose array 579 CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' ) 580 IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr ) 581 CALL MPI_ALLTOALL( f_inv(nys,nxl,1), sendrecvcount_zx, MPI_REAL, & 582 work(1), sendrecvcount_zx, MPI_REAL, & 583 comm1dx, ierr ) 584 CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' ) 585 586 ! 587 !-- Reorder transposed array 588 !$OMP PARALLEL PRIVATE ( i, j, k, l, m, xs ) 589 !$OMP DO 590 DO l = 0, pdims(1) - 1 591 m = l * ( nzt_x - nzb_x + 1 ) * nnx * ( nyn_x - nys_x + 1 ) 592 xs = 0 + l * nnx 593 DO k = nzb_x, nzt_x 594 DO i = xs, xs + nnx - 1 595 DO j = nys_x, nyn_x 596 m = m + 1 597 f_out(i,j,k) = work(m) 598 ENDDO 599 ENDDO 600 ENDDO 601 ENDDO 602 !$OMP END PARALLEL 603 #endif 604 529 605 ENDIF 530 531 !532 !-- Transpose array533 CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )534 IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )535 CALL MPI_ALLTOALL( f_inv(nys,nxl,1), sendrecvcount_zx, MPI_REAL, &536 work(1), sendrecvcount_zx, MPI_REAL, &537 comm1dx, ierr )538 CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )539 540 !541 !-- Reorder transposed array542 !$OMP PARALLEL PRIVATE ( i, j, k, l, m, xs )543 !$OMP DO544 DO l = 0, pdims(1) - 1545 m = l * ( nzt_x - nzb_x + 1 ) * nnx * ( nyn_x - nys_x + 1 )546 xs = 0 + l * nnx547 DO k = nzb_x, nzt_x548 DO i = xs, xs + nnx - 1549 DO j = nys_x, nyn_x550 m = m + 1551 f_out(i,j,k) = work(m)552 ENDDO553 ENDDO554 ENDDO555 ENDDO556 !$OMP END PARALLEL557 558 #endif559 606 560 607 END SUBROUTINE transpose_zx … … 586 633 work(nnx*nny*nnz) 587 634 588 #if defined( __parallel )589 590 635 ! 591 636 !-- If the PE grid is one-dimensional along y, the array has only to be 592 637 !-- reordered locally and therefore no transposition has to be done. 593 638 IF ( pdims(1) /= 1 ) THEN 639 640 #if defined( __parallel ) 594 641 ! 595 642 !-- Reorder input array for transposition … … 619 666 comm1dx, ierr ) 620 667 CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' ) 621 622 ! 623 !-- Reorder transposed array in a way that the y index is in first position 624 !$OMP PARALLEL PRIVATE ( i, j, k ) 625 !$OMP DO 626 DO j = 0, ny 627 DO k = nzb_y, nzt_y 628 DO i = nxl_y, nxr_y 629 f_out(j,i,k) = f_inv(i,k,j) 630 ENDDO 631 ENDDO 632 ENDDO 633 !$OMP END PARALLEL 668 #endif 669 634 670 ELSE 635 671 ! 636 !-- Reorder the array in a way that the y index is in first position672 !-- Reorder the array in the same way like ALLTOALL did it 637 673 !$OMP PARALLEL PRIVATE ( i, j, k ) 638 674 !$OMP DO … … 645 681 ENDDO 646 682 !$OMP END PARALLEL 647 !648 !-- Move data to output array649 !$OMP PARALLEL PRIVATE ( i, j, k )650 !$OMP DO651 DO k = nzb_y, nzt_y652 DO i = nxl_y, nxr_y653 DO j = 0, ny654 f_out(j,i,k) = f_inv(i,k,j)655 ENDDO656 ENDDO657 ENDDO658 !$OMP END PARALLEL659 683 660 684 ENDIF 661 685 662 #endif 686 ! 687 !-- Reorder transposed array in a way that the y index is in first position 688 !$OMP PARALLEL PRIVATE ( i, j, k ) 689 !$OMP DO 690 DO k = nzb_y, nzt_y 691 DO i = nxl_y, nxr_y 692 DO j = 0, ny 693 f_out(j,i,k) = f_inv(i,k,j) 694 ENDDO 695 ENDDO 696 ENDDO 697 !$OMP END PARALLEL 663 698 664 699 END SUBROUTINE transpose_zy -
palm/trunk/SOURCE/user_data_output_3d.f90
r1037 r1106 19 19 ! 20 20 ! Current revisions: 21 ! ----------------- 21 ! ------------------ 22 ! array_kind renamed precision_kind 22 23 ! 23 24 ! Former revisions: … … 40 41 !------------------------------------------------------------------------------! 41 42 42 USE array_kind43 43 USE indices 44 USE precision_kind 44 45 USE user 45 46
Note: See TracChangeset
for help on using the changeset viewer.