Ignore:
Timestamp:
Mar 4, 2013 5:31:38 AM (11 years ago)
Author:
raasch
Message:

New:
---

FFT solver for serial runs ported to GPU using CUDA FFT;
preprocessor lines in transpose routines rearranged so that the routines can also
be used in serial (non-parallel) mode;
transpositions are now also carried out in serial mode; routines fftx, fftxp replaced
by calls of fft_x; fft_x replaced by fft_x_1d in the 1D-decomposition routines
(Makefile, Makefile_check, fft_xy, poisfft, poisfft_hybrid, transpose, new: cuda_fft_interfaces)

--stdin argument for mpiexec on lckyuh; -y and -Y settings are output to the header (mrun)

Changed:


Module array_kind renamed precision_kind
(check_open, data_output_3d, fft_xy, modules, user_data_output_3d)

some format changes for coupled atmosphere-ocean runs (header)
small changes in code formatting (microphysics, prognostic_equations)

Errors:


bugfix: default value (0) assigned to coupling_start_time (modules)
bugfix: initial time for preruns of coupled runs is output as -coupling_start_time (data_output_profiles)

File:
1 edited

Legend:

Unmodified
Added
Removed
  • palm/trunk/SOURCE/poisfft_hybrid.f90

    r1037 r1106  
    2020! Current revisions:
    2121! -----------------
    22 !
     22! calls of fft_x, fft_y replaced by fft_x_1d, fft_y_1d
    2323!
    2424! Former revisions:
     
    334334       CALL cpu_log( log_point_s(30), 'poisfft_hybrid_omp', 'start' )
    335335
    336        CALL cpu_log( log_point_s(7), 'fft_y', 'start' )
     336       CALL cpu_log( log_point_s(7), 'fft_y_1d', 'start' )
    337337
    338338!$OMP  PARALLEL PRIVATE (i,iouter,ii,ir,iei,j,k,m,n,ffty_ar)
     
    353353                ENDDO
    354354   
    355                 CALL fft_y( ffty_ar(:,ir), 'forward' )
     355                CALL fft_y_1d( ffty_ar(:,ir), 'forward' )
    356356             ENDDO
    357357
     
    371371!$OMP  END PARALLEL
    372372
    373        CALL cpu_log( log_point_s(7), 'fft_y', 'pause' )
     373       CALL cpu_log( log_point_s(7), 'fft_y_1d', 'pause' )
    374374
    375375#if defined( __parallel )
     
    385385#endif
    386386
    387        CALL cpu_log( log_point_s(33), 'fft_x + tridia', 'start' )
     387       CALL cpu_log( log_point_s(33), 'fft_x_1d + tridia', 'start' )
    388388
    389389#if defined( __KKMP )
     
    406406             ENDDO
    407407
    408              CALL fft_x( fftx_ar, 'forward' )
     408             CALL fft_x_1d( fftx_ar, 'forward' )
    409409
    410410             DO  i = nxl_a, nxr_a
     
    422422             ENDDO
    423423
    424              CALL fft_x( fftx_ar, 'backward' )
     424             CALL fft_x_1d( fftx_ar, 'backward' )
    425425
    426426             m = nxl_a
     
    438438#endif
    439439
    440        CALL cpu_log( log_point_s(33), 'fft_x + tridia', 'stop' )
     440       CALL cpu_log( log_point_s(33), 'fft_x_1d + tridia', 'stop' )
    441441
    442442#if defined( __parallel )
     
    453453#endif
    454454
    455        CALL cpu_log( log_point_s(7), 'fft_y', 'continue' )
     455       CALL cpu_log( log_point_s(7), 'fft_y_1d', 'continue' )
    456456
    457457!$OMP  PARALLEL PRIVATE (i,iouter,ii,ir,iei,j,k,m,n,ffty_ar)
     
    475475                ii = nxl + i
    476476                ir = i - iouter + 1
    477                 CALL fft_y( ffty_ar(:,ir), 'backward' )
     477                CALL fft_y_1d( ffty_ar(:,ir), 'backward' )
    478478
    479479                DO  j = nys_a, nyn_a
     
    486486!$OMP  END PARALLEL
    487487
    488        CALL cpu_log( log_point_s(7), 'fft_y', 'stop' )
     488       CALL cpu_log( log_point_s(7), 'fft_y_1d', 'stop' )
    489489
    490490       CALL cpu_log( log_point_s(30), 'poisfft_hybrid_omp', 'stop' )
     
    702702       CALL cpu_log( log_point_s(30), 'poisfft_hybrid_nodes', 'start' )
    703703
    704        CALL cpu_log( log_point_s(7), 'fft_y', 'start' )
     704       CALL cpu_log( log_point_s(7), 'fft_y_1d', 'start' )
    705705
    706706!
     
    719719                ENDDO
    720720   
    721                 CALL fft_y( ffty_ar(:,ir), 'forward' )
     721                CALL fft_y_1d( ffty_ar(:,ir), 'forward' )
    722722             ENDDO
    723723
     
    738738       ENDDO
    739739
    740        CALL cpu_log( log_point_s(7), 'fft_y', 'pause' )
     740       CALL cpu_log( log_point_s(7), 'fft_y_1d', 'pause' )
    741741
    742742       CALL cpu_log( log_point_s(32), 'alltoall_task', 'start' )
     
    767767          CALL cascade( 2, j, nys_p, nyn_p )
    768768
    769           CALL cpu_log( log_point_s(33), 'fft_x + tridia', 'start' )
     769          CALL cpu_log( log_point_s(33), 'fft_x_1d + tridia', 'start' )
    770770          DO  k = 1, nz
    771771
     
    780780             ENDDO
    781781
    782              CALL fft_x( fftx_ar, 'forward' )
     782             CALL fft_x_1d( fftx_ar, 'forward' )
    783783
    784784             DO  i = nxl_a, nxr_a
     
    796796             ENDDO
    797797
    798              CALL fft_x( fftx_ar, 'backward' )
     798             CALL fft_x_1d( fftx_ar, 'backward' )
    799799
    800800             m = nxl_a
     
    809809          ENDDO
    810810
    811           CALL cpu_log( log_point_s(33), 'fft_x + tridia', 'stop' )
     811          CALL cpu_log( log_point_s(33), 'fft_x_1d + tridia', 'stop' )
    812812          nw2 = nw1 * SIZE( work1, 3 )
    813813          CALL cpu_log( log_point_s(37), 'alltoall_node', 'continue' )
     
    833833       CALL cpu_log( log_point_s(32), 'alltoall_task', 'stop' )
    834834
    835        CALL cpu_log( log_point_s(7), 'fft_y', 'continue' )
     835       CALL cpu_log( log_point_s(7), 'fft_y_1d', 'continue' )
    836836
    837837       DO  iouter = nxl_p, nxr_p, istride
     
    855855                ii = nxl + i
    856856                ir = i - iouter + 1
    857                 CALL fft_y( ffty_ar(:,ir), 'backward' )
     857                CALL fft_y_1d( ffty_ar(:,ir), 'backward' )
    858858
    859859                DO  j = nys_a, nyn_a
     
    865865       ENDDO
    866866
    867        CALL cpu_log( log_point_s(7), 'fft_y', 'stop' )
     867       CALL cpu_log( log_point_s(7), 'fft_y_1d', 'stop' )
    868868
    869869       CALL cpu_log( log_point_s(30), 'poisfft_hybrid_nodes', 'stop' )
Note: See TracChangeset for help on using the changeset viewer.