Ignore:
Timestamp:
Sep 10, 2013 8:59:13 AM (11 years ago)
Author:
raasch
Message:

New:


openACC porting of reduction operations
additional 3D-flag arrays for replacing the 2D-index arrays nzb_s_inner and nzb_diff_s_inner
(flow_statistics, init_grid, init_3d_model, modules, palm, pres, time_integration)

Changed:


for PGI/openACC performance reasons set default compile options for openACC to "-ta=nocache",
and set environment variable PGI_ACC_SYNCHRONOUS=1
(MAKE.inc.pgi.openacc, palm_simple_run)

wall_flags_0 changed to 32-bit INTEGER, additional array wall_flags_00 introduced to hold
bits 32-63
(advec_ws, init_grid, modules, palm)

Errors:


dummy argument tri in 1d-routines replaced by tri_for_1d because of name
conflict with array tri in module arrays_3d
(tridia_solver)

File:
1 edited

Legend:

Unmodified
Added
Removed
  • palm/trunk/SOURCE/advec_ws.f90

    r1132 r1221  
    2020! Current revisions:
    2121! ------------------
    22 !
     22! wall_flags_00 introduced, which holds bits 32-...
    2323!
    2424! Former revisions:
     
    16481648
    16491649          DO  k = nzb+1, nzb_max
    1650              ibit32 = IBITS(wall_flags_0(k,j,i),32,1)
     1650             ibit32 = IBITS(wall_flags_00(k,j,i),0,1)
    16511651             ibit31 = IBITS(wall_flags_0(k,j,i),31,1)
    16521652             ibit30 = IBITS(wall_flags_0(k,j,i),30,1)
     
    18081808                                        )
    18091809
    1810           ibit32 = IBITS(wall_flags_0(k,j,i),32,1)
     1810          ibit32 = IBITS(wall_flags_00(k,j,i),0,1)
    18111811          ibit31 = IBITS(wall_flags_0(k,j,i),31,1)
    18121812          ibit30 = IBITS(wall_flags_0(k,j,i),30,1)
     
    18451845!--       k index has to be modified near bottom and top, else array
    18461846!--       subscripts will be exceeded.
    1847           ibit35 = IBITS(wall_flags_0(k,j,i),35,1)
    1848           ibit34 = IBITS(wall_flags_0(k,j,i),34,1)
    1849           ibit33 = IBITS(wall_flags_0(k,j,i),33,1)
     1847          ibit35 = IBITS(wall_flags_00(k,j,i),3,1)
     1848          ibit34 = IBITS(wall_flags_00(k,j,i),2,1)
     1849          ibit33 = IBITS(wall_flags_00(k,j,i),1,1)
    18501850
    18511851          k_ppp = k + 3 * ibit35
     
    19421942!--       k index has to be modified near bottom and top, else array
    19431943!--       subscripts will be exceeded.
    1944           ibit35 = IBITS(wall_flags_0(k,j,i),35,1)
    1945           ibit34 = IBITS(wall_flags_0(k,j,i),34,1)
    1946           ibit33 = IBITS(wall_flags_0(k,j,i),33,1)
     1944          ibit35 = IBITS(wall_flags_00(k,j,i),3,1)
     1945          ibit34 = IBITS(wall_flags_00(k,j,i),2,1)
     1946          ibit33 = IBITS(wall_flags_00(k,j,i),1,1)
    19471947
    19481948          k_ppp = k + 3 * ibit35
     
    24542454!
    24552455!--    Computation of fluxes and tendency terms
    2456        !$acc kernels present( ddzw, sk, tend, u, v, w, wall_flags_0 )
     2456       !$acc kernels present( ddzw, sk, tend, u, v, w, wall_flags_0, wall_flags_00 )
    24572457       !$acc loop
    24582458       DO  i = i_left, i_right
     
    31553155!
    31563156!--    Computation of fluxes and tendency terms
    3157        !$acc  kernels present( ddzw, tend, u, v, w, wall_flags_0 )
     3157       !$acc  kernels present( ddzw, tend, u, v, w, wall_flags_0, wall_flags_00 )
    31583158       !$acc  loop
    31593159       DO i = i_left, i_right
     
    38723872!
    38733873!--    Computation of fluxes and tendency terms
    3874        !$acc kernels present( ddzw, tend, u, v, w, wall_flags_0 )
     3874       !$acc kernels present( ddzw, tend, u, v, w, wall_flags_0, wall_flags_00 )
    38753875       !$acc loop
    38763876       DO  i = i_left, i_right
     
    42264226          DO  k = nzb+1, nzb_max
    42274227
    4228              ibit32 = IBITS(wall_flags_0(k,j,i),32,1)
     4228             ibit32 = IBITS(wall_flags_00(k,j,i),0,1)
    42294229             ibit31 = IBITS(wall_flags_0(k,j,i),31,1)
    42304230             ibit30 = IBITS(wall_flags_0(k,j,i),30,1)
     
    43274327                                              )
    43284328
    4329                 ibit32 = IBITS(wall_flags_0(k,j,i),32,1)
     4329                ibit32 = IBITS(wall_flags_00(k,j,i),0,1)
    43304330                ibit31 = IBITS(wall_flags_0(k,j,i),31,1)
    43314331                ibit30 = IBITS(wall_flags_0(k,j,i),30,1)
     
    43644364!--             k index has to be modified near bottom and top, else array
    43654365!--             subscripts will be exceeded.
    4366                 ibit35 = IBITS(wall_flags_0(k,j,i),35,1)
    4367                 ibit34 = IBITS(wall_flags_0(k,j,i),34,1)
    4368                 ibit33 = IBITS(wall_flags_0(k,j,i),33,1)
     4366                ibit35 = IBITS(wall_flags_00(k,j,i),3,1)
     4367                ibit34 = IBITS(wall_flags_00(k,j,i),2,1)
     4368                ibit33 = IBITS(wall_flags_00(k,j,i),1,1)
    43694369
    43704370                k_ppp = k + 3 * ibit35
     
    44634463!--             k index has to be modified near bottom and top, else array
    44644464!--             subscripts will be exceeded.
    4465                 ibit35 = IBITS(wall_flags_0(k,j,i),35,1)
    4466                 ibit34 = IBITS(wall_flags_0(k,j,i),34,1)
    4467                 ibit33 = IBITS(wall_flags_0(k,j,i),33,1)
     4465                ibit35 = IBITS(wall_flags_00(k,j,i),3,1)
     4466                ibit34 = IBITS(wall_flags_00(k,j,i),2,1)
     4467                ibit33 = IBITS(wall_flags_00(k,j,i),1,1)
    44684468
    44694469                k_ppp = k + 3 * ibit35
     
    45654565       gv = 2.0 * v_gtrans
    45664566
     4567
    45674568!
    45684569!--    Computation of fluxes and tendency terms
    4569        !$acc kernels present( ddzu, tend, u, v, w, wall_flags_0 )
     4570       !$acc kernels present( ddzu, tend, u, v, w, wall_flags_0, wall_flags_00 )
    45704571       !$acc loop
    45714572       DO i = i_left, i_right
     
    45744575             DO  k = nzb+1, nzt
    45754576
     4577                ibit27 = IBITS(wall_flags_0(k,j,i),27,1)
     4578                ibit28 = IBITS(wall_flags_0(k,j,i),28,1)
    45764579                ibit29 = IBITS(wall_flags_0(k,j,i),29,1)
    4577                 ibit28 = IBITS(wall_flags_0(k,j,i),28,1)
    4578                 ibit27 = IBITS(wall_flags_0(k,j,i),27,1)
    4579 
    45804580
    45814581                u_comp_l                 = u(k+1,j,i) + u(k,j,i) - gu
     
    46404640                                 ( w(k,j,i+3) - w(k,j,i-2) )                 &
    46414641                                              )
    4642 
    4643                 ibit32 = IBITS(wall_flags_0(k,j,i),32,1)
     4642                ibit32 = IBITS(wall_flags_00(k,j,i),0,1)
    46444643                ibit31 = IBITS(wall_flags_0(k,j,i),31,1)
    46454644                ibit30 = IBITS(wall_flags_0(k,j,i),30,1)
     
    47074706                                              )
    47084707
    4709 
    4710                 ibit35 = IBITS(wall_flags_0(k-1,j,i),35,1)
    4711                 ibit34 = IBITS(wall_flags_0(k-1,j,i),34,1)
    4712                 ibit33 = IBITS(wall_flags_0(k-1,j,i),33,1)
     4708                ibit35 = IBITS(wall_flags_00(k-1,j,i),3,1)
     4709                ibit34 = IBITS(wall_flags_00(k-1,j,i),2,1)
     4710                ibit33 = IBITS(wall_flags_00(k-1,j,i),1,1)
    47134711
    47144712                k_pp  = k + 2 * ibit35
     
    47504748!--             k index has to be modified near bottom and top, else array
    47514749!--             subscripts will be exceeded.
    4752                 ibit35 = IBITS(wall_flags_0(k,j,i),35,1)
    4753                 ibit34 = IBITS(wall_flags_0(k,j,i),34,1)
    4754                 ibit33 = IBITS(wall_flags_0(k,j,i),33,1)
     4750                ibit35 = IBITS(wall_flags_00(k,j,i),3,1)
     4751                ibit34 = IBITS(wall_flags_00(k,j,i),2,1)
     4752                ibit33 = IBITS(wall_flags_00(k,j,i),1,1)
    47554753
    47564754                k_ppp = k + 3 * ibit35
Note: See TracChangeset for help on using the changeset viewer.