Changeset 696 for palm/trunk
- Timestamp: Mar 18, 2011 7:03:49 AM
- Location: palm/trunk
- Files: 4 edited
palm/trunk/SCRIPTS/mrun
(r693 → r696)

  # 02/02/10 - Siggi - further adjustments on Tsubame and concerning openMP
  #                    usage
- # 09/02/10 - Siggi - adjustments for ibmkisti, mpt bugfix for netCDF4
+ # 09/03/10 - Siggi - adjustments for ibmkisti, mpt bugfix for netCDF4
  #                    usage, totalview usage extended
+ # 17/03/11 - Siggi - adjustments for openmp usage on ibmkisti
…
     export LANG=en_US
     export MP_SHARED_MEMORY=yes
-    export MP_SINGLE_THREAD=yes
-    export MEMORY_AFFINITY=MCM
+    if [[ $threads_per_task = 1 ]]
+    then
+       export MP_SINGLE_THREAD=yes
+       export MEMORY_AFFINITY=MCM
+    else
+       export OMP_NUM_THREADS=$threads_per_task
+    fi
  fi
…
     (( ii = $numprocs / $threads_per_task ))
     export OMP_NUM_THREADS=$threads_per_task
-#    echo "OMP_NUM_THREADS=$OMP_NUM_THREADS"
+#    echo "*** OMP_NUM_THREADS=$OMP_NUM_THREADS"
     if [[ $threads_per_task != 1 ]]
     then
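The mrun hunks above make the POE environment settings on ibmkisti depend on the run mode: single-thread pinning only for pure MPI runs, an exported OpenMP thread count otherwise. A minimal ksh sketch of that logic, assuming threads_per_task has already been set by mrun's option processing (the default of 1 here is a placeholder, not mrun's actual context):

#!/bin/ksh
# placeholder default; in mrun this value comes from option processing
threads_per_task=${threads_per_task:-1}

if [[ $threads_per_task = 1 ]]
then
      # pure MPI run: pin each single-threaded task to one MCM
   export MP_SINGLE_THREAD=yes
   export MEMORY_AFFINITY=MCM
else
      # hybrid MPI/OpenMP run: single-thread pinning must stay unset,
      # only the OpenMP thread count is exported
   export OMP_NUM_THREADS=$threads_per_task
fi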
palm/trunk/SCRIPTS/subjob
(r693 → r696)

  # 02/02/11 - Siggi - further asjustments for Tsubame concerning openMP
  # 06/03/11 - Siggi - adjustments for ibmkisti
+ # 17/03/11 - Siggi - adjustments for openmp usage on ibmkisti
…
  typeset -i cputime=0 memory=0 Memory=0 minuten resttime sekunden stunden
- typeset -i inumprocs nodes=0 processes_per_node=0 tasks_per_node=0 threads_per_task=1
+ typeset -i inumprocs mpi_tasks=0 nodes=0 processes_per_node=0 tasks_per_node=0 threads_per_task=1
  typeset -L20 spalte1
  typeset -R40 spalte2
…
     (( processes_per_node = tasks_per_node * threads_per_task ))

+    # Calculate number of MPI tasks
+    (( mpi_tasks = numprocs / threads_per_task ))

     # HEADER-AUSGABE
…
        network_to_use="# @ network.MPI = sn_all,shared,US"
        wall_clock_limit="# @ wall_clock_limit = $timestring"
-       rset="# @ rset = RSET_MCM_AFFINITY"
-       mcm_affinity_options="# @ mcm_affinity_options = mcm_mem_pref mcm_sni_none mcm_distribute"
+       if [[ $threads_per_task = 1 ]]
+       then
+          rset="# @ rset = RSET_MCM_AFFINITY"
+          mcm_affinity_options="# @ mcm_affinity_options = mcm_mem_pref mcm_sni_none mcm_distribute"
+       fi
        environment=""
        use_shell=""
…
        cat >> $job_to_send << %%END%%
-# @ total_tasks = $numprocs
+# @ total_tasks = $mpi_tasks
 # @ blocking = unlimited
 # @ queue
…
     fi

+    fi
+
+    # workaround because of silly job filter on ibmkisti
+    if [[ $remote_host = ibmkisti  &&  $threads_per_task != 1 ]]
+    then
+       echo  "export OMP_NUM_THREADS=$threads_per_task"  >>  $job_to_send
     fi
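The subjob hunks separate core counts from MPI task counts for LoadLeveler: total_tasks must name MPI tasks, so numprocs is divided by the OpenMP thread count. A short ksh sketch of the arithmetic with made-up numbers (64 cores, 4 threads per task, 8 tasks per node):

#!/bin/ksh
typeset -i numprocs=64 threads_per_task=4 tasks_per_node=8
typeset -i processes_per_node=0 mpi_tasks=0

(( processes_per_node = tasks_per_node * threads_per_task ))   # 32 cores per node
(( mpi_tasks = numprocs / threads_per_task ))                  # 16 MPI tasks

# LoadLeveler's total_tasks counts MPI tasks, not cores:
print "# @ total_tasks = $mpi_tasks"

With these numbers the job occupies two nodes (16 tasks at 8 per node), while requesting total_tasks = 64 would have asked LoadLeveler for four times the intended task count.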
palm/trunk/SOURCE/flow_statistics.f90
(r679 → r696)

  ! Current revisions:
  ! -----------------
- !
+ ! Bugfix: Summation of Wicker-Skamarock scheme fluxes and variances for all
+ !         threads
  !
  ! Former revisions:
…
  !-- the local array sums_l() for further computations
     IF ( ws_scheme_mom ) THEN
+
  !
  !--    According to the Neumann bc for the horizontal velocity components,
…
           sums_vs2_ws_l(nzt+1,sr) = sums_vs2_ws_l(nzt,sr)
        ENDIF
+
+       DO  i = 0, threads_per_task-1
  !
- !--    Swap the turbulent quantities evaluated in advec_ws.
-       sums_l(:,13,0) = sums_wsus_ws_l(:,sr)       ! w*u*
-       sums_l(:,15,0) = sums_wsvs_ws_l(:,sr)       ! w*v*
-       sums_l(:,30,0) = sums_us2_ws_l(:,sr)        ! u*2
-       sums_l(:,31,0) = sums_vs2_ws_l(:,sr)        ! v*2
-       sums_l(:,32,0) = sums_ws2_ws_l(:,sr)        ! w*2
-       sums_l(:,34,0) = sums_l(:,34,0) + 0.5 * &
-                        (sums_us2_ws_l(:,sr) + sums_vs2_ws_l(:,sr) &
-                        + sums_ws2_ws_l(:,sr))     ! e*
-       DO  k = nzb, nzt
-          sums_l(nzb+5,pr_palm,0) = sums_l(nzb+5,pr_palm,0) + 0.5 * ( &
-                                    sums_us2_ws_l(k,sr) + sums_vs2_ws_l(k,sr) + &
-                                    sums_ws2_ws_l(k,sr))
-       ENDDO
-    ENDIF
+ !--       Swap the turbulent quantities evaluated in advec_ws.
+          sums_l(:,13,i) = sums_wsus_ws_l(:,sr)       ! w*u*
+          sums_l(:,15,i) = sums_wsvs_ws_l(:,sr)       ! w*v*
+          sums_l(:,30,i) = sums_us2_ws_l(:,sr)        ! u*2
+          sums_l(:,31,i) = sums_vs2_ws_l(:,sr)        ! v*2
+          sums_l(:,32,i) = sums_ws2_ws_l(:,sr)        ! w*2
+          sums_l(:,34,i) = sums_l(:,34,i) + 0.5 * &
+                           ( sums_us2_ws_l(:,sr) + sums_vs2_ws_l(:,sr) + &
+                             sums_ws2_ws_l(:,sr) )    ! e*
+          DO  k = nzb, nzt
+             sums_l(nzb+5,pr_palm,i) = sums_l(nzb+5,pr_palm,i) + 0.5 * ( &
+                                       sums_us2_ws_l(k,sr) + &
+                                       sums_vs2_ws_l(k,sr) + &
+                                       sums_ws2_ws_l(k,sr) )
+          ENDDO
+       ENDDO
+
+    ENDIF
+
     IF ( ws_scheme_sca ) THEN
-       sums_l(:,17,0) = sums_wspts_ws_l(:,sr)      ! w*pt* from advec_s_ws
-       IF ( ocean ) sums_l(:,66,0) = sums_wssas_ws_l(:,sr) ! w*sa*
-       IF ( humidity .OR. passive_scalar ) sums_l(:,49,0) = &
-                                           sums_wsqs_ws_l(:,sr) !w*q*
+
+       DO  i = 0, threads_per_task-1
+          sums_l(:,17,i) = sums_wspts_ws_l(:,sr)      ! w*pt* from advec_s_ws
+          IF ( ocean ) sums_l(:,66,i) = sums_wssas_ws_l(:,sr) ! w*sa*
+          IF ( humidity .OR. passive_scalar ) sums_l(:,49,i) = &
+                                              sums_wsqs_ws_l(:,sr) !w*q*
+       ENDDO
+
     ENDIF
  !
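In flow_statistics the last index of sums_l is the OpenMP thread number, and the bugfix writes the sums produced by advec_ws into the slot of every thread (DO i = 0, threads_per_task-1) instead of thread 0 only. The following self-contained Fortran sketch shows the per-thread-slot pattern with made-up array bounds; the final collection loop is only a stand-in for the thread summation flow_statistics performs later and is not PALM code:

PROGRAM thread_slots
!$ USE omp_lib
   IMPLICIT NONE
   INTEGER, PARAMETER ::  nzb = 0, nzt = 15, max_threads = 8
   INTEGER ::  i, k, tn, threads_per_task
   REAL    ::  sums_l(nzb:nzt+1,0:max_threads-1)
   REAL    ::  sums(nzb:nzt+1)

   sums_l = 0.0
   threads_per_task = 1
!$ threads_per_task = omp_get_max_threads()

!$OMP PARALLEL PRIVATE( k, tn )
   tn = 0
!$ tn = omp_get_thread_num()
!$OMP DO
   DO  k = nzb, nzt+1
      sums_l(k,tn) = sums_l(k,tn) + REAL( k )   ! this thread's contribution
   ENDDO
!$OMP END PARALLEL

!
!-- Collect the per-thread slots into one profile (stand-in for the later
!-- thread summation in flow_statistics)
   sums = sums_l(:,0)
   DO  i = 1, threads_per_task-1
      sums = sums + sums_l(:,i)
   ENDDO
   PRINT*, 'total over all threads: ', SUM( sums )

END PROGRAM thread_slots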
palm/trunk/SOURCE/poisfft.f90
(r684 → r696)

  ! Current revisions:
  ! -----------------
- !
+ ! work_fftx removed from PRIVATE clauses in fftx_tr_xy and tr_yx_fftx
  !
  ! Former revisions:
…
  !-- Cache optimized code (there might be still a potential for better
  !-- optimization).
-!$OMP PARALLEL PRIVATE (i,j,k,work_fftx)
+!$OMP PARALLEL PRIVATE (i,j,k)
 !$OMP DO
     DO  i = 0, nx
…
  !-- Cache optimized code (there might be still a potential for better
  !-- optimization).
-!$OMP PARALLEL PRIVATE (i,j,k,work_fftx)
+!$OMP PARALLEL PRIVATE (i,j,k)
 !$OMP DO
     DO  j = nys, nyn
…
     tn = 0           ! Default thread number in case of one thread
-!$OMP PARALLEL PRIVATE ( i, j, k, m, n, tn, work_ffty, work_triy )
-!$OMP DO
+!$OMP PARALLEL DO PRIVATE ( i, j, k, m, n, tn, work_ffty, work_triy )
     DO  i = nxl_y, nxr_y
…
     ENDDO
-!$OMP END PARALLEL

     DEALLOCATE( tri )
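Both poisfft changes concern OpenMP data sharing. A work array whose loop iterations write disjoint slices, or that is still needed after the parallel region, must stay shared; naming it in a PRIVATE clause gives every thread an undefined throwaway copy (presumably the bug behind the work_fftx removal, though the changeset states no more than the log line). Likewise, the combined !$OMP PARALLEL DO construct ends with its loop, so the separate !$OMP END PARALLEL would no longer have a matching directive and is deleted in the last hunk. A minimal Fortran illustration of the privatization pitfall, not taken from poisfft:

PROGRAM private_pitfall
   IMPLICIT NONE
   INTEGER, PARAMETER ::  nx = 7
   INTEGER ::  i
   REAL    ::  work(0:nx)

!
!-- work stays shared: every iteration writes a disjoint element, and the
!-- array is read after the parallel region ends
!$OMP PARALLEL DO PRIVATE( i )
   DO  i = 0, nx
      work(i) = REAL( i )**2
   ENDDO

!
!-- With PRIVATE( work ) above, each thread would fill its own temporary
!-- copy and this line would print an undefined array
   PRINT*, work

END PROGRAM private_pitfall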