Changeset 622 for palm


Timestamp: Dec 10, 2010 8:08:13 AM
Author: raasch
Message:

New:
---

Optional barriers included in order to speed up the collective operations
MPI_ALLTOALL and MPI_ALLREDUCE. This feature is controlled with the new initial
parameter collective_wait. Default is .FALSE., but .TRUE. on SGI-type
systems. (advec_particles, advec_s_bc, buoyancy, check_for_restart,
cpu_statistics, data_output_2d, data_output_ptseries, flow_statistics,
global_min_max, inflow_turbulence, init_3d_model, init_particles, init_pegrid,
init_slope, parin, pres, poismg, set_particle_attributes, timestep,
read_var_list, user_statistics, write_compressed, write_var_list)
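
For illustration, the following minimal sketch shows the barrier pattern that
the listed routines now use in front of their collective calls. It is not code
from the changeset: the program, the local buffers sum_l/sum_total and the
hard-coded .TRUE. value of collective_wait are assumptions made for this
example; in PALM the switch, comm2d and ierr come from the pegrid module.

    PROGRAM collective_wait_sketch
    !
    !-- Minimal, self-contained sketch of the optional-barrier pattern: all
    !-- ranks are synchronized directly in front of the collective operation,
    !-- so that load imbalance shows up in MPI_BARRIER instead of being
    !-- charged to (and possibly slowing down) the MPI_ALLREDUCE itself.
       USE MPI
       IMPLICIT NONE

       INTEGER            ::  comm2d, ierr, myid, nprocs
       LOGICAL            ::  collective_wait = .TRUE.   ! PALM default: .FALSE.
       REAL, DIMENSION(1) ::  sum_l, sum_total

       CALL MPI_INIT( ierr )
       comm2d = MPI_COMM_WORLD
       CALL MPI_COMM_SIZE( comm2d, nprocs, ierr )
       CALL MPI_COMM_RANK( comm2d, myid, ierr )
       sum_l(1) = 1.0                       ! some local partial result

    !-- Optional barrier in front of the collective operation
       IF ( collective_wait )  CALL MPI_BARRIER( comm2d, ierr )
       CALL MPI_ALLREDUCE( sum_l, sum_total, 1, MPI_REAL, MPI_SUM, comm2d, ierr )

       IF ( myid == 0 )  PRINT*, 'global sum = ', sum_total(1)

       CALL MPI_FINALIZE( ierr )
    END PROGRAM collective_wait_sketch

The same guard is placed in front of the MPI_ALLTOALL calls in poisfft.f90 and
transpose.f90, as the corresponding diffs further down show.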

Adjustments for Kyushu Univ. (lcrte, ibmku). Concerning hybrid
(MPI/OpenMP) runs, the number of OpenMP threads per MPI task can now
be given as an argument to the mrun option -O. (mbuild, mrun, subjob)

Changed:


Initialization of the module command changed for SGI-ICE/lcsgi (mbuild, subjob)

Errors:


Location: palm/trunk
Files: 29 edited

  • palm/trunk/SCRIPTS/mbuild

    r562 r622  
    112112     #                    for lcxt4
    113113     # 07/09/10 - Siggi - bugfix for wrong netcdf/3.6.3 module on lcsgi
     114     # 08/12/10 - Siggi - initialization of the module command changed for
     115     #                    SGI-ICE/lcsgi
     116     #                    adjustments for Kyushu Univ. (lcrte, ibmku)
    114117
    115118
     
    464467    case  $remote_host  in
    465468        (lcmuk)          remote_addres=130.75.105.2;;
     469        (lcrte)          remote_addres=133.5.185.60;;
    466470        (lcsgib)         remote_addres=130.73.232.102;;
    467471        (lcsgih)         remote_addres=130.75.4.102;;
     
    472476        (decalpha)       remote_addres=165.132.26.56;;
    473477        (ibmh)           remote_addres=136.172.40.15;;
     478        (ibmku)          remote_addres=133.5.4.129;;
    474479        (ibms)           remote_addres=150.183.5.101;;
    475480        (ibmy)           remote_addres=165.132.26.58;;
     
    10631068          then
    10641069
    1065              print ". /usr/share/modules/init/bash; $module_calls  cd ${remote_md}; echo $make_call_string > LAST_MAKE_CALL; chmod u+x LAST_MAKE_CALL; $make_call_string; [[ \$? != 0 ]] && echo MAKE_ERROR" | ssh  ${remote_username}@${remote_addres} 2>&1 | tee ${remote_host}_last_make_protokoll
     1070#             print ". /usr/share/modules/init/bash; $module_calls  cd ${remote_md}; echo $make_call_string > LAST_MAKE_CALL; chmod u+x LAST_MAKE_CALL; $make_call_string; [[ \$? != 0 ]] && echo MAKE_ERROR" | ssh  ${remote_username}@${remote_addres} 2>&1 | tee ${remote_host}_last_make_protokoll
     1071             print "eval \`/sw/swdist/bin/modulesinit\`; $module_calls  cd ${remote_md}; echo $make_call_string > LAST_MAKE_CALL; chmod u+x LAST_MAKE_CALL; $make_call_string; [[ \$? != 0 ]] && echo MAKE_ERROR" | ssh  ${remote_username}@${remote_addres} 2>&1 | tee ${remote_host}_last_make_protokoll
    10661072
    10671073          elif [[ $remote_host = lctit ]]
  • palm/trunk/SCRIPTS/mrun

    r592 r622  
    210210     # 17/08/10 - BjornM - adjustments for interactive runs on lcxt4
    211211     # 07/09/10 - Siggi  - bugfix for wrong netcdf/3.6.3 module on lcsgi
     212     # 08/12/10 - Siggi  - new handling of openmp/hybrid runs, option -O
     213     #                     has now argument threads_per_task
     214     #                     adjustments for Kyushu Univ. (lcrte, ibmku)
    212215
    213216
     
    290293 read_from_config=""
    291294 restart_run=false
    292  return_addres=$(nslookup `hostname` 2>&1 | grep "Address:" | tail -1 | awk '{print $2}')
    293  if [[ $return_addres = 130.75.105.158 ]]
    294  then
    295     return_addres=172.20.25.41
    296     echo "+++ WARNING: return_addres changed to $return_addres !!!!!"
     295 if [[ `hostname` = rte10 ]]
     296 then
     297   return_addres=133.5.185.60
     298   echo "+++ WARNING: return_addres changed to $return_addres !!!!!"
     299 else
     300    return_addres=$(nslookup `hostname` 2>&1 | grep "Address:" | tail -1 | awk '{print $2}')
    297301 fi
    298302 return_password=""
     
    450454    # SHELLSCRIPT-OPTIONEN EINLESEN UND KOMMANDO NEU ZUSAMMENSETZEN, FALLS ES
    451455    # FUER FOLGEJOBS BENOETIGT WIRD
    452  while  getopts  :a:AbBc:Cd:D:Fg:G:h:H:i:IkK:m:M:n:o:Op:P:q:r:R:s:St:T:u:U:vxX:yY: option
     456 while  getopts  :a:AbBc:Cd:D:Fg:G:h:H:i:IkK:m:M:n:o:O:p:P:q:r:R:s:St:T:u:U:vxX:yY: option
    453457 do
    454458   case  $option  in
     
    474478       (n)   node_usage=$OPTARG; mc="$mc -n$OPTARG";;
    475479       (o)   output_list=$OPTARG; mc="$mc -o'$OPTARG'";;
    476        (O)   use_openmp=true; mc="$mc -O";;
     480       (O)   use_openmp=true; threads_per_task=$OPTARG; mc="$mc -O$OPTARG";;
    477481       (p)   package_list=$OPTARG; mc="$mc -p'$OPTARG'";;
    478482       (P)   return_password=$OPTARG; mc="$mc -P$OPTARG";;
     
    658662    do_remote=true
    659663    case  $host  in
    660         (ibm|ibmb|ibmh|ibms|ibmy|nech|necriam|lckyoto|lcsgib|lcsgih|lctit|unics|lcxt4|lcxt5m|lck)  true;;
     664        (ibm|ibmh|ibmku|ibms|ibmy|nech|necriam|lckyoto|lcsgib|lcsgih|lctit|unics|lcxt4|lcxt5m|lck)  true;;
    661665        (*)  printf "\n"
    662666             printf "\n  +++ sorry: execution of batch jobs on remote host \"$host\""
     
    835839                   do_remote=true
    836840                   case  $host  in
    837                        (ibm|ibms|ibmy|lckyoto|lcsgib|lcsgih|lctit|nech|necriam|unics|lcxt4|lcxt5m|lck)  true;;
     841                       (ibm|ibmh|ibmku|ibms|ibmy|lckyoto|lcsgib|lcsgih|lctit|nech|necriam|unics|lcxt4|lcxt5m|lck)  true;;
    838842                       (*)  printf "\n  +++ sorry: execution of batch jobs on remote host \"$host\""
    839843                            printf "\n      is not available"
     
    11171121    do_remote=true
    11181122    case  $host  in
    1119         (ibm|ibmb|ibmh|ibms|ibmy|lckyoto|lcsgib|lcsgih|lctit|nech|necriam|unics|lcxt4|lcxt5m|lck)  true;;
     1123        (ibm|ibmh|ibmku|ibms|ibmy|lckyoto|lcsgib|lcsgih|lctit|nech|necriam|unics|lcxt4|lcxt5m|lck)  true;;
    11201124        (*)  printf "\n"
    11211125             printf "\n  +++ sorry: execution of batch jobs on remote host \"$host\""
     
    11461150       # DEFAULT-WERT SETZEN) UND OB SIE EIN GANZZAHLIGER TEILER DER
    11471151       # GESAMTPROZESSORANZAHL IST
    1148     if [[ $host = nech  ||  $host = necriam  ||  $host = ibmh  ||  $host = ibmb  ||  $host = ibms ]]
     1152    if [[ $host = nech  ||  $host = necriam  ||  $host = ibmh  ||  $host = ibms ]]
    11491153    then
    11501154       [[ "$tasks_per_node" = "" ]]  &&  tasks_per_node=6
     
    11811185       # FALLS OPENMP PARALLELISIERUNG VERWENDET WERDEN SOLL, ANZAHL VON THREADS
    11821186       # SETZEN UND ZAHL DER TASKS PRO KNOTEN AUF 1 SETZEN
    1183     if [[ $use_openmp = true ]]
    1184     then
    1185        threads_per_task=$tasks_per_node
    1186        tasks_per_node=1
    1187     fi
     1187#    if [[ $use_openmp = true ]]
     1188#    then
     1189#       threads_per_task=$tasks_per_node
     1190#       tasks_per_node=1
     1191#    fi
    11881192
    11891193       # SETTINGS FOR SUBJOB-COMMAND
    1190     if [[ $(echo $host | cut -c1-5) = lcsgi ]]
    1191     then
    1192        (( tp1 = tasks_per_node * threads_per_task ))
    1193        TOPT="-T $tp1"
    1194     else
    1195        TOPT="-T $tasks_per_node"
    1196     fi
     1194    TOPT="-T $tasks_per_node"
    11971195    OOPT="-O $threads_per_task"
    11981196
     
    12671265 then
    12681266    case  $host  in
    1269         (ibmb)       if [[ $node_usage = shared ]]
    1270                      then
    1271                         queue=cshare
    1272                      else
    1273                         queue=csolo
    1274                      fi;;
    12751267        (ibmh)       queue=no_class;;
    12761268        (ibmy)       queue=parallel;;
     
    20202012 if [[ "$tmp_data_catalog" = "" ]]
    20212013 then
    2022     if [[ $localhost = ibmb ]]
    2023     then
    2024        tmp_data_catalog=$WORK/mrun_restart_data
    2025     elif [[ $localhost = nech ]]
     2014    if [[ $localhost = nech ]]
    20262015    then
    20272016       tmp_data_catalog=$WRKSHR/mrun_restart_data
     
    21782167    printf "| $spalte1$spalte2 | \n"
    21792168 fi
    2180  if [[ $threads_per_task != 1 ]]
     2169 if [[ $use_openmp = true ]]
    21812170 then
    21822171    spalte1="threads per task:"; spalte2="$threads_per_task"
     
    30052994       then
    30062995          dxladebug  a.out
    3007        elif [[ $localhost = ibmb  ||  $localhost = ibmh ]]
     2996       elif [[ $localhost = ibmh ]]
    30082997       then
    30092998
     
    30873076          exit
    30883077       fi
     3078
     3079       # end debug mode
    30893080    else
     3081
     3082          # normal execution
    30903083       if [[ -n $numprocs ]]
    30913084       then
     
    31353128                fi
    31363129             else
    3137                 if [[ $localhost = ibmb  ||  $localhost = ibmh  ||  $localhost = ibms ]]
     3130                if [[ $localhost = ibmh  ||  $localhost = ibms ]]
    31383131                then
    31393132                   poe  a.out  -procs $numprocs  -nodes 1  -rmpool 0  $ROPTS
    3140                 elif [[ $localhost = ibmy ]]
     3133                elif [[ $localhost = ibmku  ||  $localhost = ibmy ]]
    31413134                then
    31423135                   if [[ -f $hostfile ]]
     
    31683161                      echo "coupled_run $iia $iio"  >  runfile_atmos
    31693162                   fi
    3170                    ./a.out  -procs $tasks_per_node  $ROPTS  <  runfile_atmos
     3163                   if [[ $localhost = ibmy ]]
     3164                   then
     3165                      ./a.out  -procs $tasks_per_node  $ROPTS  <  runfile_atmos
     3166                   else
     3167                      poe  ./a.out  -procs $numprocs $ROPTS  <  runfile_atmos
     3168                   fi
    31713169
    31723170                else
     
    32923290                            export MPI_DSM_CPULIST="0,1,4,5,2,3,6,7:allhosts"
    32933291                         fi
     3292                      else
     3293                         unset MPI_DSM_CPULIST
    32943294                      fi
    32953295                          # MPI_IB_RAILS: use both IB rails on ICE2
     
    33043304
    33053305                          # next is test for openmp usage
    3306                       # mpiexec -n $ii -pernode  ./a.out  $ROPTS  < runfile_atmos
     3306                  #     echo "mpiexec -npernode $tasks_per_node  ./a.out  $ROPTS  < runfile_atmos"
     3307                  #     mpiexec -npernode $tasks_per_node  ./a.out  $ROPTS  < runfile_atmos
    33073308                   elif [[ $( echo $mpilib | cut -c1-3 ) = mva ]]
    33083309                   then
     
    33173318               #          export MV2_CPU_MAPPING=0,1,4,5,2,3,6,7
    33183319               #       fi
     3320                      [[ $use_openmp = true ]]  &&  unset MV2_CPU_MAPPING
    33193321                      echo "*** MV2_CPU_MAPPING=$MV2_CPU_MAPPING"
    3320                       if [[ $threads_per_task != 1 ]]
     3322                      if [[ $use_openmp = true ]]
    33213323                      then
    3322                          mpiexec -npernode 1  ./a.out  $ROPTS  <  runfile_atmos
     3324                         mpiexec -npernode $tasks_per_node  ./a.out  $ROPTS  <  runfile_atmos
    33233325                      else
    33243326                         mpiexec -np $ii  ./a.out  $ROPTS  < runfile_atmos
     
    36363638                   cst="/"
    36373639                fi
    3638                 if [[ $localhost = ibmb  ||  $localhost = nech ]]
     3640                if [[ $localhost = nech ]]
    36393641                then
    36403642
     
    37173719             if [[ $localhost != $fromhost ]]
    37183720             then
    3719                 if [[ $localhost = ibmh  ||  $localhost = ibmb  ||  $localhost = nech ]]
     3721                if [[ $localhost = ibmh  ||  $localhost = nech ]]
    37203722                then
    37213723
     
    42314233          then
    42324234
    4233              if [[ $localhost = lcsgih  ||  $localhost = lcsgib  ||  $localhost = nech  ||  $localhost = ibmb  ||  $localhost = ibmh  ||  $localhost = ibms  ||  $localhost = lctit ]]
     4235             if [[ $localhost = lcsgih  ||  $localhost = lcsgib  ||  $localhost = nech  ||  $localhost = ibmh  ||  $localhost = ibmku  ||  $localhost = ibms  ||  $localhost = lctit ]]
    42344236             then
    42354237                echo "*** ssh will be used to initiate restart-runs!"
     
    43484350    if [[ $use_openmp = true ]]
    43494351    then
    4350        mrun_com=${mrun_com}" -O"
    4351        [[ "$tasks_per_node" != "" ]] &&  mrun_com=${mrun_com}" -T $threads_per_task"
    4352     else
    4353        [[ "$tasks_per_node" != "" ]] &&  mrun_com=${mrun_com}" -T $tasks_per_node"
    4354     fi
     4352       mrun_com=${mrun_com}" -O $threads_per_task"
     4353    fi
     4354    [[ "$tasks_per_node" != "" ]] &&  mrun_com=${mrun_com}" -T $tasks_per_node"
    43554355    [[ $store_on_archive_system = true ]]  &&  mrun_com=${mrun_com}" -A"
    43564356    [[ $package_list != "" ]]     &&  mrun_com=${mrun_com}" -p \"$package_list\""
  • palm/trunk/SCRIPTS/subjob

    r555 r622  
    121121     # 25/08/10 - Siggi - new variable project_account in pbs-statements for
    122122     #                    lcxt4
     123     # 08/12/10 - Siggi - initialization of the module command changed for
     124     #                    SGI-ICE/lcsgi
     125     #                    adjustments for Kyushu Univ. (lcrte, ibmku)
    123126
    124127
     
    144147
    145148 typeset  -i   cputime=0  memory=0  Memory=0  minuten  resttime  sekunden  stunden
    146  typeset  -i   inumprocs  nodes=0  tasks_per_node=0  threads_per_task=1
     149 typeset  -i   inumprocs  nodes=0  processes_per_node=0 tasks_per_node=0  threads_per_task=1
    147150 typeset  -L20 spalte1
    148151 typeset  -R40 spalte2
     
    194197     (b01*|bicegate1)        local_addres=130.73.232.102; local_host=lcsgib;;
    195198     (bicegate2)             local_addres=130.73.232.103; local_host=lcsgib;;
    196      (breg*-en0|berni*-en0)  local_addres=130.73.230.10;  local_host=ibmb;;
    197199     (breva)                 local_addres=130.75.105.98;  local_host=lcmuk;;
    198200     (bicegate2)             local_addres=130.73.232.103; local_host=lcsgib;;
     
    228230     (paesano)               local_addres=130.75.105.46;  local_host=lcmuk;;
    229231     (quanero)               local_addres=130.75.105.107; local_host=lcmuk;;
     232     (rte*)                  local_addres=133.5.185.60;   local_host=lcrte;;
    230233     (scirocco)              local_addres=172.20.25.41;   local_host=lcmuk;;
    231234     (sun1|sun2)             local_addres=130.75.6.1;     local_host=unics;;
     
    235238     (tgg*)                  local_addres=172.17.75.161;  local_host=lctit;;
    236239     (vorias)                local_addres=172.20.25.43;   local_host=lcmuk;;
     240     (*.cc.kyushu-u.ac.jp)   local_addres=133.5.4.129;    local_host=ibmku;;
    237241     (*)                     printf "\n  +++ \"$local_host\" unknown";
    238242                             printf "\n      please inform S. Raasch!";
     
    289293    printf "\n        -D    only the job-file will be created   ---"
    290294    printf "\n        -h    execution host, available hosts:    $remote_host"
    291     printf "\n              ibm, ibmb, ibmh, ibms, ibmy, lcmuk,"
     295    printf "\n              ibm, ibmh, ibmku, ibms, ibmy, lc...,"
    292296    printf "\n              lctit, nech, necriam, unics"
    293297    printf "\n        -m    memory demand per process in MByte  ---"
     
    354358    case  $remote_host  in
    355359        (ibm)     queue=p690_standard; remote_addres=134.76.99.81; submcom=/usr/lpp/LoadL/full/bin/llsubmit;;
    356         (ibmb)    queue=cpar; remote_addres=130.73.230.10; submcom=/usr/lpp/LoadL/full/bin/llsubmit;;
    357360        (ibmh)    queue=no_class; remote_addres=136.172.40.15; submcom=/usr/lpp/LoadL/full/bin/llsubmit;;
     361        (ibmku)   queue=s4; remote_addres=133.5.4.129; submcom=/usr/local/bin/llsubmit;;
    358362        (ibms)    queue=p_normal; remote_addres=150.183.5.101; submcom=/usr/lpp/LoadL/full/bin/llsubmit;;
    359363        (ibmy)    queue=parallel; remote_addres=165.132.26.58; submcom=/usr/lpp/LoadL/full/bin/llsubmit;;
     
    386390                     (*)                                     error=true;;
    387391                 esac;;
    388         (ibmb)   case  $ndq  in
    389                      (cdata|cdev|cexp|c1|cshare|csolo|cspec) error=false;;
     392        (ibmh)   case  $ndq  in
     393                     (no_class) error=false;;
    390394                     (*)                                     error=true;;
    391395                 esac;;
    392         (ibmh)   case  $ndq  in
    393                      (no_class)  error=false;;
     396        (ibmku)  case  $ndq  in
     397                     (sdbg1|sdbg2|sdbg4|s4|s16|s32|s32-s)    error=false;;
    394398                     (*)                                     error=true;;
    395399                 esac;;
     
    450454
    451455
    452     # KNOTENNUTZUNG IN ENTWICKLERQUEUE MUSS SHARED SEIN
    453  if [[ $node_usage != shared  &&  $queue = cdev ]]
    454  then
    455     node_usage=shared
    456  fi
    457 
    458 
    459 
    460456    # PRUEFEN DER CPU-ZEIT, ZEIT NACH STUNDEN, MINUTEN UND SEKUNDEN
    461457    # AUFTEILEN
     
    530526 if (( tasks_per_node != 0 ))
    531527 then
    532     if [[ $(echo $remote_host | cut -c1-5) = lcsgi ]]
    533     then
    534        (( nodes = numprocs / tasks_per_node ))
    535     else
    536        (( nodes = numprocs / ( tasks_per_node * threads_per_task ) ))
    537     fi
    538  fi
    539 
     528    (( nodes = numprocs / ( tasks_per_node * threads_per_task ) ))
     529 fi
     530
     531    # Calculate number of processes per node
     532 (( processes_per_node = tasks_per_node * threads_per_task ))
    540533
    541534
     
    597590
    598591
    599     # QSUB- ODER LL-KOMMANDOS BZW. SKRIPTE  GENERIEREN
     592    # Generate the batch job scripts (qsub/msub/LoadLeveler)
    600593 if [[ $(echo $remote_host | cut -c1-3) = ibm  &&  $numprocs != 0 ]]
    601594 then
    602595
    603     if [[ $remote_host = ibmy ]]
    604     then
    605        consumable_memory=""
    606     else
    607        consumable_memory="ConsumableMemory($memory mb)"
    608     fi
     596       # General LoadLeveler settings
     597    execute_in_shell="#!/bin/ksh"
     598    use_shell="# @ shell = /bin/ksh"
     599    consumable_memory="ConsumableMemory($memory mb)"
     600    class="# @ class = $queue"
     601    environment="# @ environment = OMP_NUM_THREADS=$threads_per_task; MP_SHARED_MEMORY=yes"
     602    network_to_use="# @ network.mpi = sn_all,shared,us"
     603    data_limit="# @ data_limit = 1.76gb"
     604    image_size="# @ image_size = 50"
     605
    609606
    610607    if [[ $remote_host = ibmh ]]
     
    614611       class=""
    615612       environment=""
    616     else
    617        class="# @ class = $queue"
    618        environment="# @ environment = OMP_NUM_THREADS=$threads_per_task; MP_SHARED_MEMORY=yes"
    619        if [[ $queue = cdev ]]
    620        then
    621           data_limit="# @ data_limit = 1.76gb"
    622           network_to_use="# @ network.mpi = sn_all,shared,ip"
    623        else
    624           if [[ $remote_host = ibms ]]
    625           then
    626              network_to_use="# @ network.mpi = csss,shared,us"
    627           elif [[ $remote_host = ibmy ]]
    628           then
    629              network_to_use=""
    630           else
    631              network_to_use="# @ network.mpi = sn_all,shared,us"
    632              data_limit="# @ data_limit = 1.76gb"
    633           fi
    634        fi
     613    elif [[ $remote_host = ibmku ]]
     614    then
     615       execute_in_shell="#!/usr/bin/ksh"
     616       use_shell="# @ shell = /usr/bin/ksh"
     617       consumable_memory=""
     618       environment=""
     619       network_to_use="# @ network.mpi = sn_all,shared,us"
     620       data_limit=""
     621       image_size=""
     622    elif [[ $remote_host = ibms ]]
     623    then
     624       network_to_use="# @ network.mpi = csss,shared,us"
     625    elif [[ $remote_host = ibmy ]]
     626    then
     627       consumable_memory=""
     628       network_to_use=""
    635629    fi
    636630
    637631    cat > $job_to_send << %%END%%
    638 #!/bin/ksh
    639 # @ shell = /bin/ksh
     632$execute_in_shell
     633$use_shell
    640634
    641635# @ job_type = parallel
     
    645639# @ output = $remote_dayfile
    646640# @ error = $remote_dayfile
    647 # @ image_size = 50
     641$image_size
    648642$class
    649643$environment
     
    659653       cat >> $job_to_send << %%END%%
    660654# @ node = $nodes
    661 # @ tasks_per_node = $tasks_per_node
     655# @ tasks_per_node = $processes_per_node
    662656# @ node_usage = $node_usage
    663657# @ queue
     
    721715#PBS -A $project_account
    722716#PBS -l walltime=$timestring
    723 #PBS -l nodes=${nodes}:ppn=$tasks_per_node
     717#PBS -l nodes=${nodes}:ppn=$processes_per_node
    724718#PBS -l pmem=${memory}mb
    725719#PBS -m abe
     
    813807#PBS -N $job_name
    814808#PBS -l walltime=$timestring
    815 #PBS -l nodes=$nodes:ppn=${tasks_per_node}
     809#PBS -l nodes=$nodes:ppn=${processes_per_node}
    816810#PBS -l naccesspolicy=$node_usage
    817811#PBS -o $remote_dayfile
     
    821815$email_directive
    822816
    823 . /usr/share/modules/init/bash
     817eval \`/sw/swdist/bin/modulesinit\`
     818#. /usr/share/modules/init/bash
    824819$module_calls
    825820
     
    839834$email_directive
    840835
    841 . /usr/share/modules/init/bash
     836eval \`/sw/swdist/bin/modulesinit\`
     837#. /usr/share/modules/init/bash
    842838$module_calls
    843839
     
    859855#PBS -l walltime=$timestring
    860856#PBS -l mppwidth=${numprocs}
    861 #PBS -l mppnppn=${tasks_per_node}
     857#PBS -l mppnppn=${processes_per_node}
    862858#PBS -m abe
    863859#PBS -o $remote_dayfile
     
    927923#PBS -l walltime=$timestring
    928924#PBS -l mppwidth=${numprocs}
    929 #PBS -l mppnppn=${tasks_per_node}
     925#PBS -l mppnppn=${processes_per_node}
    930926#PBS -m abe
    931927#PBS -o $remote_dayfile
     
    962958       cat > $job_to_send << %%END%%
    963959#!/bin/ksh
    964 #PBS -l cpunum_prc=$tasks_per_node,cputim_job=$cputime
     960#PBS -l cpunum_prc=$processes_per_node,cputim_job=$cputime
    965961#PBS -l ${qsubmem}=${Memory}gb
    966962#PBS -b $nodes
     
    976972       cat > $job_to_send << %%END%%
    977973#!/bin/ksh
    978 #PBS -l cpunum_prc=$tasks_per_node,cputim_job=$cputime
     974#PBS -l cpunum_prc=$processes_per_node,cputim_job=$cputime
    979975#PBS -l ${qsubmem}=${Memory}gb
    980976#PBS -o $remote_dayfile
     
    10721068    if [[ $(echo $remote_host | cut -c1-3) = ibm  ||  $(echo $remote_host | cut -c1-5) = lcsgi  ||  $(echo $remote_host | cut -c1-3) = nec  ||  $remote_host = lctit ]]
    10731069    then
    1074        if [[ $remote_host = ibmb  ||  $remote_host = ibmh ]]
     1070       if [[ $remote_host = ibmh ]]
    10751071       then
    10761072          return_queue=c1
     1073       elif [[ $remote_host = ibmku ]]
     1074       then
     1075          return_queue=sdbg2
    10771076       elif [[ $remote_host = ibms ]]
    10781077       then
     
    10971096       then
    10981097
    1099           echo "echo \"#!/bin/ksh\" >> scpjob.$kennung"               >>  $job_to_send
     1098          if [[ $remote_host = ibmku ]]
     1099          then
     1100             echo "echo \"#!/usr/bin/ksh\" >> scpjob.$kennung"            >>  $job_to_send
     1101             echo "echo \"# @ shell = /usr/bin/ksh\" >> scpjob.$kennung"  >>  $job_to_send
     1102          else
     1103             echo "echo \"#!/bin/ksh\" >> scpjob.$kennung"                >>  $job_to_send
     1104          fi
    11001105          echo "echo \"# @ job_type = serial\" >> scpjob.$kennung"    >>  $job_to_send
    11011106          echo "echo \"# @ job_name = transfer\" >> scpjob.$kennung"  >>  $job_to_send
     
    11161121          echo "echo \"set -x\" >> scpjob.$kennung"                   >>  $job_to_send
    11171122          echo "echo \"batch_scp  -d  -w 10  -u $local_user  $local_addres  ${job_catalog}/$remote_dayfile  \\\"$job_catalog\\\"  $local_dayfile\" >> scpjob.$kennung"  >>  $job_to_send
     1123          if [[ $remote_host = ibmku ]]
     1124          then
     1125             echo "echo \"rm  scpjob.$kennung\" >> scpjob.$kennung"   >>  $job_to_send
     1126          fi
    11181127          echo "echo \"exit\" >> scpjob.$kennung"                     >>  $job_to_send
    11191128
     
    12191228          echo "qsub  scpjob.$kennung"          >>  $job_to_send
    12201229       fi
    1221        echo "rm  scpjob.$kennung"               >>  $job_to_send
     1230       if [[ $remote_host != ibmku ]]
     1231       then
     1232          echo "rm  scpjob.$kennung"            >>  $job_to_send
     1233       fi
    12221234       if [[ $remote_host = nech ]]
    12231235       then
     
    12431255    echo "exit"      >>  $job_to_send
    12441256 fi
    1245  if [[ $remote_host = lctit ]]
     1257 if [[ $remote_host = lctit  ||  $remote_host = ibmku ]]
    12461258 then
    12471259    echo " "                               >>  $job_to_send
     
    13161328             printf "\n >>> submit with HLRN qos-feature hiprio...\n"
    13171329             ssh  $remote_addres  -l $remote_user  "cd $job_catalog; $submcom -l qos=hiprio $job_on_remhost; rm $job_on_remhost"
     1330          elif [[ $remote_host = ibmku ]]
     1331          then
     1332             ssh  $remote_addres  -l $remote_user  "cd $job_catalog; $submcom $job_on_remhost"
    13181333          else
    13191334             ssh  $remote_addres  -l $remote_user  "cd $job_catalog; $submcom $job_on_remhost; rm $job_on_remhost"
    13201335          fi
    13211336       else
    1322              # TIT ERLAUBT NUR DIE AUSFï¿œHRUNG GANZ BESTIMMTER KOMMANDOS
     1337             # TIT ERLAUBT NUR DIE AUSFUEHRUNG GANZ BESTIMMTER KOMMANDOS
    13231338             # MIT SSH, DESHALB AUFRUF PER PIPE
    13241339             # UEBERGANGSWEISE CHECK, OB N1GE ENVIRONMENT WIRKLICH VERFUEGBAR
     
    13651380          qsub  $job_on_remhost
    13661381       fi
    1367           # JOBFILE DARF AUF LCTIT NICHT GELOESCHT WERDEN!! GESCHIEHT ERST AM JOBENDE
    1368        [[ $local_host != lctit ]]  &&  rm  $job_on_remhost
     1382
     1383          # Jobfile must not be deleted on lctit/ibmku!! This will be done
     1384          # only at the end of the job.
     1385       if [[ $local_host != lctit  &&  $local_host != ibmku ]]
     1386       then
     1387          rm  $job_on_remhost
     1388       fi
    13691389       cd  -  > /dev/null
    13701390    fi
  • palm/trunk/SOURCE/advec_particles.f90

    r559 r622  
    44! Current revisions:
    55! -----------------
     6! optional barriers included in order to speed up collective operations
    67! TEST: PRINT statements on unit 9 (commented out)
    78!
     
    792793!
    793794!--       Compute total sum from local sums
     795          IF ( collective_wait )  CALL MPI_BARRIER( comm2d, ierr )
    794796          CALL MPI_ALLREDUCE( sums_l(nzb,1,0), sums(nzb,1), nzt+2-nzb, &
    795797                              MPI_REAL, MPI_SUM, comm2d, ierr )
     798          IF ( collective_wait )  CALL MPI_BARRIER( comm2d, ierr )
    796799          CALL MPI_ALLREDUCE( sums_l(nzb,2,0), sums(nzb,2), nzt+2-nzb, &
    797800                              MPI_REAL, MPI_SUM, comm2d, ierr )
     
    830833!
    831834!--       Compute total sum from local sums
     835          IF ( collective_wait )  CALL MPI_BARRIER( comm2d, ierr )
    832836          CALL MPI_ALLREDUCE( sums_l(nzb,8,0), sums(nzb,8), nzt+2-nzb, &
    833837                              MPI_REAL, MPI_SUM, comm2d, ierr )
     838          IF ( collective_wait )  CALL MPI_BARRIER( comm2d, ierr )
    834839          CALL MPI_ALLREDUCE( sums_l(nzb,30,0), sums(nzb,30), nzt+2-nzb, &
    835840                              MPI_REAL, MPI_SUM, comm2d, ierr )
     841          IF ( collective_wait )  CALL MPI_BARRIER( comm2d, ierr )
    836842          CALL MPI_ALLREDUCE( sums_l(nzb,31,0), sums(nzb,31), nzt+2-nzb, &
    837843                              MPI_REAL, MPI_SUM, comm2d, ierr )
     844          IF ( collective_wait )  CALL MPI_BARRIER( comm2d, ierr )
    838845          CALL MPI_ALLREDUCE( sums_l(nzb,32,0), sums(nzb,32), nzt+2-nzb, &
    839846                              MPI_REAL, MPI_SUM, comm2d, ierr )
     
    19481955!--    and set the switch corespondingly
    19491956#if defined( __parallel )
     1957       IF ( collective_wait )  CALL MPI_BARRIER( comm2d, ierr )
    19501958       CALL MPI_ALLREDUCE( dt_3d_reached_l, dt_3d_reached, 1, MPI_LOGICAL, &
    19511959                           MPI_LAND, comm2d, ierr )
  • palm/trunk/SOURCE/advec_s_bc.f90

    r392 r622  
    44! Current revisions:
    55! -----------------
    6 !
     6! optional barriers included in order to speed up collective operations
    77!
    88! Former revisions:
     
    166166    ENDDO
    167167#if defined( __parallel )
     168    IF ( collective_wait )  CALL MPI_BARRIER( comm2d, ierr )
    168169    CALL MPI_ALLREDUCE( fmax_l, fmax, 2, MPI_REAL, MPI_MAX, comm2d, ierr )
    169170#else
     
    463464    ENDDO
    464465#if defined( __parallel )
     466    IF ( collective_wait )  CALL MPI_BARRIER( comm2d, ierr )
    465467    CALL MPI_ALLREDUCE( fmax_l, fmax, 2, MPI_REAL, MPI_MAX, comm2d, ierr )
    466468#else
     
    863865    ENDDO
    864866#if defined( __parallel )
     867    IF ( collective_wait )  CALL MPI_BARRIER( comm2d, ierr )
    865868    CALL MPI_ALLREDUCE( fmax_l, fmax, 2, MPI_REAL, MPI_MAX, comm2d, ierr )
    866869#else
  • palm/trunk/SOURCE/buoyancy.f90

    r516 r622  
    44! Currrent revisions:
    55! -----------------
    6 !
     6! optional barriers included in order to speed up collective operations
    77!
    88! Former revisions:
     
    284284#if defined( __parallel )
    285285
     286          IF ( collective_wait )  CALL MPI_BARRIER( comm2d, ierr )
    286287          CALL MPI_ALLREDUCE( sums_l(nzb,pr,0), sums(nzb,pr), nzt+2-nzb, &
    287288                              MPI_REAL, MPI_SUM, comm2d, ierr )
  • palm/trunk/SOURCE/check_for_restart.f90

    r392 r622  
    44! Current revisions:
    55! -----------------
    6 !
     6! optional barriers included in order to speed up collective operations
    77!
    88! Former revisions:
     
    6363!-- Make a logical OR for all processes. Stop the model run if at least
    6464!-- one processor has reached the time limit.
     65    IF ( collective_wait )  CALL MPI_BARRIER( comm2d, ierr )
    6566    CALL MPI_ALLREDUCE( terminate_run_l, terminate_run, 1, MPI_LOGICAL, &
    6667                        MPI_LOR, comm2d, ierr )
  • palm/trunk/SOURCE/cpu_statistics.f90

    r484 r622  
    44! Current revisions:
    55! -----------------
    6 !
     6! output of handling of collective operations
    77!
    88! Former revisions:
     
    248248
    249249!
     250!--    Output handling of collective operations
     251       IF ( collective_wait )  THEN
     252          WRITE ( 18, 103 )
     253       ELSE
     254          WRITE ( 18, 104 )
     255       ENDIF
     256
     257!
    250258!--    Empty lines in order to create a gap to the results of the model
    251259!--    continuation runs
    252        WRITE ( 18, 103 )
     260       WRITE ( 18, 105 )
    253261
    254262!
     
    275283
    276284102 FORMAT (A20,2X,F9.3,2X,F7.2,1X,I7,3(1X,F9.3))
    277 103 FORMAT (//)
     285103 FORMAT (/'Barriers are set in front of collective operations')
     286104 FORMAT (/'No barriers are set in front of collective operations')
     287105 FORMAT (//)
    278288
    279289 END SUBROUTINE cpu_statistics
  • palm/trunk/SOURCE/data_output_2d.f90

    r559 r622  
    44! Current revisions:
    55! -----------------
    6 !
     6! optional barriers included in order to speed up collective operations
    77!
    88! Former revisions:
     
    899899!
    900900!--                   Now do the averaging over all PEs along y
     901                      IF ( collective_wait )  CALL MPI_BARRIER( comm2d, ierr )
    901902                      CALL MPI_ALLREDUCE( local_2d_l(nxl-1,nzb),              &
    902903                                          local_2d(nxl-1,nzb), ngp, MPI_REAL, &
     
    942943!--                      Distribute data over all PEs along y
    943944                         ngp = ( nxr-nxl+3 ) * ( nzt-nzb+2 )
     945                         IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
    944946                         CALL MPI_ALLREDUCE( local_2d_l(nxl-1,nzb),            &
    945947                                             local_2d(nxl-1,nzb), ngp,         &
     
    11981200!
    11991201!--                   Now do the averaging over all PEs along x
     1202                      IF ( collective_wait )  CALL MPI_BARRIER( comm2d, ierr )
    12001203                      CALL MPI_ALLREDUCE( local_2d_l(nys-1,nzb),              &
    12011204                                          local_2d(nys-1,nzb), ngp, MPI_REAL, &
     
    12411244!--                      Distribute data over all PEs along x
    12421245                         ngp = ( nyn-nys+3 ) * ( nzt-nzb+2 )
     1246                         IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
    12431247                         CALL MPI_ALLREDUCE( local_2d_l(nys-1,nzb),            &
    12441248                                             local_2d(nys-1,nzb), ngp,         &
  • palm/trunk/SOURCE/data_output_ptseries.f90

    r392 r622  
    44! Current revisions:
    55! -----------------
    6 !
     6! optional barriers included in order to speed up collective operations
    77!
    88! Former revisions:
     
    138138    inum = number_of_particle_groups + 1
    139139
     140    IF ( collective_wait )  CALL MPI_BARRIER( comm2d, ierr )
    140141    CALL MPI_ALLREDUCE( pts_value_l(0,1), pts_value(0,1), 14*inum, MPI_REAL, &
    141142                        MPI_SUM, comm2d, ierr )
     143    IF ( collective_wait )  CALL MPI_BARRIER( comm2d, ierr )
    142144    CALL MPI_ALLREDUCE( pts_value_l(0,15), pts_value(0,15), inum, MPI_REAL, &
    143145                        MPI_MAX, comm2d, ierr )
     146    IF ( collective_wait )  CALL MPI_BARRIER( comm2d, ierr )
    144147    CALL MPI_ALLREDUCE( pts_value_l(0,16), pts_value(0,16), inum, MPI_REAL, &
    145148                        MPI_MIN, comm2d, ierr )
     
    239242    inum = number_of_particle_groups + 1
    240243
     244    IF ( collective_wait )  CALL MPI_BARRIER( comm2d, ierr )
    241245    CALL MPI_ALLREDUCE( pts_value_l(0,17), pts_value(0,17), inum*10, MPI_REAL, &
    242246                        MPI_SUM, comm2d, ierr )
  • palm/trunk/SOURCE/flow_statistics.f90

    r550 r622  
    44! Current revisions:
    55! -----------------
    6 !
     6! optional barriers included in order to speed up collective operations
    77!
    88! Former revisions:
     
    237237!
    238238!--    Compute total sum from local sums
     239       IF ( collective_wait )  CALL MPI_BARRIER( comm2d, ierr )
    239240       CALL MPI_ALLREDUCE( sums_l(nzb,1,0), sums(nzb,1), nzt+2-nzb, MPI_REAL, &
    240241                           MPI_SUM, comm2d, ierr )
     242       IF ( collective_wait )  CALL MPI_BARRIER( comm2d, ierr )
    241243       CALL MPI_ALLREDUCE( sums_l(nzb,2,0), sums(nzb,2), nzt+2-nzb, MPI_REAL, &
    242244                           MPI_SUM, comm2d, ierr )
     245       IF ( collective_wait )  CALL MPI_BARRIER( comm2d, ierr )
    243246       CALL MPI_ALLREDUCE( sums_l(nzb,4,0), sums(nzb,4), nzt+2-nzb, MPI_REAL, &
    244247                           MPI_SUM, comm2d, ierr )
    245248       IF ( ocean )  THEN
     249          IF ( collective_wait )  CALL MPI_BARRIER( comm2d, ierr )
    246250          CALL MPI_ALLREDUCE( sums_l(nzb,23,0), sums(nzb,23), nzt+2-nzb, &
    247251                              MPI_REAL, MPI_SUM, comm2d, ierr )
    248252       ENDIF
    249253       IF ( humidity ) THEN
     254          IF ( collective_wait )  CALL MPI_BARRIER( comm2d, ierr )
    250255          CALL MPI_ALLREDUCE( sums_l(nzb,44,0), sums(nzb,44), nzt+2-nzb, &
    251256                              MPI_REAL, MPI_SUM, comm2d, ierr )
     257          IF ( collective_wait )  CALL MPI_BARRIER( comm2d, ierr )
    252258          CALL MPI_ALLREDUCE( sums_l(nzb,41,0), sums(nzb,41), nzt+2-nzb, &
    253259                              MPI_REAL, MPI_SUM, comm2d, ierr )
    254260          IF ( cloud_physics ) THEN
     261             IF ( collective_wait )  CALL MPI_BARRIER( comm2d, ierr )
    255262             CALL MPI_ALLREDUCE( sums_l(nzb,42,0), sums(nzb,42), nzt+2-nzb, &
    256263                                 MPI_REAL, MPI_SUM, comm2d, ierr )
     264             IF ( collective_wait )  CALL MPI_BARRIER( comm2d, ierr )
    257265             CALL MPI_ALLREDUCE( sums_l(nzb,43,0), sums(nzb,43), nzt+2-nzb, &
    258266                                 MPI_REAL, MPI_SUM, comm2d, ierr )
     
    261269
    262270       IF ( passive_scalar )  THEN
     271          IF ( collective_wait )  CALL MPI_BARRIER( comm2d, ierr )
    263272          CALL MPI_ALLREDUCE( sums_l(nzb,41,0), sums(nzb,41), nzt+2-nzb, &
    264273                              MPI_REAL, MPI_SUM, comm2d, ierr )
     
    796805!
    797806!--    Compute total sum from local sums
     807       IF ( collective_wait )  CALL MPI_BARRIER( comm2d, ierr )
    798808       CALL MPI_ALLREDUCE( sums_l(nzb,1,0), sums(nzb,1), ngp_sums, MPI_REAL, &
    799809                           MPI_SUM, comm2d, ierr )
  • palm/trunk/SOURCE/global_min_max.f90

    r484 r622  
    55! Current revisions:
    66! -----------------
    7 !
     7! optional barriers included in order to speed up collective operations
    88!
    99! Former revisions:
     
    6161#if defined( __parallel )
    6262       fmin_l(2)  = myid
     63       IF ( collective_wait )  CALL MPI_BARRIER( comm2d, ierr )
    6364       CALL MPI_ALLREDUCE( fmin_l, fmin, 1, MPI_2REAL, MPI_MINLOC, comm2d, ierr )
    6465
     
    100101#if defined( __parallel )
    101102       fmax_l(2)  = myid
     103       IF ( collective_wait )  CALL MPI_BARRIER( comm2d, ierr )
    102104       CALL MPI_ALLREDUCE( fmax_l, fmax, 1, MPI_2REAL, MPI_MAXLOC, comm2d, ierr )
    103105
     
    158160#if defined( __parallel )
    159161       fmax_l(2)  = myid
     162       IF ( collective_wait )  CALL MPI_BARRIER( comm2d, ierr )
    160163       CALL MPI_ALLREDUCE( fmax_l, fmax, 1, MPI_2REAL, MPI_MAXLOC, comm2d, &
    161164                           ierr )
  • palm/trunk/SOURCE/inflow_turbulence.f90

    r484 r622  
    44! Current revisions:
    55! -----------------
    6 !
     6! optional barriers included in order to speed up collective operations
    77!
    88! Former revisions:
     
    7777!
    7878!-- Now, averaging over all PEs
     79    IF ( collective_wait )  CALL MPI_BARRIER( comm2d, ierr )
    7980    CALL MPI_ALLREDUCE( avpr_l(nzb,1), avpr(nzb,1), ngp_pr, MPI_REAL, MPI_SUM, &
    8081                        comm2d, ierr )
     
    195196
    196197#if defined( __parallel )   
     198!       IF ( collective_wait )  CALL MPI_BARRIER( comm2d, ierr )
    197199!       CALL MPI_ALLREDUCE( volume_flow_l(1), volume_flow(1), 1, MPI_REAL, &
    198200!                           MPI_SUM, comm1dy, ierr )   
  • palm/trunk/SOURCE/init_3d_model.f90

    r561 r622  
    77! Current revisions:
    88! -----------------
    9 !
     9! optional barriers included in order to speed up collective operations
    1010!
    1111! Former revisions:
     
    860860
    861861#if defined( __parallel )
     862          IF ( collective_wait )  CALL MPI_BARRIER( comm2d, ierr )
    862863          CALL MPI_ALLREDUCE( volume_flow_initial_l(1), volume_flow_initial(1),&
    863864                              2, MPI_REAL, MPI_SUM, comm2d, ierr )
     865          IF ( collective_wait )  CALL MPI_BARRIER( comm2d, ierr )
    864866          CALL MPI_ALLREDUCE( volume_flow_area_l(1), volume_flow_area(1),      &
    865867                              2, MPI_REAL, MPI_SUM, comm2d, ierr )
     
    11721174
    11731175#if defined( __parallel )
     1176          IF ( collective_wait )  CALL MPI_BARRIER( comm2d, ierr )
    11741177          CALL MPI_ALLREDUCE( volume_flow_initial_l(1), volume_flow_initial(1),&
    11751178                              2, MPI_REAL, MPI_SUM, comm2d, ierr )
     1179          IF ( collective_wait )  CALL MPI_BARRIER( comm2d, ierr )
    11761180          CALL MPI_ALLREDUCE( volume_flow_area_l(1), volume_flow_area(1),      &
    11771181                              2, MPI_REAL, MPI_SUM, comm2d, ierr )
     
    15601564    sr = statistic_regions + 1
    15611565#if defined( __parallel )
     1566    IF ( collective_wait )  CALL MPI_BARRIER( comm2d, ierr )
    15621567    CALL MPI_ALLREDUCE( ngp_2dh_l(0), ngp_2dh(0), sr, MPI_INTEGER, MPI_SUM,   &
    15631568                        comm2d, ierr )
     1569    IF ( collective_wait )  CALL MPI_BARRIER( comm2d, ierr )
    15641570    CALL MPI_ALLREDUCE( ngp_2dh_outer_l(0,0), ngp_2dh_outer(0,0), (nz+2)*sr,  &
    15651571                        MPI_INTEGER, MPI_SUM, comm2d, ierr )
     1572    IF ( collective_wait )  CALL MPI_BARRIER( comm2d, ierr )
    15661573    CALL MPI_ALLREDUCE( ngp_2dh_s_inner_l(0,0), ngp_2dh_s_inner(0,0),         &
    15671574                        (nz+2)*sr, MPI_INTEGER, MPI_SUM, comm2d, ierr )
     1575    IF ( collective_wait )  CALL MPI_BARRIER( comm2d, ierr )
    15681576    CALL MPI_ALLREDUCE( ngp_3d_inner_l(0), ngp_3d_inner_tmp(0), sr, MPI_REAL, &
    15691577                        MPI_SUM, comm2d, ierr )
  • palm/trunk/SOURCE/init_particles.f90

    r392 r622  
    44! Current revisions:
    55! -----------------
    6 !
     6! optional barriers included in order to speed up collective operations
    77!
    88! Former revisions:
     
    342342!--    Calculate the number of particles and tails of the total domain
    343343#if defined( __parallel )
     344       IF ( collective_wait )  CALL MPI_BARRIER( comm2d, ierr )
    344345       CALL MPI_ALLREDUCE( number_of_particles, total_number_of_particles, 1, &
    345346                           MPI_INTEGER, MPI_SUM, comm2d, ierr )
     347       IF ( collective_wait )  CALL MPI_BARRIER( comm2d, ierr )
    346348       CALL MPI_ALLREDUCE( number_of_tails, total_number_of_tails, 1, &
    347349                           MPI_INTEGER, MPI_SUM, comm2d, ierr )
     
    436438
    437439#if defined( __parallel )
     440          IF ( collective_wait )  CALL MPI_BARRIER( comm2d, ierr )
    438441          CALL MPI_ALLREDUCE( uniform_particles_l, uniform_particles, 1, &
    439442                              MPI_LOGICAL, MPI_LAND, comm2d, ierr )
  • palm/trunk/SOURCE/init_pegrid.f90

    r482 r622  
    44! Current revisions:
    55! -----------------
    6 !
     6! optional barriers included in order to speed up collective operations
    77! ATTENTION: nnz_x undefined problem still has to be solved!!!!!!!!
    88! TEST OUTPUT (TO BE REMOVED) logging mpi2 ierr values
     
    154154       CALL message( 'init_pegrid', 'PA0223', 1, 2, 0, 6, 0 )
    155155    ENDIF
     156
     157!
     158!-- For communication speedup, set barriers in front of collective
     159!-- communications by default on SGI-type systems
     160    IF ( host(3:5) == 'sgi' )  collective_wait = .TRUE.
    156161
    157162!
     
    929934       id_inflow_l = 0
    930935    ENDIF
     936    IF ( collective_wait )  CALL MPI_BARRIER( comm2d, ierr )
    931937    CALL MPI_ALLREDUCE( id_inflow_l, id_inflow, 1, MPI_INTEGER, MPI_SUM, &
    932938                        comm1dx, ierr )
     
    935941!-- Broadcast the id of the recycling plane
    936942!-- WARNING: needs to be adjusted in case of inflows other than from left side!
    937     IF ( ( recycling_width / dx ) >= nxl  .AND.  ( recycling_width / dx ) <= nxr ) &
    938     THEN
     943    IF ( ( recycling_width / dx ) >= nxl  .AND. &
     944         ( recycling_width / dx ) <= nxr )  THEN
    939945       id_recycling_l = myidx
    940946    ELSE
    941947       id_recycling_l = 0
    942948    ENDIF
     949    IF ( collective_wait )  CALL MPI_BARRIER( comm2d, ierr )
    943950    CALL MPI_ALLREDUCE( id_recycling_l, id_recycling, 1, MPI_INTEGER, MPI_SUM, &
    944951                        comm1dx, ierr )
  • palm/trunk/SOURCE/init_slope.f90

    r484 r622  
    44! Current revisions:
    55! -----------------
    6 !
     6! optional barriers included in order to speed up collective operations
    77!
    88! Former revisions:
     
    100100             ENDDO
    101101          ENDDO
    102       ENDDO
     102       ENDDO
    103103
    104104#if defined( __parallel )
    105       CALL MPI_ALLREDUCE( pt_init_local, pt_init, nzt+2-nzb, MPI_REAL, &
    106                            MPI_SUM, comm2d, ierr )
     105       IF ( collective_wait )  CALL MPI_BARRIER( comm2d, ierr )
     106       CALL MPI_ALLREDUCE( pt_init_local, pt_init, nzt+2-nzb, MPI_REAL, &
     107                            MPI_SUM, comm2d, ierr )
    107108#else
    108       pt_init = pt_init_local
     109       pt_init = pt_init_local
    109110#endif
    110111
    111       pt_init = pt_init / ngp_2dh(0)
    112       DEALLOCATE( pt_init_local )
     112       pt_init = pt_init / ngp_2dh(0)
     113       DEALLOCATE( pt_init_local )
    113114
    114    ENDIF
     115    ENDIF
    115116
    116117 END SUBROUTINE init_slope
  • palm/trunk/SOURCE/modules.f90

    r601 r622  
    55! Current revisions:
    66! -----------------
    7 !
     7! +collective_wait in pegrid
    88!
    99! Former revisions:
     
    11631163    INTEGER, DIMENSION(:), ALLOCATABLE ::  ngp_yz, type_xz
    11641164
    1165     LOGICAL ::  reorder = .TRUE.
     1165    LOGICAL ::  collective_wait = .FALSE., reorder = .TRUE.
    11661166    LOGICAL, DIMENSION(2) ::  cyclic = (/ .TRUE. , .TRUE. /), &
    11671167                              remain_dims
  • palm/trunk/SOURCE/parin.f90

    r601 r622  
    44! Current revisions:
    55! -----------------
    6 !
     6! +collective_wait in inipar
    77!
    88! Former revisions:
     
    119119             canyon_width_x, canyon_width_y, canyon_wall_left, &
    120120             canyon_wall_south, cfl_factor, cloud_droplets, cloud_physics, &
    121              conserve_volume_flow, conserve_volume_flow_mode, &
     121             collective_wait, conserve_volume_flow, conserve_volume_flow_mode, &
    122122             coupling_start_time, cthf, cut_spline_overshoot, &
    123123             cycle_mg, damp_level_1d, dissipation_1d, dp_external, dp_level_b, &
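
    Since collective_wait is now read from the inipar namelist (parin.f90 above)
    and is stored in and restored from the restart file (write_var_list and
    read_var_list below), the switch can be set in the run parameter file. The
    fragment below is a hypothetical example: only collective_wait is taken from
    this changeset, the grid parameters and their values are placeholders.

        &inipar  nx = 39, ny = 39, nz = 40,          ! placeholder grid size
                 dx = 50.0, dy = 50.0, dz = 50.0,    ! placeholder grid spacings
                 collective_wait = .TRUE. /          ! barriers before collectives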
  • palm/trunk/SOURCE/poisfft.f90

    r484 r622  
    44! Current revisions:
    55! -----------------
    6 !
     6! optional barriers included in order to speed up collective operations
    77!
    88! Former revisions:
     
    718718!--    Transpose array
    719719       CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
     720       IF ( collective_wait )  CALL MPI_BARRIER( comm2d, ierr )
    720721       CALL MPI_ALLTOALL( work(nxl,1,0),      sendrecvcount_xy, MPI_REAL, &
    721722                          f_out(1,1,nys_x,1), sendrecvcount_xy, MPI_REAL, &
     
    756757!--    Transpose array
    757758       CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
     759       IF ( collective_wait )  CALL MPI_BARRIER( comm2d, ierr )
    758760       CALL MPI_ALLTOALL( f_in(1,1,nys_x,1), sendrecvcount_xy, MPI_REAL, &
    759761                          work(nxl,1,0),     sendrecvcount_xy, MPI_REAL, &
     
    10731075!--    Transpose array
    10741076       CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
     1077       IF ( collective_wait )  CALL MPI_BARRIER( comm2d, ierr )
    10751078       CALL MPI_ALLTOALL( work(nys,1,0),      sendrecvcount_xy, MPI_REAL, &
    10761079                          f_out(1,1,nxl_y,1), sendrecvcount_xy, MPI_REAL, &
     
    11071110!--    Transpose array
    11081111       CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
     1112       IF ( collective_wait )  CALL MPI_BARRIER( comm2d, ierr )
    11091113       CALL MPI_ALLTOALL( f_in(1,1,nxl_y,1), sendrecvcount_xy, MPI_REAL, &
    11101114                          work(nys,1,0),     sendrecvcount_xy, MPI_REAL, &
  • palm/trunk/SOURCE/poismg.f90

    r392 r622  
    88! Current revisions:
    99! -----------------
    10 !
     10! optional barriers included in order to speed up collective operations
    1111!
    1212! Former revisions:
     
    106106          maxerror = SUM( r(nzb+1:nzt,nys:nyn,nxl:nxr)**2 )
    107107#if defined( __parallel )
     108          IF ( collective_wait )  CALL MPI_BARRIER( comm2d, ierr )
    108109          CALL MPI_ALLREDUCE( maxerror, residual_norm, 1, MPI_REAL, MPI_SUM, &
    109110                              comm2d, ierr)
  • palm/trunk/SOURCE/pres.f90

    r484 r622  
    44! Current revisions:
    55! -----------------
    6 !
     6! optional barriers included in order to speed up collective operations
    77!
    88! Former revisions:
     
    105105
    106106#if defined( __parallel )   
     107       IF ( collective_wait )  CALL MPI_BARRIER( comm2d, ierr )
    107108       CALL MPI_ALLREDUCE( volume_flow_l(1), volume_flow(1), 1, MPI_REAL, &
    108109                           MPI_SUM, comm1dy, ierr )   
     
    143144
    144145#if defined( __parallel )   
     146       IF ( collective_wait )  CALL MPI_BARRIER( comm2d, ierr )
    145147       CALL MPI_ALLREDUCE( volume_flow_l(2), volume_flow(2), 1, MPI_REAL, &
    146148                           MPI_SUM, comm1dx, ierr )   
     
    172174          ENDDO
    173175#if defined( __parallel )   
     176          IF ( collective_wait )  CALL MPI_BARRIER( comm2d, ierr )
    174177          CALL MPI_ALLREDUCE( w_l_l(1), w_l(1), nzt, MPI_REAL, MPI_SUM, comm2d, &
    175178                              ierr )
     
    537540
    538541#if defined( __parallel )   
     542       IF ( collective_wait )  CALL MPI_BARRIER( comm2d, ierr )
    539543       CALL MPI_ALLREDUCE( volume_flow_l(1), volume_flow(1), 2, MPI_REAL, &
    540544                           MPI_SUM, comm2d, ierr ) 
  • palm/trunk/SOURCE/read_var_list.f90

    r601 r622  
    33!------------------------------------------------------------------------------!
    44! Current revisions:
    5 ! -----------------_
    6 !
     5! ------------------
     6! +collective_wait
    77!
    88! Former revisions:
     
    275275          CASE ( 'cloud_physics' )
    276276             READ ( 13 )  cloud_physics
     277          CASE ( 'collective_wait' )
     278             READ ( 13 )  collective_wait
    277279          CASE ( 'conserve_volume_flow' )
    278280             READ ( 13 )  conserve_volume_flow
  • palm/trunk/SOURCE/set_particle_attributes.f90

    r484 r622  
    44! Current revisions:
    55! -----------------
    6 !
     6! optional barriers included in order to speed up collective operations
    77!
    88! Former revisions:
     
    140140#if defined( __parallel )
    141141
     142       IF ( collective_wait )  CALL MPI_BARRIER( comm2d, ierr )
    142143       CALL MPI_ALLREDUCE( sums_l(nzb,4,0), sums(nzb,4), nzt+2-nzb, &
    143144                           MPI_REAL, MPI_SUM, comm2d, ierr )
  • palm/trunk/SOURCE/timestep.f90

    r392 r622  
    44! Current revisions:
    55! -----------------
    6 !
     6! optional barriers included in order to speed up collective operations
    77!
    88! Former revisions:
     
    118118          uv_gtrans_l = uv_gtrans_l / REAL( (nxr-nxl+1)*(nyn-nys+1)*(nzt-nzb) )
    119119#if defined( __parallel )
     120          IF ( collective_wait )  CALL MPI_BARRIER( comm2d, ierr )
    120121          CALL MPI_ALLREDUCE( uv_gtrans_l, uv_gtrans, 2, MPI_REAL, MPI_SUM, &
    121122                              comm2d, ierr )
     
    164165!$OMP END PARALLEL
    165166#if defined( __parallel )
     167       IF ( collective_wait )  CALL MPI_BARRIER( comm2d, ierr )
    166168       CALL MPI_ALLREDUCE( dt_diff_l, dt_diff, 1, MPI_REAL, MPI_MIN, comm2d, &
    167169                           ierr )
     
    252254!--       Determine the global minumum
    253255#if defined( __parallel )
     256          IF ( collective_wait )  CALL MPI_BARRIER( comm2d, ierr )
    254257          CALL MPI_ALLREDUCE( dt_plant_canopy_l, dt_plant_canopy, 1, MPI_REAL,  &
    255258                              MPI_MIN, comm2d, ierr )
  • palm/trunk/SOURCE/transpose.f90

    r484 r622  
    44! Current revisions:
    55! -----------------
    6 !
     6! optional barriers included in order to speed up collective operations
    77!
    88! Former revisions:
     
    6969!-- Transpose array
    7070    CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
     71    IF ( collective_wait )  CALL MPI_BARRIER( comm2d, ierr )
    7172    CALL MPI_ALLTOALL( f_inv(nys_x,nzb_x,0), sendrecvcount_xy, MPI_REAL, &
    7273                       work(1),              sendrecvcount_xy, MPI_REAL, &
     
    143144!--    Transpose array
    144145       CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
     146       IF ( collective_wait )  CALL MPI_BARRIER( comm2d, ierr )
    145147       CALL MPI_ALLTOALL( work(1),          sendrecvcount_zx, MPI_REAL, &
    146148                          f_inv(nys,nxl,1), sendrecvcount_zx, MPI_REAL, &
     
    229231!-- Transpose array
    230232    CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
     233    IF ( collective_wait )  CALL MPI_BARRIER( comm2d, ierr )
    231234    CALL MPI_ALLTOALL( work(1),              sendrecvcount_xy, MPI_REAL, &
    232235                       f_inv(nys_x,nzb_x,0), sendrecvcount_xy, MPI_REAL, &
     
    291294!-- Transpose array
    292295    CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
     296    IF ( collective_wait )  CALL MPI_BARRIER( comm2d, ierr )
    293297    CALL MPI_ALLTOALL( f_inv(nxl,1,nys), sendrecvcount_xy, MPI_REAL, &
    294298                       work(1),          sendrecvcount_xy, MPI_REAL, &
     
    373377!-- Transpose array
    374378    CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
     379    IF ( collective_wait )  CALL MPI_BARRIER( comm2d, ierr )
    375380    CALL MPI_ALLTOALL( f_inv(nxl_y,nzb_y,0), sendrecvcount_yz, MPI_REAL, &
    376381                       work(1),              sendrecvcount_yz, MPI_REAL, &
     
    454459!-- Transpose array
    455460    CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
     461    IF ( collective_wait )  CALL MPI_BARRIER( comm2d, ierr )
    456462    CALL MPI_ALLTOALL( f_inv(nys,nxl,1), sendrecvcount_zx, MPI_REAL, &
    457463                       work(1),          sendrecvcount_zx, MPI_REAL, &
     
    528534!--    Transpose array
    529535       CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
     536       IF ( collective_wait )  CALL MPI_BARRIER( comm2d, ierr )
    530537       CALL MPI_ALLTOALL( work(1),              sendrecvcount_yz, MPI_REAL, &
    531538                          f_inv(nxl_y,nzb_y,0), sendrecvcount_yz, MPI_REAL, &
     
    627634!-- Transpose array
    628635    CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
     636    IF ( collective_wait )  CALL MPI_BARRIER( comm2d, ierr )
    629637    CALL MPI_ALLTOALL( f_inv(nys,nxl,1), sendrecvcount_zyd, MPI_REAL, &
    630638                       work(1),          sendrecvcount_zyd, MPI_REAL, &
  • palm/trunk/SOURCE/user_statistics.f90

    r556 r622  
    44! Current revisions:
    55! -----------------
    6 !
     6! optional barriers included in order to speed up collective operations
    77!
    88! Former revisions:
     
    9292!--           assign ts_value(dots_num_palm+1:,sr) = ts_value_l directly.
    9393!#if defined( __parallel )
     94!       IF ( collective_wait )  CALL MPI_BARRIER( comm2d, ierr )
    9495!       CALL MPI_ALLREDUCE( ts_value_l(dots_num_palm+1),                       &
    9596!                           ts_value(dots_num_palm+1,sr),                      &
  • palm/trunk/SOURCE/write_compressed.f90

    r484 r622  
    55! Current revisions:
    66! -----------------
    7 !
     7! optional barriers included in order to speed up collective operations
    88!
    99! Former revisions:
     
    9090
    9191#if defined( __parallel )
     92    IF ( collective_wait )  CALL MPI_BARRIER( comm2d, ierr )
    9293    CALL MPI_ALLREDUCE( ifieldmax_l, ifieldmax, 1, MPI_INTEGER, MPI_MAX, &
    9394                        comm2d, ierr )
     95    IF ( collective_wait )  CALL MPI_BARRIER( comm2d, ierr )
    9496    CALL MPI_ALLREDUCE( ifieldmin_l, ifieldmin, 1, MPI_INTEGER, MPI_MIN, &
    9597                        comm2d, ierr )
  • palm/trunk/SOURCE/write_var_list.f90

    r601 r622  
    44! Current revisions:
    55! -----------------
    6 !
     6! +collective_wait
    77!
    88! Former revisions:
     
    200200    WRITE ( 14 )  'cloud_physics                 '
    201201    WRITE ( 14 )  cloud_physics
     202    WRITE ( 14 )  'collective_wait               '
     203    WRITE ( 14 )  collective_wait
    202204    WRITE ( 14 )  'conserve_volume_flow          '
    203205    WRITE ( 14 )  conserve_volume_flow