#!/bin/bash

# mrun - script for running PALM jobs

#--------------------------------------------------------------------------------#
# This file is part of PALM.
#
# PALM is free software: you can redistribute it and/or modify it under the terms
# of the GNU General Public License as published by the Free Software Foundation,
# either version 3 of the License, or (at your option) any later version.
#
# PALM is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
# A PARTICULAR PURPOSE. See the GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along with
# PALM. If not, see <http://www.gnu.org/licenses/>.
#
# Copyright 1997-2015 Leibniz Universitaet Hannover
#--------------------------------------------------------------------------------#
#
# Current revisions:
# ------------------
#
#
# Former revisions:
# -----------------
# $Id: mrun 2365 2017-08-21 14:59:59Z Giersch $
# Added lckea & lckeam. KIT/IMK-IFU Garmisch cluster. LRZ (SadiqHuq)
# Vertical grid nesting: set vnested_mode. -N Procs for fine and coarse grid.
# # 2303 2017-07-04 12:26:18Z raasch # bugfix: setting default value for write_binary # # 2298 2017-06-29 09:28:18Z raasch # write_binary in ENVPAR has type LOGICAL now, # MPI2 coupling removed # # 2297 2017-06-28 14:35:57Z scharf # adjustments for using lcgeohu (cirrus @ HUB) # cpp_opts removed # # 2290 2017-06-19 11:03:06Z raasch # further test output removed # # 2286 2017-06-15 13:31:57Z raasch # test output for cycle number removed # # 2268 2017-06-09 10:34:09Z raasch # bugfix for calculating cycle numbers for output files in r2261 # # 2262 2017-06-08 14:52:23Z raasch # bugfix for r2261 # # 2261 2017-06-08 14:25:57Z raasch # unified cycle numbers for output files are used, # paths and filenames are allowed to contain arbitrary numbers of dots ".", # creation of file OUTPUT_FILE_CONNECTIONS removed, # archive feature completely removed from the script, # nech related parts completely removed # # 2257 2017-06-07 14:07:05Z witha # adjustments for lceddy, removed lcflow-specific code # # 2186 2017-03-21 05:36:31Z raasch # # 2185 2017-03-21 05:09:29Z raasch # adjustment for lykyo*-hosts (PJM_ENVIRONMENT defines BATCH) # # 2148 2017-02-09 16:56:42Z scharf # changed allocation of resources for lcbullhh in srun command # # 2141 2017-02-06 10:19:32Z gronemeier # made check for existing files language independent # # 1944 2016-06-15 06:29:00Z raasch # adjustments for using HLRN ssh-keys # # 1940 2016-06-14 05:15:20Z raasch $ # adjustments for lckiaps # # 1866 2016-04-15 06:50:59Z raasch # adjusted for lcocean # # 1841 2016-04-07 19:14:06Z raasch # script now running under bash # # 1804 2016-04-05 16:30:18Z maronga # test: implementing an execute mechanism where the execute command is given in the # configuration file # ROPTS removed from execution commands # Warnings about missing optional files or user code changed to informative messages # Removed parameter file check # # 1755 2016-02-22 13:53:39Z maronga # Minor bugfix to last commit # # 1753 2016-02-22 13:49:49Z 
maronga # Bugfix: use random job number when PBS job nummer retrieval fails on lccray* # # 1622 2015-07-20 06:21:36Z heinze # bugfix for Mistral (lcbullhh) # # 1621 2015-07-17 11:39:33Z heinze # adjustments for Mistral at DKRZ Hamburg (lcbullhh) # # 1609 2015-07-03 15:37:58Z maronga # Modified the random number of the temporary working directory to match the # queuing system number on HLRN-III (lccray*). # # 1604 2015-06-23 15:48:03Z suehring # Enable coupled runs for lcmuk # # 1598 2015-05-29 06:45:40Z raasch # bugfix for creating hostfile and total number of MPI tasks if run in openmp-mode on one node # # 1549 2015-01-30 14:26:16Z suehring # Enable interactive debug sessions with allinea debugger # # 1547 2015-01-29 15:09:12Z witha # adjustments for ForWind computing cluster (lcflow) # # 1491 2014-11-12 08:12:57Z heinze # correct setting of batch_job allowed for ibm-machines # # 1468 2014-09-24 14:06:57Z maronga # Typo removed (addres->address) # Bugfix: returning files to IMUK via ssh did not work for lccrayh and lcycrayb # Added support for restart runs (remote) for lcxe6 # # 1442 2014-07-28 07:09:10Z raasch # default queues for HLRN-III (lccrayb/lccrayh) defined # # 1422 2014-07-02 10:01:25Z kanani # automatic restarts enabled for lccrayb, # adjustment of path to compilervars.sh at IMUK (see automatic restarts) # # 1420 2014-06-10 06:24:14Z raasch # -j1 option added to aprung command for lccrayf (CSC) # # 1402 2014-05-09 14:25:13Z raasch # batch_job added to envpar-NAMELIST # # 1357 2014-04-11 15:02:03Z witha # adjustments for lccrayh (automatic restarts on lccrayh outside of IMUK are now # possible) # # 1320 2014-03-20 08:40:49Z raasch # check namelist file set false by default # # 1304 2014-03-12 10:29:42Z raasch # ulimit option changed from -Ss to -s # # bugfix: missing "fi" in r1289 # # 1289 2014-03-04 07:12:34Z raasch # comments translated to English # necriam-, fimm-, ibmy-, and sgi-specific code removed # export of variables for palm and 
interpret_config removed # # 1281 2014-02-01 07:55:49Z raasch # rsync-copy restricted to Cray machines, since command is unavailable on some # other systems # # 1279 2014-01-28 12:10:14Z raasch # tasks_per_node must not be an integral divisor of numprocs any more. This was done # in order to remove annoying restrictions concerning the number of processors which # appear on machines with larger nodes (e.g. containing 24 cores). Now without this # restriction, one of the nodes will be filled with less than the given number of # tasks per node. A respective warning is given. # # 1274 2014-01-09 13:14:54Z heinze # adjustments for lccrayh # # 1272 2014-01-08 10:19:32Z witha # small adjustment for lcflow # # 1270 2013-12-16 11:05:01Z fricke # call of combine_plot_fields adjusted for lccrayb/lccrayh # # 1255 2013-11-07 14:43:35Z raasch # further adjustments for lccrayb remote access # # 1241 2013-10-30 11:36:58Z heinze # Enable use of nudging input and input of large scale forcing from # external files # # 1229 2013-09-20 06:55:19Z raasch # further adjustments for lccrayb # # 1224 2013-09-16 07:27:23Z raasch # first adjustments for lccrayb # # 1210 2013-08-14 10:58:20Z raasch # fftw support added # # 1201 2013-07-10 16:17:59Z witha # adjustments for Forwind cluster (lcflow) # # 1199 2013-07-05 14:52:22Z raasch # adjustments for CSC Helsinki (lccrayf), # executables for batch jobs can be created in advance, in order to avoid calling # the compiler within the batch job (only works if batch jobs are submitted on # local host) # # 1190 2013-06-25 09:39:21Z heinze # enable use of automatic restarts for ibmh # use of cluster/express queue enabled (ibmh) # # 1124 2013-04-09 15:46:52Z raasch # variable "memory" is exported via typeset option -x, because otherwise an unknown # side effect may lead to data loss while getopts is reading the script-option arguments # # 1122 2013-04-09 08:37:16Z heinze # Bugfix: change type of variable last_char # # 1119 2013-04-05 15:11:19Z raasch # 
Bugfix for setting -T option for subjob # # 1108 2013-03-05 07:03:32Z raasch # bugfix for coupled runs on lckyut/lckyuh # # 1106 2013-03-04 05:31:38Z raasch # --stdin argument for mpiexec on lckyuh # -y and -Y settings output to header # # 1103 2013-02-20 02:15:53Z raasch # default script runs again under ksh, because of unsolved problems with read # from stdin: when bash script is called from a ksh, message "read error: 0: # Resource temporarily unavailable" appears and script does not stop, # further bash compatibility adjustments, # shebang line replaced by /bin/bash when running jobs on lckyuh; no restarts # on lckyuh, but mrun does not terminate and issues a warning instead # # 1101 2013-02-17 10:20:21Z raasch # script now running under bash instead of ksh, which required small adjustments # (output formatting with printf instead "typeset -L/-R", print replaced by echo, # read from stdin), # cross compilername on lckyuh compute nodes replaced by real compiler name # # 1099 2013-02-10 01:47:43Z raasch # adjustments for Kyushu-University computing center (lckyuh - hayaka) # and for Forwind cluster (lcflow) # small further adjustments for lckyut # # 1094 2013-02-03 01:52:12Z raasch # explicit ssh/scp port can be set in config file with environment variable # scp_port. This port is handled to all ssh/scp/batch_scp calls. 
# decalpha parts (yonsei) removed
#
# 2013-02-02 07:06:13Z raasch
# adjustments for Kyushu-University computing center (lckyut - tatara)
#
# 1083 2013-01-04 10:22:09Z maronga
# bugfix in parameter file check (read %cpp_options was missing)
#
# 1069 2012-11-28 16:18:43Z maronga
# bugfix: coupling mode was always set to mpi2, typos removed
#
# 1058 2012-11-21 07:00:35Z raasch
# Intel inspector (inspxe) is given the number of PEs instead of the number of
# nodes
#
# 1046 2012-11-09 14:38:45Z maronga
# code put under GPL (PALM 3.9)
#
# 21/03/94 - Siggi - first version finished
# 03/03/94 - Siggi - script development started
#
#--------------------------------------------------------------------------------#
# mrun - script for running PALM jobs
#--------------------------------------------------------------------------------#


# DECLARATION OF VARIABLES AND THEIR DEFAULT VALUES (FIRST PART).
# AN EMPTY STRING (OR 0 FOR NUMBERS) MEANS "NOT SET YET": THE CONFIGURATION
# FILE EVALUATION FURTHER BELOW ONLY ASSIGNS VARIABLES THAT ARE STILL EMPTY/0.

set +o allexport    # SUPPRESS EXPORT OF ALL VARIABLES, SINCE IN THE PAST THIS
                    # LED TO PROBLEMS IN ROUTINES CALLED BY MRUN
                    # (TOO MANY ARGUMENTS - PROBLEM)
set +o noclobber    # EXISTING FILES ARE ALLOWED TO BE OVERWRITTEN

AddFilenames=""
additional_conditions=""
add_source_path=""
afname=""
check_namelist_files=false
combine_plot_fields=true
compiler_name=""
cond1=""
cond2=""
config_file=.mrun.config
coupled_dist=""
cpp_options=""
cpumax=0
cpurest=0
create_executable_for_batch=false
delete_temporary_catalog=true
do_batch=false
do_compile=true
do_remote=false
do_stagein=true
do_stageout=true
do_trace=false
email_notification="none"
exclude=""
executable=""
execute_command="none"
execution_error=false
fftw_inc=""
fftw_lib=""
fftw_support=false
fname=test
fromhost=""
global_revision=""
group_number=none
host=""
host_file=""
hp=""
input_list=""
interpreted_config_file=""
job_catalog="~/job_queue"     # THE TILDE IS KEPT LITERALLY (IT IS QUOTED)
job_on_file=""
keep_data_from_previous_run=false
link_local_input=false
link_local_output=false
localhost_realname=$(hostname)
local_dvrserver_running=.FALSE.
# DECLARATION OF VARIABLES AND THEIR DEFAULT VALUES (SECOND PART)
locat=normal
mainprog=""
makefile=""
max_par_io_str=""

# NAME OF THIS SCRIPT WITHOUT LEADING DIRECTORIES. mc IS EXTENDED BY THE
# OPTION PARSER BELOW UNTIL IT HOLDS THE COMPLETE MRUN-COMMAND STRING
mc=${0##*/}

module_calls=""
mrun_script_name=$mc
vnested_dist=""
vnested_mode="vnested_twi"
netcdf_inc=""
netcdf_lib=""
netcdf_support=false
node_usage=default
numprocs=""
numprocs_atmos=0
numprocs_ocean=0
numprocs_crse=0
numprocs_fine=0
OOPT=""
openmp=false
output_list=""
package_list=""
queue=none
read_from_config=""
restart_run=false

# RETURN ADDRESS OF THE LOCAL HOST. SOME HOSTS GET A FIXED ADDRESS, FOR ALL
# OTHERS THE LAST "Address:" ENTRY REPORTED BY nslookup IS USED
case $(hostname) in
   (rte10)
      return_address=133.5.185.60
      echo "+++ WARNING: fixed return_address = $return_address is used !!!!!";;
   (climate0)
      return_address=165.132.26.68
      echo "+++ WARNING: fixed return_address = $return_address is used !!!!!";;
   (schultzl-Latitude-E6540)
      return_address="schultzl-Latitude-E6540"
      echo "+++ WARNING: fixed return_address = $return_address is used !!!!!";;
   (urban00)
      return_address=147.46.30.151
      echo "+++ WARNING: fixed return_address = $return_address is used !!!!!";;
   (*)
      return_address=$(nslookup `hostname` 2>&1 | grep "Address:" | tail -1 | awk '{print $2}');;
esac

return_password=""
return_username=$LOGNAME
remotecall=false
remote_username=""
run_coupled_model=false
run_mode=""
run_vnested_model=false
dashes=" ----------------------------------------------------------------------------"
silent=false
source_list=""
source_path=SOURCE
tasks_per_node=""
threads_per_task=1
tmpcreate=false
tmp_data_catalog=""
transfer_problems=false
usern=$LOGNAME
use_openmp=false
version="MRUN 2.1 Rev$Rev: 2365 $"
working_directory=`pwd`
write_binary=false
TOPT=""
XOPT=""
zeit=$( date | cut -c 12-19 )

typeset -i iec=0 iic=0 iin=0 ioc=0 iout=0 stagein_anz=0 stageout_anz=0
typeset -x -i memory=0   # HAS TO BE EXPORTED HERE, OTHERWISE AN UNKNOWN
                         # SIDE EFFECT MAY CAUSE DATA LOSS WHEN GETOPTS IS READING THE
                         # SCRIPT-OPTION ARGUMENTS
typeset -i cputime i ii iia iii iio icycle inode ival jobges jobsek last_char_int maxcycle minuten nodes pes remaining_pes run_number sekunden tp1


# ERROR HANDLING IN CASE OF EXIT.
# locat = normal    : REGULAR END OF MRUN, ONLY THE FAREWELL MESSAGE IS PRINTED
# locat = control_c : EVERYTHING HAS BEEN DONE BY THE CONTROL-C TRAP ALREADY
# ANY OTHER VALUE   : ERROR EXIT, ERROR-COMMANDS ARE RUN AND TEMPORARY FILES REMOVED
# (THE TRAP TEXT IS SINGLE-QUOTED, SO IT MUST NOT CONTAIN ANY SINGLE QUOTE)
trap 'rm -rf $working_directory/tmp_mrun
      if [[ $locat != normal && $locat != control_c ]]; then

            # CARRY OUT ERROR-COMMANDS GIVEN IN THE CONFIGURATION FILE (EC:)
         (( i = 0 ))
         while (( i < iec )); do
            (( i = i + 1 ))
            printf "\n *** Execution of ERROR-command:\n"
            printf " >>> ${err_command[$i]}\n"
            eval ${err_command[$i]}
         done

         if [[ -n $interpreted_config_file ]]; then
            rm -rf $interpreted_config_file
         fi

            # TEST FOR THE FILE ITSELF (-n ON THE LITERAL NAME WAS ALWAYS TRUE)
         if [[ -f .mrun_environment ]]; then
            rm -rf .mrun_environment
         fi

         if [[ $tmpcreate = true ]]; then
#            printf "\n *** Contents of \"$TEMPDIR\":\n"
#            ls -al; cd
            [[ $delete_temporary_catalog = true ]] && rm -rf $TEMPDIR
         fi

         if [[ "$dvrserver_id" != "" ]]; then
            echo "+++ killing dvrserver_id=$dvrserver_id"
            kill $dvrserver_id
         fi

         if [[ -f ~/job_queue/JOBINFO.$QSUB_REQID ]]; then
            rm -rf ~/job_queue/JOBINFO.$QSUB_REQID
         fi

         printf "\n\n+++ MRUN killed \n\n"

      elif [[ $locat != control_c ]]; then
         printf "\n\n --> all actions finished\n\n"
         printf " Bye, bye $usern !!\n\n"
      fi' exit


# ACTIONS IN CASE OF TERMINAL-BREAK (CONTROL-C):
trap 'rm -rf $working_directory/tmp_mrun
      rm -rf $working_directory/tmp_check_namelist_files
      [[ $tmpcreate = true ]] && (cd; rm -rf $TEMPDIR)
      if [[ -f ~/job_queue/JOBINFO.$QSUB_REQID ]]; then
         rm -rf ~/job_queue/JOBINFO.$QSUB_REQID
      fi
      if [[ "$dvrserver_id" != "" ]]; then
         echo "+++ killing dvrserver_id=$dvrserver_id"
         kill $dvrserver_id
      fi
      printf "\n+++ MRUN killed by \"^C\" \n\n"
      locat=control_c
      exit
     ' 2


# CHECK IF THE PATH FOR THE PALM BINARIES (SCRIPTS+UTILITY-PROGRAMS) HAS
# BEEN SET
if [[ "$PALM_BIN" = "" ]]; then
   printf "\n +++ environment variable PALM_BIN has not been set"
   printf "\n please set it to the directory where the PALM scripts are located"
   locat=palm_bin; exit
fi
export PATH=$PALM_BIN:$PATH


# READ SHELLSCRIPT-OPTIONS AND REBUILD THE MRUN-COMMAND STRING (MC),
# WHICH WILL BE USED TO START RESTART-JOBS.
# THE LEADING ":" OF THE OPTION STRING SWITCHES getopts TO SILENT ERROR
# REPORTING: UNKNOWN OPTIONS ARRIVE AS "?", MISSING ARGUMENTS AS ":"
while getopts :a:bBc:Cd:Fg:G:h:H:i:kK:m:M:n:N:o:O:p:P:q:r:R:s:St:T:u:U:vw:xX:yY:zZ option
do
   case $option in
      (a) afname=$OPTARG;;
      (b) do_batch=true; mc="$mc -b";;
      (B) delete_temporary_catalog=false; mc="$mc -B";;
      (c) config_file=$OPTARG; mc="$mc -c$OPTARG";;
      (C) restart_run=true; mc="$mc -C";;
      (d) fname=$OPTARG; mc="$mc -d$OPTARG";;
      (F) job_on_file="-D"; mc="$mc -F";;
      (g) group_number=$OPTARG; mc="$mc -g$OPTARG";;
      (G) global_revision=$OPTARG; mc="$mc -G'$OPTARG'";;
      (h) host=$OPTARG; mc="$mc -h$OPTARG";;
      (H) fromhost=$OPTARG; mc="$mc -H$OPTARG";;
      (i) input_list=$OPTARG; mc="$mc -i'$OPTARG'";;
      (k) keep_data_from_previous_run=true; mc="$mc -k";;
      (K) additional_conditions="$OPTARG"; mc="$mc -K'$OPTARG'";;
      (m) memory=$OPTARG; mc="$mc -m$OPTARG";;
      (M) makefile=$OPTARG; mc="$mc -M$OPTARG";;
      (n) node_usage=$OPTARG; mc="$mc -n$OPTARG";;
      (N) run_vnested_model=true; vnested_dist=$OPTARG; mc="$mc -N'$OPTARG'";;
      (o) output_list=$OPTARG; mc="$mc -o'$OPTARG'";;
      (O) use_openmp=true; threads_per_task=$OPTARG; mc="$mc -O$OPTARG";;
      (p) package_list=$OPTARG; mc="$mc -p'$OPTARG'";;
      (P) return_password=$OPTARG; mc="$mc -P$OPTARG";;
      (q) queue=$OPTARG; mc="$mc -q$OPTARG";;
      (r) run_mode=$OPTARG; mc="$mc -r'$OPTARG'";;
      (R) remotecall=true;return_address=$OPTARG; mc="$mc -R$OPTARG";;
      (s) source_list=$OPTARG; mc="$mc -s'$OPTARG'";;
      (S) read_from_config=false; mc="$mc -S";;
      (t) cpumax=$OPTARG; mc="$mc -t$OPTARG";;
      (T) mrun_tasks_per_node=$OPTARG; mc="$mc -T$OPTARG";;
      (u) remote_username=$OPTARG; mc="$mc -u$OPTARG";;
      (U) return_username=$OPTARG; mc="$mc -U$OPTARG";;
      (v) silent=true; mc="$mc -v";;
      (w) max_par_io_str=$OPTARG; mc="$mc -w$OPTARG";;
      (x) do_trace=true;set -x; mc="$mc -x";;
      (X) numprocs=$OPTARG; mc="$mc -X$OPTARG";;
      (y) ocean_file_appendix=true; mc="$mc -y";;
      (Y) run_coupled_model=true; coupled_dist=$OPTARG; mc="$mc -Y'$OPTARG'";;
      (z) check_namelist_files=false; mc="$mc -z";;
      (Z) combine_plot_fields=false; mc="$mc -Z";;
      (:) # OPTION GIVEN WITHOUT ITS REQUIRED ARGUMENT (OPTARG HOLDS THE OPTION
          # LETTER). WITHOUT THIS ARM THE OPTION WAS SILENTLY IGNORED
          printf "\n +++ option -$OPTARG requires an argument \n"
          printf "\n --> type \"$0 ?\" for available options \n"
          locat=parameter;exit;;
      (\?) printf "\n +++ unknown option $OPTARG \n"
           printf "\n --> type \"$0 ?\" for available options \n"
           locat=parameter;exit;;
   esac
done


# SKIP GIVEN OPTIONS TO READ POSITIONAL PARAMETER, IF GIVEN
# CURRENTLY ONLY PARAMETER ? (TO OUTPUT A SHORT COMMAND INFO) IS ALLOWED
(( to_shift = $OPTIND - 1 ))
shift $to_shift


# PRINT SHORT DESCRIPTION OF MRUN OPTIONS
if [[ "$1" = "?" ]]; then
   (printf "\n *** mrun can be called as follows:\n"
    printf "\n $mrun_script_name -b -c.. -d.. -f.. -F -h.. -i.. -I -K.. -m.. -o.. -p.. -r.. -R -s.. -t.. -T.. -v -x -X.. -y -Y.. -Z \n"
    printf "\n Description of available options:\n"
    printf "\n Option Description Default-Value"
    printf "\n -a base name of input files equiv. -d"
    printf "\n -b batch-job on local machine ---"
    printf "\n -B do not delete temporary directory at end ---"
    printf "\n -c configuration file .mrun.config"
    printf "\n -d base name of files attached to program test"
    printf "\n -F create remote job file only ---"
    printf "\n -h execution host $localhost_realname"
    printf "\n -i INPUT control list \"\" "
    printf "\n -k keep data from previous run"
    printf "\n -K additional conditions for controling"
    printf "\n usage of conditional code and"
    printf "\n env-variables in configuration file \"\" "
    printf "\n -m memory demand in MB (batch-jobs) 0 MB"
    printf "\n -M Makefile name Makefile"
    printf "\n -n node usage (shared/not_shared) depending on -h"
    printf "\n -N Vertical grid nesting. Number of"
    printf "\n PE for Coarse and Fine grid"
    printf "\n -o OUTPUT control list \"\" "
    printf "\n -O threads per openMP task ---"
    printf "\n -p software package list \"\" "
    printf "\n -q queue \"$queue\" "
    printf "\n -r run control list (combines -i -o) \"\" "
    printf "\n -s filenames of routines to be compiled \"\" "
    printf "\n must end with .f, .f90, .F, or .c !"
    printf "\n use \"..\" for more than one file and wildcards"
    printf "\n -s LM compiles all locally modified files"
    printf "\n -S config file interpreted by shellscript ---"
    printf "\n -t allowed cpu-time in seconds (batch) 0"
    printf "\n -T tasks per node depending on -h"
    printf "\n -u username on remote machine \"\" "
    printf "\n -v no prompt for confirmation ---"
    printf "\n -w maximum parallel io streams as given by -X"
    printf "\n -x tracing of mrun for debug purposes ---"
    printf "\n -X # of processors (on parallel machines) 1"
    printf "\n -y add appendix \"_O\" to all local output"
    printf "\n files (ocean precursor runs followed by"
    printf "\n coupled atmosphere-ocean runs) ---"
    printf "\n -Y run coupled model, \"#1 #2\" with"
    printf "\n #1 atmosphere and #2 ocean processors \"#/2 #/2\" depending on -X"
    printf "\n -Z skip combine_plot_fields at the end of "
    printf "\n the simulation ---"
    printf "\n "
    printf "\n Possible values of positional parameter :"
    printf "\n \"?\" - this outline \n\n") | more
   exit
elif [[ "$1" != "" ]]; then
   printf "\n +++ positional parameter $1 unknown \n"
   locat=parameter; exit
fi


# SHORT STARTING MESSAGE
printf "\n*** $version "
printf "\n will be executed. Please wait ..."


# CHECK, IF CONFIGURATION FILE EXISTS
if [[ ! -f $config_file ]]; then
   printf "\n\n +++ configuration file: "
   printf "\n $config_file"
   printf "\n does not exist"
   locat=connect; exit
fi


# DETERMINE THE HOST-IDENTIFIER (localhost) FROM THE CONFIGURATION FILE.
# EACH "%host_identifier" LINE HOLDS THE HOSTNAME (FIELD 2) AND THE IDENTIFIER
# (FIELD 3). THE HOSTNAME IS DELIBERATELY LEFT UNQUOTED IN THE COMPARISON, SO
# THAT IT IS MATCHED AS A SHELL PATTERN
line=""
grep "%host_identifier" $config_file > tmp_mrun
while read line
do
   if [[ "$line" != "" && $(echo $line | cut -c1) != "#" ]]; then
      HOSTNAME=$(echo $line | cut -d" " -s -f2)
      host_identifier=$(echo $line | cut -d" " -s -f3)
      if [[ $localhost_realname = $HOSTNAME ]]; then
         localhost=$host_identifier
         break
      fi
   fi
done < tmp_mrun

if [[ "$localhost" = "" ]]; then
   printf "\n\n +++ no host identifier found in configuration file \"$config_file\""
   printf "\n for local host \"$localhost_realname\"."
   printf "\n Please add line"
      # "%%" IS REQUIRED TO PRINT A LITERAL PERCENT SIGN WITH printf
   printf "\n \"%%host_identifier $localhost_realname <identifier>\""
   printf "\n to the configuration file."
   locat=localhost; exit
fi


# ??? THIS SHOULD BE DESCRIBED IN THE MRUN DOCUMENTATION ???
# SET BASENAME OF THE INPUT-FILES TO THE GENERAL BASENAME (GIVEN BY OPTION -d),
# IF IT HAS NOT BEEN SET BY THE USER DIFFERENTLY (USING OPTION -a)
[[ "$afname" = "" ]] && afname=$fname


# ADD RUN-FILE ACTIVATION STRINGS (GIVEN BY OPTION -r) TO THE
# RESPECTIVE LISTS FOR ACTIVATING THE INPUT/OUTPUT FILE CONNECTION
# STATEMENTS IN THE CONFIGURATION FILE
if [[ "$run_mode" != "" ]]; then
   input_list="$input_list $run_mode"
   output_list="$output_list $run_mode"
fi


# ??? is this (and the respective option -H) really required ???
# STORE HOSTNAME, FROM WHICH THE JOB IS STARTED,
# BUT ONLY IF IT HASN'T BEEN ALREADY SET BY OPTION -H
# (MRUN IS AUTOMATICALLY SETTING THIS OPTION IN THE MRUN-CALLS WITHIN
# THOSE BATCH-JOBS, WHICH ARE CREATED BY MRUN ITSELF)
if [[ "$fromhost" = "" ]]; then
   fromhost=$localhost
fi


# CHECK, IF EXECUTION SHALL TO BE DONE ON THE REMOTE-MACHINE.
# VALUE OF do_remote IS IMPORTANT FOR THE FILE CONNECTIONS.
# IN CASE OF EXECUTION ON A REMOTE-MACHINE, A BATCH-JOB HAS
# TO BE SUBMITTED (I.E.
# do_batch=true)
if [[ -n $host && "$host" != $localhost ]]; then
   do_batch=true
   do_remote=true
   case $host in
      (ibm|ibmh|ibmkisti|ibmku|ibms|lcbullhh|lccrayb|lccrayh|lccrayf|lceddy|lckyoto|lcocean|unics|lcxe6|lcxt5m|lck|lckiaps|lckordi|lckyuh|lckyut|lcsb|lcgeohu) true;;
      (*) printf "\n"
          printf "\n +++ sorry: execution of batch jobs on remote host \"$host\""
          printf "\n is not available"
          locat=nqs; (( iec = 0 )); exit;;
   esac
else
   host=$localhost
fi


# EVALUATE ADDITIONAL CONDITIONS GIVEN BY OPTION -K.
# AT MOST TWO BLANK-SEPARATED CONDITIONS ARE ALLOWED. THEY ALSO FORM THE
# SUFFIX "block" (_cond1[_cond2]) OF THE interpret_config EXECUTABLE
if [[ -n $additional_conditions ]]; then
   cond1=$(echo $additional_conditions | cut -d" " -f1)
   cond2=$(echo $additional_conditions | cut -d" " -s -f2)
   dummy=$(echo $additional_conditions | cut -d" " -s -f3)
   if [[ -n $dummy ]]; then
      printf "\n +++ more than 2 additional conditions given for Option \"-K\""
      locat=options; exit
   fi
   block=_$cond1
   [[ -n $cond2 ]] && block=${block}_$cond2
fi


# EVALUATE MODEL COUPLING FEATURES (OPTION -Y).
# THE TWO NUMBERS GIVEN BY -Y MUST ADD UP TO THE TOTAL NUMBER OF PEs (-X)
if [[ $run_coupled_model = true ]]; then
   numprocs_atmos=$(echo $coupled_dist | cut -d" " -s -f1)
   numprocs_ocean=$(echo $coupled_dist | cut -d" " -s -f2)

   if (( $numprocs_ocean + $numprocs_atmos != $numprocs )); then
      printf "\n +++ number of processors does not fit to specification by \"-Y\"."
      printf "\n PEs (total) : $numprocs"
      printf "\n PEs (atmosphere): $numprocs_atmos"
      printf "\n PEs (ocean) : $numprocs_ocean"
      locat=coupling; exit
   fi
fi


# VERTICAL NESTING (OPTION -N): THE TWO NUMBERS GIVEN BY -N (COARSE AND FINE
# GRID PEs) MUST ADD UP TO THE TOTAL NUMBER OF PEs (-X)
if [[ $run_vnested_model = true ]]; then
   if [[ -n $vnested_dist ]]; then
      numprocs_crse=$(echo $vnested_dist | cut -d" " -s -f1)
      numprocs_fine=$(echo $vnested_dist | cut -d" " -s -f2)

      if (( $numprocs_crse + $numprocs_fine != $numprocs )); then
         printf "\n +++ number of processors does not fit to specification by \"-N\"."
         printf "\n PEs (total) : $numprocs"
         printf "\n PEs (Coarse) : $numprocs_crse"
         printf "\n PEs (Fine) : $numprocs_fine"
         locat=vnesting; exit
      fi
   else
         # THE FORMER MESSAGE HAD UNBALANCED QUOTES, SO THAT ONLY ITS FIRST
         # WORD WAS PRINTED
      printf "\n +++ Specify PE for coarse and fine grid: -N \"nCGPE nFGPE\""
      locat=vnesting; exit
   fi
   vnested_dist="$numprocs_crse $numprocs_fine"
fi


# SAVE VALUES OF MRUN-OPTIONS IN ORDER TO OVERWRITE
# THOSE VALUES GIVEN IN THE CONFIGURATION-FILE
mrun_memory=$memory
mrun_group_number=$group_number
mrun_cpumax=$cpumax
mrun_numprocs=$numprocs


# READ AND EVALUATE THE CONFIGURATION-FILE FROM WITHIN THIS SHELLSCRIPT
# (OPTION -S). THE DEFAULT IS TO USE THE FORTRAN PROGRAM interpret_config
# (SEE THE ELSE-BRANCH)
if [[ "$read_from_config" = false ]]; then

   [[ $silent = false ]] && printf "\n Reading the configuration file... "

      # READ THE CONFIGURATION FILE LINE BY LINE
   while read zeile
   do
      [[ $silent = false ]] && printf "."

         # FIRST REPLACE ENVIRONMENT-VARIABLES BY THEIR RESPECTIVE VALUES.
         # NOTE: eval EXECUTES ANY COMMAND SUBSTITUTION CONTAINED IN THE LINE,
         # SO THE CONFIGURATION FILE MUST BE TRUSTED
      eval zeile=\"$zeile\"

         # INTERPRET THE LINE
      if [[ "$(echo $zeile)" = "" ]]; then

            # EMPTY LINE, NO ACTION
         continue

      elif [[ "$(echo $zeile | cut -c1)" = "#" ]]; then

            # LINE IS A COMMENT LINE
         true

      elif [[ "$(echo $zeile | cut -c1)" = "%" ]]; then

            # LINE DEFINES AN ENVIRONMENT-VARIABLE:
            # %name value [host [cond1 [cond2]]]
         zeile=$(echo $zeile | cut -c2-)
         var=$(echo $zeile | cut -d" " -f1)
         value=$(echo $zeile | cut -d" " -s -f2)
         for_host=$(echo $zeile | cut -d" " -s -f3)
         for_cond1=$(echo $zeile | cut -d" " -s -f4)
         for_cond2=$(echo $zeile | cut -d" " -s -f5)

         if [[ "$for_host" = "" || ( "$for_host" = $host && "$for_cond1" = "$cond1" && "$for_cond2" = "$cond2" ) || $(echo "$input_list$output_list"|grep -c "$for_host") != 0 ]]; then

               # REPLACE ":" BY " " IN COMPILER- CPP- OR LINKER-OPTIONS,
               # "::" IS REPLACED BY ":".
            value=$(echo $value | sed 's/::/%DUM%/g' | sed 's/:/ /g' | sed 's/%DUM%/:/g')

               # VALUE FROM THE CONFIGURATION-FILE IS ASSIGNED TO THE
               # ENVIRONMENT-VARIABLE, BUT ONLY IF NO VALUE HAS BEEN ALREADY
               # ASSIGNED WITHIN THIS SCRIPT (E.G. BY SCRIPT-OPTIONS).
               # NON-ASSIGNED VARIABLES HAVE VALUE "" OR 0 (IN CASE OF INTEGER).
               # HENCE THE GENERAL RULE IS: SCRIPT-OPTION OVERWRITES THE
               # CONFIGURATION-FILE.
            if [[ "$(eval echo \$$var)" = "" || "$(eval echo \$$var)" = "0" ]]; then
               eval $var=\$value

                  # TERMINAL OUTPUT OF ENVIRONMENT-VARIABLES, IF TRACEBACK IS SWITCHED on
               if [[ $do_trace = true ]]; then
                  printf "\n*** ENVIRONMENT-VARIABLE $var = $value"
               fi
            fi

               # IF AN ENVIRONMENT-VARIABLE DETERMINES THE HOST, THEN EVALUATE IT IMMEDIATELY:
               # VALUE OF do_remote IS REQUIRED FOR THE FILE CONNECTIONS (COPY OF I/O FILES).
               # IF EXECUTION IS SCHEDULED FOR A REMOTE-MACHINE, A BATCH-JOB
               # HAS TO BE STARTED
            if [[ $var = host ]]; then
               if [[ -n $host && "$host" != $localhost ]]; then
                  do_batch=true
                  do_remote=true
                  case $host in
                     (ibm|ibmh|ibmkisti|ibmku|ibms|lcbullhh|lccrayb|lccrayh|lccrayf|lceddy|lckyoto|lcocean|unics|lcxe6|lcxt5m|lck|lckiaps|lckordi|lckyuh|lckyut|lcsb|lcgeohu) true;;
                     (*) printf "\n +++ sorry: execution of batch jobs on remote host \"$host\""
                         printf "\n is not available"
                         locat=nqs; exit;;
                  esac
               else
                  host=$localhost
               fi
            fi

               # USER-DEFINED ENVIRONMENT VARIABLES MUST BE EXPORTED,
               # BECAUSE THEY MAY BE REQUIRED IN OTHER SCRIPTS CALLED
               # BY MRUN (E.G. subjob)
            export $var
         fi

      elif [[ "$(echo $zeile | cut -c1-3)" = "EC:" ]]; then

            # LINE DEFINES ERROR-COMMAND
         (( iec = iec + 1 ))
         zeile=$(echo $zeile | cut -c4-)
         err_command[$iec]="$zeile"

      elif [[ "$(echo $zeile | cut -c1-3)" = "IC:" ]]; then

            # LINE DEFINES INPUT-COMMAND
         (( iic = iic + 1 ))
         zeile=$(echo $zeile | cut -c4-)
         in_command[$iic]="$zeile"

      elif [[ "$(echo $zeile | cut -c1-3)" = "OC:" ]]; then

            # LINE DEFINES OUTPUT-COMMAND
         (( ioc = ioc + 1 ))
         zeile=$(echo $zeile | cut -c4-)
         out_command[$ioc]="$zeile"

      else

            # LINE DEFINES FILE CONNECTION. READ THE FILE ATTRIBUTES.
            # s2a: in/out - field
            # s2b: loc - field (optional)
            # s2c: tr/ar - field (optional)
         s1=$(echo $zeile | cut -d" " -f1)
         s2=$(echo $zeile | cut -d" " -s -f2)
         s2a=$(echo $s2 | cut -d":" -f1)
         if [[ $(echo $s2 | grep -c ":") = 0 ]]; then
            s2b=""
            s2c=""
         else
            s2b=$(echo $s2 | cut -d":" -f2 | sed 's/:/ /g')
            s2c=$(echo $s2 | cut -d":" -s -f3 | sed 's/:/ /g')
         fi
         s3=$(echo $zeile | cut -d" " -f3)
         s4=$(echo $zeile | cut -d" " -s -f4)
         s5=$(echo $zeile | cut -d" " -s -f5)
         s6=$(echo $zeile | cut -d" " -s -f6)

            # STORE FILE CONNECTION, IF ACTIVATED BY ACTIVATION-STRING FROM
            # INPUT- OR OUTPUT-LIST.
            # VARIABLE S3 MAY CONTAIN A LIST OF ACTIVATION STRINGS (FIELD-SEPARATOR ":").
            # IF EXECUTION IS SCHEDULED FOR A REMOTE-MACHINE AND THE FILE IS ONLY
            # LOCALLY REQUIRED ON THAT MACHINE (I.E. s2b = loc), THE FILE CONNECTION
            # IS NOT CHECKED AND STORED.
            # ":" IS TEMPORARILY ADDED TO IFS, SO THAT THE for-LOOPS SPLIT s3 AT ":"
         IFSALT="$IFS"; IFS="$IFS:"
         if [[ "$s2a" = in && ! ( $do_remote = true && ( "$s2b" = loc || "$s2b" = locopt ) ) ]]; then
            found=false
            for actual in $input_list
            do
               for formal in $s3
               do
                  [[ $actual = $formal || "$formal" = "-" ]] && found=true
               done
            done
            if [[ $found = true ]]; then
               (( iin = iin + 1 ))
               localin[$iin]=$s1; transin[$iin]=$s2b; actionin[$iin]=$s2c;
               typein[$iin]=$s3; pathin[$iin]=$s4; endin[$iin]=$s5;
               extin[$iin]=$s6
            fi
         elif [[ "$s2a" = out && ! ( $do_remote = true && "$s2b" = loc ) ]]; then
            found=false
            for actual in $output_list
            do
               for formal in $s3
               do
                  [[ $actual = $formal || "$formal" = "-" ]] && found=true
               done
            done
            if [[ $found = true ]]; then
               (( iout = iout + 1 ))
               localout[$iout]=$s1; actionout[$iout]=$s2c; typeout[$iout]=$s3;
               pathout[$iout]=$s4; endout[$iout]=$s5; extout[$iout]=$s6
            fi
         elif [[ "$s2a" != in && "$s2a" != out ]]; then
            printf "\n +++ I/O-attribute in configuration file $config_file has the invalid"
            printf "\n value \"$s2\". Only \"in\" and \"out\" are allowed!"
            locat=connect; exit
         fi
         IFS="$IFSALT"
      fi
   done < $config_file

else

      # EVALUATE THE CONFIGURATION-FILE BY FORTRAN-PROGRAM
   [[ $silent = false ]] && printf "..."
   interpreted_config_file=.icf.$RANDOM

      # PROVIDE VALUES OF ENVIRONMENT-VARIABLE FOR interpret_config VIA NAMELIST-FILE
   cat > .mrun_environment << EOF
 &mrun_environment cond1 = '$cond1', cond2 = '$cond2',
                   config_file = '$config_file', do_remote = '$do_remote',
                   do_trace = '$do_trace', host = '$host',
                   input_list = '$input_list', icf = '$interpreted_config_file',
                   localhost = '$localhost', output_list = '$output_list' /

EOF

   if [[ "$host" != $localhost ]]; then

         # REMOTE JOB FROM LOCAL HOST: JUST TAKE THE FIRST EXECUTABLE FOUND
      interpret_config_executable=$(ls -1 ${PALM_BIN}/interpret_config*.x 2>/dev/null)
      if [[ $? != 0 ]]; then
         printf "\n\n +++ no interpret_config found"
         printf "\n run \"mbuild -u -h ...\" to generate utilities for this host"
         locat=interpret_config; exit
      fi
      interpret_config_executable=$(echo $interpret_config_executable | cut -d" " -f1)
      $interpret_config_executable

   else

         # CHECK, IF THERE IS AN EXECUTABLE FOR THE BLOCK
      if [[ ! -f ${PALM_BIN}/interpret_config${block}.x ]]; then
         printf "\n\n +++ no interpret_config found for given block \"$cond1 $cond2\""
         printf "\n run \"mbuild -u -h ...\" to generate utilities for this block"
         locat=interpret_config; exit
      else
         interpret_config${block}.x
      fi

   fi
   rm .mrun_environment

      # EXECUTE SHELL-COMMANDS GENERATED BY interpret_config WITHIN THIS SHELL
   chmod u+x $interpreted_config_file
   export PATH=$PATH:.
   . $interpreted_config_file
   rm $interpreted_config_file

fi


# VALUES OF MRUN-OPTIONS OVERWRITE THOSE FROM THE CONFIGURATION-FILE
[[ $mrun_memory != 0 ]] && memory=$mrun_memory
[[ "$mrun_group_number" != "none" ]] && group_number=$mrun_group_number
[[ $mrun_cpumax != 0 ]] && cpumax=$mrun_cpumax
[[ "$mrun_numprocs" != "" ]] && numprocs=$mrun_numprocs
[[ "$max_par_io_str" != "" ]] && maximum_parallel_io_streams=$max_par_io_str
[[ "$mrun_tasks_per_node" != "" ]] && tasks_per_node=$mrun_tasks_per_node


# ON THE LOCAL MACHINE, DETERMINE (FROM THE CONFIGURATION-FILE) THE PATH
# FOR SOURCE-FILES TO BE COMPILED.
# IN A BATCH-JOB, SOURCES (ROUTINES TO BE COMPILED) ARE COMPLETE ALREADY.
# BEFORE MRUN IS CALLED IN THE JOB, SOURCES_COMPLETE = true IS ASSIGNED (SEE FURTHER BELOW).
if [[ -z "$SOURCES_COMPLETE" ]]; then

      # FIRST CHECK, IF A GLOBAL SOURCE PATH (TO BE USED FOR ALL HOSTS)
      # HAS BEEN DEFINED, I.E. A "%source_path" LINE WITHOUT A HOST FIELD
   source_path=""
   line=""
   grep "%source_path" $config_file > tmp_mrun
   while read line
   do
      [[ -z "$line" ]] && continue
      [[ $(echo $line | cut -c1) = "#" ]] && continue
      if [[ -z "$(echo $line | cut -d" " -f3)" ]]; then
         global_source_path=$(echo $line | cut -d" " -f2)
      fi
   done < tmp_mrun

      # NOW LOOK FOR A SOURCE PATH GIVEN FOR THE LOCAL HOST.
      # ONLY ONE SUCH ENTRY IS ALLOWED
   line=""
   found=false
   grep " $localhost" $config_file | grep "%source_path" > tmp_mrun
   while read line
   do
      [[ -z "$line" ]] && continue
      [[ $(echo $line | cut -c1) = "#" ]] && continue
      if [[ $found = true ]]; then
         printf "\n\n +++ more than one source path found in configuration file"
         printf "\n for local host \"$localhost\" "
         locat=source_path; exit
      fi
      source_path=$(echo $line | cut -d" " -f2)
      found=true
   done < tmp_mrun
   rm tmp_mrun

      # WITHOUT A HOST-SPECIFIC ENTRY, FALL BACK TO THE GLOBAL SOURCE PATH
   if [[ -z "$source_path" ]]; then
      if [[ -n "$global_source_path" ]]; then
         source_path=$global_source_path
      else
         printf "\n\n +++ no source path found in configuration file"
         printf "\n for local host \"$localhost\" "
         locat=source_path; exit
      fi
   fi

      # RESOLVE SHELL EXPANSIONS CONTAINED IN THE PATH GIVEN IN THE CONFIGURATION FILE
   eval source_path=$source_path

   if [[ ! -d $source_path ]]; then
      printf "\n\n +++ source path \"$source_path\" on local host"
      printf "\n \"$localhost\" does not exist"
      locat=source_path; exit
   fi

fi


# GET THE GLOBAL REVISION-NUMBER OF THE SVN-REPOSITORY
# (HANDED OVER TO RESTART-RUNS USING OPTION -G)
if [[ -z "$global_revision" && $host != "ibmkisti" ]]; then
   global_revision=$(svnversion $source_path 2>/dev/null)
   global_revision="Rev: $global_revision"
fi


# ??? THIS SHOULD ACTUALLY NEVER HAPPEN, BECAUSE OPTIONS OVERRIDE THE CONFIG FILE ???
# CHECK AGAIN, IF EXECUTION SHALL BE DONE ON A REMOTE-HOST (BATCH-JOB).
# (HOST MAY HAVE CHANGED DUE TO SETTINGS IN THE CONFIGURATION-FILE)
if [[ -n $host  &&  "$host" != $localhost ]]
then
   do_batch=true
   do_remote=true
   case $host in
      (ibm|ibmh|ibmkisti|ibmku|ibms|lcbullhh|lccrayb|lccrayh|lccrayf|lceddy|lckyoto|lcocean|unics|lcxe6|lcxt5m|lck|lckiaps|lckordi|lckyuh|lckyut|lcsb|lcgeohu)  true;;
      (*)  printf "\n"
           printf "\n +++ sorry: execution of batch jobs on remote host \"$host\""
           printf "\n is not available"
           locat=nqs; (( iec = 0 )); exit;;
   esac
else
   host=$localhost
fi


# IN CASE OF PARALLEL EXECUTION, CHECK SOME SPECIFICATIONS CONCERNING PROCESSOR NUMBERS
if [[ "$cond1" = parallel  ||  "$cond2" = parallel ]]
then

      # CHECK, IF NUMBER OF CORES TO BE USED HAS BEEN GIVEN
   if [[ ! -n $numprocs ]]
   then
      printf "\n"
      printf "\n +++ option \"-K parallel\" requires additional specification"
      printf "\n of the number of processors to be used by"
      printf "\n mrun-option \"-X\" or by environment-variable"
      printf "\n \"numprocs\" in the configuration file"
      locat=numprocs; (( iec = 0 )); exit
   fi

      # CHECK, IF THE NUMBER OF CORES PER NODE HAS BEEN GIVEN AND IF IT IS AN
      # INTEGRAL DIVISOR OF THE TOTAL NUMBER OF CORES GIVEN BY OPTION -X
   if [[ "$tasks_per_node" = ""  &&  $host != lcxt5m ]]
   then
      printf "\n"
      printf "\n +++ option \"-T\" (tasks per node) is missing"
      printf "\n set -T option or define tasks_per_node in the config file"
      locat=tasks_per_node; (( iec = 0 )); exit
   fi

   if (( numprocs < tasks_per_node ))
   then
      printf "\n"
      printf "\n +++ tasks per node (-T) cannot exceed total number of processors (-X)"
      printf "\n given values: -T $tasks_per_node -X $numprocs"
      locat=tasks_per_node; (( iec = 0 )); exit
   fi

   if [[ $host != lcxt5m ]]
   then
         # NUMBER OF NODES; ONE EXTRA (PARTLY FILLED) NODE IS ADDED IF
         # tasks_per_node DOES NOT DIVIDE numprocs
      (( nodes = numprocs / ( tasks_per_node * threads_per_task ) ))
      (( ival = $tasks_per_node ))
      (( pes = numprocs ))
      (( ii = pes / ival ))
      (( remaining_pes = pes - ii * ival ))
      if (( remaining_pes > 0 ))
      then
         printf "\n"
         printf "\n +++ WARNING: tasks per node (option \"-T\") is not an integral"
         printf "\n divisor of the total number of processors (option \"-X\")"
         printf "\n values of this mrun-call: \"-T $tasks_per_node\" \"-X $numprocs\""
         printf "\n One of the nodes is filled with $remaining_pes instead of $tasks_per_node tasks"
         (( nodes = nodes + 1 ))
      fi
   fi

      # SETTINGS FOR SUBJOB-COMMAND
   OOPT="-O $threads_per_task"

      # SET THE TOTAL NUMBER OF NODES, REQUIRED FOR THE SUBJOB-COMMAND (SEE FURTHER BELOW)
   if [[ "$tasks_per_node" != "" ]]
   then
      TOPT="-T $tasks_per_node"
   fi

      # CHECK node_usage FOR ALLOWED VALUES AND SET DEFAULT VALUE, IF NECESSARY
   if [[ $node_usage = default ]]
   then
      if [[ $host = ibms ]]
      then
         node_usage=shared
      else
         node_usage=not_shared
      fi
   fi

   if [[ $node_usage != shared  &&  $node_usage != not_shared  &&  $node_usage != singlejob  &&  "$(echo $node_usage | cut -c1-3)" != "sla" ]]
   then
      printf "\n"
      printf "\n +++ node usage (option \"-n\") is only allowed to be set"
      printf "\n \"shared\" or \"not_shared\""
      locat=tasks_per_node; (( iec = 0 )); exit
   fi

fi


# CHECK IF HOSTFILE EXISTS
if [[ -n $hostfile ]]
then
   if [[ ! -f $hostfile ]]
   then
      printf "\n"
      printf "\n +++ hostfile \"$hostfile\" does not exist"
      locat=hostfile; exit
   fi
fi


# SET DEFAULT VALUE FOR THE MAXIMUM NUMBER OF PARALLEL IO STREAMS
if [[ "$maximum_parallel_io_streams" = "" ]]
then
   maximum_parallel_io_streams=$numprocs
fi


# SET PORT NUMBER OPTION FOR CALLS OF SSH/SCP, subjob AND batch_scp SCRIPTS
if [[ "$scp_port" != "" ]]
then
   PORTOPT="-P $scp_port"
   SSH_PORTOPT="-p $scp_port"
fi


# DETERMINE THE SSH-OPTION IN CASE THAT AN SSH-KEY IS EXPLICITLY GIVEN IN THE
# CONFIG-FILE
if [[ "$ssh_key" != "" ]]
then
   ssh_key="-i $HOME/.ssh/$ssh_key"
fi


# SET DEFAULT-QUEUE, IF NOT GIVEN
if [[ $queue = none ]]
then
   case $host in
      (ibmh)      queue=cluster;;
      (ibmkisti)  queue=class.32plus;;
      (ibmmuc*)   queue=test;;
      (lcbwuni)   queue=develop;;
      (lcbullhh)  queue=compute;;
      (lccrayb)   queue=mpp1q;;
      (lccrayh)   queue=mpp1q;;
      (lcgeohu)   queue=short;;
      (lckiaps)   queue=normal;;
      (lckyoto)   queue=ph;;
      (lckyuh)    queue=fx-single;;
      (lckyut)    queue=cx-single;;
      (lclrz)     queue=mpp2;;
      (lctit)     queue=S;;
      (unics)     queue=unics;;
   esac
fi


# GENERATE FULL FILENAMES OF INPUT-FILES, INCLUDING THEIR PATH
# CHECK, IF INPUT-FILES EXIST, AND DETERMINE HIGHEST CYCLE NUMBER (IF CYCLES EXIST)
(( i = 0 ))
while (( i < iin ))
do
   (( i = i + 1 ))
   (( maxcycle = 0 ))

      # GENERATE PATH AND FULL FILE NAME (then-BRANCH: FIXED FULL NAME IS GIVEN, I.E. THE
      # FILE IDENTIFIER IS NOT PART OF THE FILENAME))
      # remotepathin IS STORED UNEVALUATED: EVALUATE REMOTE-PATH ON THE REMOTE HOST ONLY
   if [[ "${actionin[$i]}" = di ]]
   then
      remotepathin[$i]=${pathin[$i]}/${endin[$i]}
      eval filename=${pathin[$i]}/${endin[$i]}
   else
      remotepathin[$i]=${pathin[$i]}/${afname}${endin[$i]}
      eval filename=${pathin[$i]}/${afname}${endin[$i]}
   fi
   eval pathname=${pathin[$i]}

      # CHECK IF FILE EXISTS
   if ! ls $filename* 1>/dev/null 2>&1
   then

         # FILES WITH ATTRIBUTE locopt ARE OPTIONAL. NO ABORT, IF THEY DO NOT EXIST.
      if [[ "${transin[$i]}" != "locopt" ]]
      then
         printf "\n\n +++ INPUT-file: "
         if [[ "${extin[$i]}" = ""  ||  "${extin[$i]}" = " " ]]
         then
            printf "\n $filename"
         else
            printf "\n $filename.${extin[$i]}"
         fi
         printf "\n does not exist\n"
         locat=input; exit
      else
         transin[$i]="unavailable"
      fi

   else

         # DETERMINE THE FILE'S CYCLE NUMBER
      ls -1 -d $filename    >   filelist  2>/dev/null
      ls -1 -d $filename.*  >>  filelist  2>/dev/null
      while read zeile
      do

            # filename without path (i.e. after the last "/")
         basefilename=$(basename ${zeile})

            # check if there is an extension and strip it, so that the cycle
            # number becomes the last dot-separated part of the name.
            # The array element must be written as "${extin[$i]}" ("$extin[$i]"
            # expands element 0 followed by a literal "[n]" and never matches),
            # and the suffix is removed with "%.*" (same as in the OUTPUT-file loop).
         extension=${basefilename##*.}
         if [[ "$extension" = "${extin[$i]}" ]]
         then
            basefilename=${basefilename%.*}
         fi

            # check for an existing cycle number
         cycle=${basefilename##*.}
         if [[ $cycle =~ ^-?[0-9]+$ ]]
         then
            (( icycle = $cycle ))
         else
            (( icycle = 0 ))
         fi

         if (( icycle > maxcycle ))
         then
            (( maxcycle = icycle ))
         fi

#         cycle=$(echo $zeile | cut -f2 -d".")
#         if [[ "$cycle" = "$zeile" ]]
#         then
#            (( icycle = 0 ))
#         elif [[ "$cycle" = "${extin[$i]}" ]]
#         then
#            (( icycle = 0 ))
#         else
#            (( icycle = $cycle ))
#         fi
#         if (( icycle > maxcycle ))
#         then
#            (( maxcycle = icycle ))
#         fi

      done < filelist
      rm filelist

         # APPEND CYCLE NUMBER (IF ANY) AND EXTENSION (IF ANY) TO THE FILENAME
      if (( maxcycle > 0 ))
      then
         if [[ "${extin[$i]}" != " "  &&  "${extin[$i]}" != "" ]]
         then
            filename=${filename}.$maxcycle.${extin[$i]}
         else
            filename=${filename}.$maxcycle
         fi
      else
         if [[ "${extin[$i]}" != " "  &&  "${extin[$i]}" != "" ]]
         then
            filename=${filename}.${extin[$i]}
         fi
      fi

         # STORE FILENAME WITHOUT PATH BUT WITH CYCLE NUMBER,
         # IS LATER USED FOR TRANSFERRING FILES WITHIN THE JOB (SEE END OF FILE)
      absnamein[$i]=$filename
      if (( maxcycle > 0 ))
      then
         if [[ "${actionin[$i]}" = di ]]
         then
            frelin[$i]=${endin[$i]}.$maxcycle
         else
            frelin[$i]=${afname}${endin[$i]}.$maxcycle
         fi
      else
         if [[ "${actionin[$i]}" = di ]]
         then
            frelin[$i]=${endin[$i]}
         else
            frelin[$i]=${afname}${endin[$i]}
         fi
      fi

   fi
done


# GENERATE FULL FILENAMES OF OUTPUT-FILES (WITHOUT $ OR ~),
# CHECK, IF OUTPUT-FILES EXIST, AND DETERMINE HIGHEST CYCLE NUMBER (IF CYCLES EXIST),
# OR, IN CASE THAT FILE DOES NOT EXIST, CHECK, IF IT CAN BE CREATED
# THESE ACTIONS ARE NOT CARRIED OUT, IF FILES SHALL BE TRANSFERRED FROM THE REMOTE TO
# THE LOCAL HOST (BECAUSE THERE IS NO DIRECT ACCESS TO THE LOCAL DIRECTORIES FROM THE
# REMOTE HOST)
(( i = 0 ))
while (( i < iout ))
do
   (( i = i + 1 ))
   if [[ ! ( $fromhost != $localhost  &&  ( "${actionout[$i]}" = tr || "${actionout[$i]}" = tra || "${actionout[$i]}" = trpe ) ) ]]
   then

         # NO TRANSFER REQUIRED HERE: REDUCE THE TRANSFER ACTIONS TO THEIR LOCAL COUNTERPARTS
      if [[ "${actionout[$i]}" = tr ]]
      then
         actionout[$i]=""
      elif [[ "${actionout[$i]}" = trpe ]]
      then
         actionout[$i]=pe
      elif [[ "${actionout[$i]}" = tra ]]
      then
         actionout[$i]=a
      fi

      (( maxcycle = 0 ))
      eval filename=${pathout[$i]}/${fname}${endout[$i]}
      eval catalogname=${pathout[$i]}
      if ! ls $filename* 1>/dev/null 2>&1
      then

            # IF OUTPUT-FILE DOES NOT EXIST CHECK, IF IT CAN BE CREATED
         if cat /dev/null > $filename
         then
            rm $filename
         else

               # CHECK, IF THE DIRECTORY WHERE FILE SHALL BE COPIED TO EXISTS
               # IF IT DOES NOT EXIST, TRY TO CREATE IT
            if [[ ! -d $catalogname ]]
            then
               if mkdir -p $catalogname
               then
                  printf "\n\n *** directory:"
                  printf "\n $catalogname"
                  printf "\n was created\n"
               else
                  printf "\n\n +++ OUTPUT-file:"
                  printf "\n $filename"
                  printf "\n cannot be created, because directory does not exist"
                  printf "\n and cannot be created either"
                  printf "\n"
                  locat=output  ; exit
               fi 2>/dev/null
            else
               printf "\n\n +++ OUTPUT-file:"
               printf "\n $filename"
               printf "\n cannot be created, although directory exists"
               printf "\n"
               locat=output  ; exit
            fi
         fi 2>/dev/null

      else

            # DETERMINE THE CYCLE NUMBER
            # (icycle IS THE NEXT FREE CYCLE, I.E. HIGHEST EXISTING CYCLE + 1)
         ls -1 -d $filename    >   filelist  2>/dev/null
         ls -1 -d $filename.*  >>  filelist  2>/dev/null
         while read zeile
         do

               # filename without path (i.e. after the last "/")
            basefilename=$(basename ${zeile})

               # check if there is an extension
            extension=${basefilename##*.}
            if [[ "$extension" = "${extout[$i]}" ]]
            then
               basefilename=${basefilename%.*}
            fi

               # check for an existing cycle number
            cycle=${basefilename##*.}
            if [[ $cycle =~ ^-?[0-9]+$ ]]
            then
               (( icycle = $cycle + 1 ))
            else
               (( icycle = 1 ))
            fi

            if (( icycle > maxcycle ))
            then
               (( maxcycle = icycle ))
            fi

         done < filelist
         rm filelist

      fi

         # CHECK, IF THE FILE WITH THE NEW CYCLE NUMBER CAN BE CREATED.
         # IN CASE OF FILE-APPEND (ACTION a), THE EXISTING FILE WITH THE HIGHEST
         # CYCLE NUMBER IS USED INSTEAD OF A NEW CYCLE
      if [[ "${actionout[$i]}" != a ]]
      then
         if (( maxcycle > 0 ))
         then
            filename_tmp=${filename}.$maxcycle
            if cat /dev/null > $filename_tmp
            then
               rm $filename_tmp
            else
               printf "\n +++ OUTPUT-file:"
               printf "\n $filename_tmp"
               printf "\n cannot be created"
               locat=output  ; exit
            fi
         fi
      else
         (( maxcycle = maxcycle - 1 ))
      fi

      (( cycnum[$i] = maxcycle ))
      pathout[$i]=$filename

   fi
done


# THE DVR-PACKAGE REQUIRES ITS OWN LIBRARY
if [[ $(echo $package_list | grep -c dvrp_graphics) != 0 ]]
then
   if [[ "$dvr_inc" = "" ]]
   then
      printf "\n\n +++ no value for \"dvr_inc\" given in configuration file"
      printf "\n This is required for the dvrp_graphics package.\n"
      locat=dvr; exit
   fi
   if [[ "$dvr_lib" = "" ]]
   then
      printf "\n\n +++ no value for \"dvr_lib\" given in configuration file"
      printf "\n This is required for the dvrp_graphics package.\n"
      locat=dvr; exit
   fi
fi


# CHECK, WHETHER A MAIN PROGRAM OR AN EXECUTABLE HAVE BEEN DECLARED.
# EXECUTABLES DO NOT NEED TO BE COMPILED.
if [[ "$mainprog" = ""  &&  "$executable" = "" ]]
then
   printf "\n +++ neither main program nor executable defined"
   locat=source; exit
elif [[ "$mainprog" != ""  &&  "$executable" != "" ]]
then
   printf "\n +++ main program as well as executable defined"
   locat=source; exit
elif [[ "$mainprog" = ""  &&  "$executable" != "" ]]
then
   do_compile=false
fi


# CREATE SOURCE-DIRECTORY TO COLLECT ROUTINES TO BE COMPILED.
# LATER THE MRUN-SCRIPT AND CONFIGURATION-FILE WILL ALSO BE COPIED TO THIS DIRECTORY.
if [[ $restart_run != true  &&  "$SOURCES_COMPLETE" = "" ]]
then
   rm -rf SOURCES_FOR_RUN_$fname
   mkdir SOURCES_FOR_RUN_$fname
fi


# COLLECT ALL ROUTINES TO BE COMPILED
# THIS IS NOT REQUIRED WITHIN BATCH-JOBS, BECAUSE ROUTINES HAVE ALREADY BEEN COLLECTED
# BY THE MRUN-CALL WHICH CREATED THE BATCH-JOB.
if [[ $do_compile = true  &&  "$SOURCES_COMPLETE" = "" ]]
then

   [[ "$source_list" = LM ]]  &&  source_list=LOCALLY_MODIFIED

   if [[ "$source_list" = LOCALLY_MODIFIED ]]
   then

         # DETERMINE MODIFIED FILES OF THE SVN WORKING COPY
      source_list=""
      cd $source_path

         # CHECK, IF DIRECTORY IS UNDER SVN CONTROL
         # (ONLY A MESSAGE IS GIVEN, THE SCRIPT CONTINUES)
      if [[ ! -d .svn ]]
      then
         printf "\n\n +++ source directory"
         printf "\n \"$source_path\" "
         printf "\n is not under control of \"subversion\"."
         printf "\n Please do not use mrun-option \"-s LOCALLY_MODIFIED\"\n"
      fi

         # LIST ALL MODIFIED SOURCE CODE FILES
         # (STATUS "M" = MODIFIED, "?" = NOT UNDER VERSION CONTROL)
      Filenames=""
      svn status > tmp_mrun
      while read line
      do
         firstc=$(echo $line | cut -c1)
         if [[ $firstc = M  ||  $firstc = "?" ]]
         then
            Name=$(echo "$line" | cut -c8-)

               # SUFFIX = PART AFTER THE LAST DOT, SO THAT NAMES / PATHS
               # CONTAINING MORE THAN ONE DOT ARE HANDLED CORRECTLY
            extension=${Name##*.}
            if [[ "$extension" = f90 || "$extension" = F90 || "$extension" = f || "$extension" = F || "$extension" = c ]]
            then
               Filenames="$Filenames "$Name
            fi
         fi
      done < tmp_mrun

         # COPY FILES TO SOURCES_FOR_RUN_...
      for dateiname in $Filenames
      do
         cp $dateiname $working_directory/SOURCES_FOR_RUN_$fname
         source_list=$source_list"$dateiname "
      done

      cd - > /dev/null

      # COPY FILES GIVEN BY OPTION -s TO DIRECTORY SOURCES_FOR_RUN_...
      # AUTOMATIC RESTART RUNS JUST ACCESS THE DIRECTORY CREATED BY THE INITIAL RUN
   elif [[ "$source_list" != ""  &&  $restart_run != true ]]
   then

      cd $source_path

      for filename in $source_list
      do

            # SOURCE CODE FILE IS NOT ALLOWED TO INCLUDE PATH
         if [[ $(echo $filename | grep -c "/") != 0 ]]
         then
            printf "\n +++ source code file: $filename"
            printf "\n must not contain (\"/\") "
            locat=source; exit
         fi

         if [[ ! -f $filename ]]
         then
            printf "\n +++ source code file: $filename"
            printf "\n does not exist"
            locat=source; exit
         else
            cp $filename $working_directory/SOURCES_FOR_RUN_$fname
         fi

      done

      cd - > /dev/null

   fi

      # CHECK, IF A MAIN PROGRAM EXISTS AND IF IT IS PART OF THE LIST OF FILES
      # TO BE COMPILED. IF NOT, ADD IT TO THE LIST.
   if [[ $restart_run != true ]]
   then

      if [[ ! -f "$source_path/$mainprog" ]]
      then
         printf "\n\n +++ main program: $mainprog"
         printf "\n does not exist in source directory"
         printf "\n \"$source_path\"\n"
         locat=source; exit
      else
         if [[ $(echo $source_list | grep -c $mainprog) = 0 ]]
         then
            cp $source_path/$mainprog SOURCES_FOR_RUN_$fname
            source_list=${mainprog}" $source_list"
         fi
      fi

   fi

      # CHECK, IF MAKEFILE EXISTS AND COPY IT TO THE SOURCES_FOR_RUN... DIRECTORY
      # NOT REQUIRED FOR RESTART RUNS, SOURCES_FOR_RUN... HAS BEEN CREATED BY THE INITIAL RUN
   if [[ "$restart_run" != true ]]
   then
      [[ "$makefile" = "" ]]  &&  makefile=$source_path/Makefile
      if [[ ! -f $makefile ]]
      then
         printf "\n +++ file \"$makefile\" does not exist"
         locat=make; exit
      else
         cp $makefile SOURCES_FOR_RUN_$fname/Makefile
      fi
   fi

      # COPY FILES FROM OPTIONAL SOURCE PATH GIVEN IN THE CONFIGURATION FILE
   if [[ $restart_run != true  &&  "$add_source_path" != "" ]]
   then

         # DOES THE DIRECTORY EXIST?
      if [[ ! -d $add_source_path ]]
      then
         printf "\n\n *** INFORMATIVE: additional source code directory"
         printf "\n \"$add_source_path\" "
         printf "\n does not exist or is not a directory."
         printf "\n No source code will be used from this directory!\n"
         add_source_path=""
         if [[ $silent == false ]]
         then
            sleep 3
         fi
      else

         cd $add_source_path
         found=false

            # COLLECT ALL SOURCE FILES OF THE ADDITIONAL DIRECTORY.
            # SHELL GLOBBING IS USED INSTEAD OF PARSING THE (ERROR-)OUTPUT OF ls:
            # A PATTERN WITHOUT MATCH STAYS LITERAL AND IS REJECTED BY THE -f TEST,
            # DIRECTORIES MATCHING A PATTERN ARE SKIPPED. THE LIST IS RESET FIRST
            # SO THAT NO STALE VALUE CAN BE PICKED UP.
         AddFilenames=""
         for add_candidate in *.f90 *.F90 *.F *.f *.c
         do
            [[ -f $add_candidate ]]  &&  AddFilenames="$AddFilenames $add_candidate"
         done

         cd - > /dev/null
         cd SOURCES_FOR_RUN_$fname

            # COPY MAKEFILE IF EXISTING
         if [[ -f $add_source_path/Makefile ]]
         then
            printf "\n\n *** user Makefile from directory"
            printf "\n \"$add_source_path\" is used \n"
            if [[ $silent == false ]]
            then
               sleep 1
            fi
            cp $add_source_path/Makefile .
         fi

         for dateiname in $AddFilenames
         do

               # A FILE OF THE SAME NAME ALREADY IN SOURCES_FOR_RUN_... MEANS THAT
               # IT HAS ALSO BEEN REQUESTED VIA OPTION -s
            if [[ -f $dateiname ]]
            then
               printf "\n +++ source code file \"$dateiname\" found in additional"
               printf "\n source code directory \"$add_source_path\" "
               printf "\n but was also given with option \"-s\" which means that it should be taken"
               printf "\n from directory \"$source_path\"."
               locat=source; exit
            fi

            cp $add_source_path/$dateiname .
            source_list="$source_list $dateiname"

               # CHECK IF FILE IS CONTAINED IN MAKEFILE
            if [[ $(grep -c $dateiname Makefile) = 0 ]]
            then
               printf "\n\n +++ user file \"$dateiname\" "
               printf "\n is not listed in Makefile \n"
               locat=source; exit
            else

               if [[ $found = false ]]
               then
                  found=true
                  printf "\n\n *** following user file(s) added to the"
                  printf " files to be translated:\n "
               fi
               printf "$dateiname "
               if [[ $silent == false ]]
               then
                  sleep 0.5
               fi

            fi
         done
         [[ $found = true ]]  &&  printf "\n"
         cd - > /dev/null
      fi
   fi

      # ADD ALL ROUTINES BELONGING TO SOFTWARE PACKAGES (GIVEN BY OPTION -p)
      # TO THE LIST OF FILES TO BE COMPILED
   if [[ $restart_run != true  &&  -n $package_list ]]
   then

      cd $source_path

      for package in $package_list
      do

         [[ $package = "dvrp_graphics+1PE" ]]  &&  package=dvrp_graphics

            # DETERMINE FILES BELONGING TO THE PACKAGE
            # ERROR MESSAGE ARE REDIRECTED TO /dev/null, BECAUSE WILDCARD (*) ALSO GIVES
            # THE NAME OF THE DIRECTORY
         package_source_list=$(grep "defined( __$package " * 2>/dev/null | cut -f1 -d:)

            # ADD THESE FILES TO THE LIST OF FILES TO BE COMPILED,
            # IF THEY ARE NOT ALREADY PART OF THE LIST
         for source_list_name in $package_source_list
         do
            if [[ $(echo $source_list | grep -c $source_list_name) = 0 ]]
            then

                  # ONLY TAKE FILES WITH VALID SUFFIX
                  # (SUFFIX = PART AFTER THE LAST DOT)
               ending=${source_list_name##*.}
               if [[ "$ending" = f90 || "$ending" = F90 || "$ending" = f || "$ending" = F || "$ending" = c ]]
               then
                  cp $source_list_name $working_directory/SOURCES_FOR_RUN_$fname
                  source_list="$source_list $source_list_name"
               fi
            fi
         done
      done

      cd - > /dev/null
   fi

fi  # do_compile=true


# IF SOURCE CODE IS TO BE COMPILED, DO SOME MORE CHECKS
# AND SET PRE-PROCESSOR DIRECTIVES
if [[ $do_compile = true  ||  $create_executable_for_batch = true ]]
then

      # THE THREE SETTINGS BELOW USE THE SAME HOST TEST: COMMA-SEPARATED
      # DIRECTIVES (-D__x=__x) FOR ibm-HOSTS EXCEPT ibmmuc*, BLANK-SEPARATED
      # DIRECTIVES (-D__x) FOR ALL OTHER HOSTS. USING DIFFERENT TESTS WOULD MIX
      # BOTH FORMS WITHIN cpp_options ON ibmmuc*-HOSTS.

      # SET PREPROCESSOR-DIRECTIVES TO SELECT OPERATING SYSTEM SPECIFIC CODE
   if [[ $(echo $localhost | cut -c1-3) = ibm  &&  $localhost != ibmmuc* ]]
   then
      cpp_options="${cpp_options},-D__ibm=__ibm"
   elif [[ $(echo $localhost | cut -c1-3) = nec ]]
   then
      cpp_options="$cpp_options -D__nec"
   elif [[ $(echo $localhost | cut -c1-2) = lc ]]
   then
      cpp_options="$cpp_options -D__lc"
   else
      cpp_options="$cpp_options -D__$localhost"
   fi

      # SET DIRECTIVES GIVEN BY OPTION -K (E.G. parallel)
   if [[ $(echo $localhost | cut -c1-3) = ibm  &&  $localhost != ibmmuc* ]]
   then
      [[ -n $cond1 ]]  &&  cpp_options="${cpp_options},-D__$cond1=__$cond1"
      [[ -n $cond2 ]]  &&  cpp_options="${cpp_options},-D__$cond2=__$cond2"
   else
      [[ -n $cond1 ]]  &&  cpp_options="$cpp_options -D__$cond1"
      [[ -n $cond2 ]]  &&  cpp_options="$cpp_options -D__$cond2"
   fi

      # SET DIRECTIVES FOR ACTIVATING SOFTWARE-PACKAGES (OPTION -p)
   if [[ -n $package_list ]]
   then
      for package in $package_list
      do
         if [[ $(echo $localhost | cut -c1-3) = ibm  &&  $localhost != ibmmuc* ]]
         then
            if [[ $package != "dvrp_graphics+1PE" ]]
            then
               cpp_options="${cpp_options},-D__$package=__$package"
            else
               cpp_options="${cpp_options},-D__dvrp_graphics=__dvrp_graphics"
               export use_seperate_pe_for_dvrp_output=true
            fi
         else
            if [[ $package != "dvrp_graphics+1PE" ]]
            then
               cpp_options="$cpp_options -D__$package"
            else
               cpp_options="$cpp_options -D__dvrp_graphics"
               export use_seperate_pe_for_dvrp_output=true
            fi
         fi
      done
   fi

else

      # FOR LOCAL RUNS CHECK AGAIN, IF EXECUTABLE EXISTS
   if [[ $do_remote = false ]]
   then
      if [[ ! -f $executable ]]
      then
         printf "\n +++ executable file: $executable"
         printf "\n does not exist"
         locat=executable; exit
      fi
   fi
fi


# DETERMINE THE JOB MODE
# (ibm-HOSTS: VIA LOADLBATCH, OTHER HOSTS: VIA ENVIRONMENT / PJM_ENVIRONMENT)
if [[ $(echo $localhost | cut -c1-3) = ibm ]]
then
   if [[ "$LOADLBATCH" = yes ]]
   then
      batch_job=.TRUE.
      jobmo=BATCH
   else
      batch_job=.FALSE.
      jobmo=INTERACTIVE
   fi
else
   if [[ "$ENVIRONMENT" = BATCH  ||  "$PJM_ENVIRONMENT" = BATCH ]]
   then
      batch_job=.TRUE.
      jobmo=BATCH
   else
      batch_job=.FALSE.
      jobmo=INTERACTIVE
   fi
fi


# NO INTERACTIVE RUNS ALLOWED ON LCTIT
if [[ $host = lctit  &&  $jobmo = INTERACTIVE  &&  $do_batch = false ]]
then
   printf "\n +++ no interactive runs allowed on host \"$host\" "
   printf "\n please submit batch job using mrun option \"-b\" \n"
   locat=normal; exit
fi


# CHECK, IF USER DEFINED A COMPILER
if [[ "$compiler_name" = "" ]]
then
   printf "\n +++ no compiler specified for \"$host $cond1 $cond2\""
   locat=compiler_name; exit
fi


# DETERMINE THE NAME OF MRUN'S TEMPORARY WORKING DIRECTORY
# ON HLRN-III, USE THE QUEUING NAME. OTHERWISE USE USERNAME AND RANDOM NUMBER
if [[ $do_batch = false  &&  $(echo $host | cut -c1-6) = lccray ]]
then
   kennung=$(checkjob $PBS_JOBID | grep Reservation | cut -d" " -s -f2 | cut -d"." -s -f2 | sed "s/['\"]//g")
   if [[ "$kennung" = "" ]]
   then
      kennung=$RANDOM
   fi
else
   kennung=$RANDOM
fi

if [[ "$tmp_user_catalog" = "" ]]
then
   if [[ $localhost = ibmh ]]
   then
      tmp_user_catalog=$SCRATCH
   else
      tmp_user_catalog=/tmp
   fi
fi

if [[ $localhost = ibmmuc* ]]
then
   TEMPDIR=$tmp_user_catalog/${USER}.$kennung
else
   TEMPDIR=$tmp_user_catalog/${usern}.$kennung
fi


# DETERMINE THE NAME OF THE DIRECTORY WHICH IS USED TO TEMPORARILY STORE DATA FOR RESTART RUNS
if [[ "$tmp_data_catalog" = "" ]]
then
   tmp_data_catalog=/tmp/mrun_restart_data
fi


# IN CASE OF LOCAL RUNS REPLACE ENVIRONMENT VARIABLES BY THEIR VALUES
if [[ $do_remote = false  &&  $do_compile = true  ||  $create_executable_for_batch = true ]]
then
   eval fopts=\"$fopts\"
   eval lopts=\"$lopts\"
fi


# DETERMINE COMPILE- AND LINK-OPTIONS
fopts="$fopts $netcdf_inc $fftw_inc $dvr_inc"
lopts="$lopts $netcdf_lib $fftw_lib $dvr_lib"
XOPT="-X $numprocs"


# CHECK THE CPU-LIMIT.
IT MUST BE GIVEN FOR BATCH-JOBS AND IS COMMUNICATED TO THE # EXECUTABLE VIA NAMELIST-PARAMETER cputime done=false while [[ $done = false ]] do cputime=$cpumax if (( $cputime == 0 )) then if [[ $do_batch = true ]] then printf "\n +++ cpu-time is undefined" printf "\n >>> Please type CPU-time in seconds as INTEGER:" printf "\n >>> " read cputime 1>/dev/null 2>&1 else cputime=10000000 # NO CPU LIMIT FOR INTERACTIVE RUNS fi else done=true fi cpumax=$cputime done (( minuten = cputime / 60 )) (( sekunden = cputime - minuten * 60 )) # CHECK THE MEMORY DEMAND if [[ $do_batch = true ]] then done=false while [[ $done = false ]] do if (( memory == 0 )) then printf "\n +++ memory demand is undefined" printf "\n >>> Please type memory in MByte per process as INTEGER:" printf "\n >>> " read memory 1>/dev/null 2>&1 else done=true fi done fi # IN CASE OF REMOTE-JOBS CHECK, IF A USERNAME FOR THE REMOTE HOST IS GIVEN if [[ $do_remote = true && -z $remote_username ]] then while [[ -z $remote_username ]] do printf "\n +++ username on remote host \"$host\" is undefined" printf "\n >>> Please type username:" printf "\n >>> " read remote_username done mc="$mc -u$remote_username" fi # CHECK FOR INITIAL COMMANDS AFTER LOGIN if [[ "$login_init_cmd" != "" ]] then export init_cmds="${login_init_cmd};" fi # SET THE MODULE-LOAD COMMAD AND EXPORT IT FOR subjob if [[ "$modules" != "" ]] then if [[ $host = lctit ]] then export module_calls=". 
$modules" else export module_calls="module load ${modules};" fi fi # OUTPUT OF THE MRUN-HEADER calltime=$(date) printf "\n" printf "#------------------------------------------------------------------------# \n" printf "| %-35s%35s | \n" "$version" "$calltime" printf "| | \n" spalte1="called on:"; spalte2=$localhost_realname printf "| %-25s%-45s | \n" "$spalte1" "$spalte2" if [[ $do_remote = true ]] then spalte1="execution on:"; spalte2="$host (username: $remote_username)" else spalte1="execution on:"; spalte2="$host ($localhost_realname)" fi printf "| %-25s%-45s | \n" "$spalte1" "$spalte2" if [[ -n $numprocs ]] then if [[ $run_coupled_model = false ]] then spalte1="number of PEs:"; spalte2=$numprocs else spalte1="number of PEs:"; spalte2="$numprocs (atmosphere: $numprocs_atmos, ocean: $numprocs_ocean)" fi printf "| %-25s%-45s | \n" "$spalte1" "$spalte2" fi if [[ -n $tasks_per_node ]] then spalte1="tasks per node:"; spalte2="$tasks_per_node (number of nodes: $nodes)" printf "| %-25s%-45s | \n" "$spalte1" "$spalte2" if (( remaining_pes > 0 )) then spalte1=" "; spalte2="one of the nodes only filled with $remaining_pes tasks" printf "| %-25s%-45s | \n" "$spalte1" "$spalte2" fi fi if [[ $maximum_parallel_io_streams != $numprocs ]] then spalte1="max par io streams:"; spalte2="$maximum_parallel_io_streams" printf "| %-25s%-45s | \n" "$spalte1" "$spalte2" fi if [[ $use_openmp = true ]] then spalte1="threads per task:"; spalte2="$threads_per_task" printf "| %-25s%-45s | \n" "$spalte1" "$spalte2" fi printf "| | \n" if [[ $do_compile = true ]] then if [[ "$mopts" != "" ]] then spalte1="make options:"; spalte2=$(echo "$mopts" | cut -c-45) printf "| %-25s%-45s | \n" "$spalte1" "$spalte2" zeile=$(echo "$mopts" | cut -c46-) while [[ "$zeile" != "" ]] do spalte1="" spalte2=$(echo "$zeile" | cut -c-45) printf "| %-25s%-45s | \n" "$spalte1" "$spalte2" zeile=$(echo "$zeile" | cut -c46-) done fi spalte1="cpp directives:"; spalte2=$(echo "$cpp_options" | cut -c-45) printf "| %-25s%-45s 
| \n" "$spalte1" "$spalte2" zeile=$(echo "$cpp_options" | cut -c46-) while [[ "$zeile" != "" ]] do spalte1="" spalte2=$(echo "$zeile" | cut -c-45) printf "| %-25s%-45s | \n" "$spalte1" "$spalte2" zeile=$(echo "$zeile" | cut -c46-) done spalte1="compiler options:"; spalte2=$(echo "$fopts" | cut -c-45) printf "| %-25s%-45s | \n" "$spalte1" "$spalte2" zeile=$(echo "$fopts" | cut -c46-) while [[ "$zeile" != "" ]] do spalte1="" spalte2=$(echo "$zeile" | cut -c-45) printf "| %-25s%-45s | \n" "$spalte1" "$spalte2" zeile=$(echo "$zeile" | cut -c46-) done spalte1="linker options:"; spalte2=$(echo "$lopts" | cut -c-45) printf "| %-25s%-45s | \n" "$spalte1" "$spalte2" zeile=$(echo "$lopts" | cut -c46-) while [[ "$zeile" != "" ]] do spalte1="" spalte2=$(echo "$zeile" | cut -c-45) printf "| %-25s%-45s | \n" "$spalte1" "$spalte2" zeile=$(echo "$zeile" | cut -c46-) done spalte1="modules to be load:"; spalte2=$(echo "$modules" | cut -c-45) printf "| %-25s%-45s | \n" "$spalte1" "$spalte2" zeile=$(echo "$modules" | cut -c46-) while [[ "$zeile" != "" ]] do spalte1="" spalte2=$(echo "$zeile" | cut -c-45) printf "| %-25s%-45s | \n" "$spalte1" "$spalte2" zeile=$(echo "$zeile" | cut -c46-) done spalte1="main program:"; spalte2=$mainprog printf "| %-25s%-45s | \n" "$spalte1" "$spalte2" else spalte1=executable:; spalte2=$executable printf "| %-25s%-45s | \n" "$spalte1" "$spalte2" fi printf "| | \n" spalte1="base name of files:"; spalte2=$fname printf "| %-25s%-45s | \n" "$spalte1" "$spalte2" if [[ $fname != $afname ]] then spalte1="base name of input files:"; spalte2=$afname printf "| %-25s%-45s | \n" "$spalte1" "$spalte2" fi spalte1="INPUT control list:"; spalte2=$(echo $input_list) printf "| %-25s%-45s | \n" "$spalte1" "$spalte2" spalte1="OUTPUT control list:"; spalte2=$(echo $output_list) printf "| %-25s%-45s | \n" "$spalte1" "$spalte2" if [[ "$ocean_file_appendix" = true ]] then printf "| %-35s%-35s | \n" "suffix \"_O\" is added to local files" " " fi if [[ $do_batch = true || 
"$LOADLBATCH" = yes ]] then spalte1="memory demand / PE":; spalte2="$memory MB" printf "| %-25s%-45s | \n" "$spalte1" "$spalte2" spalte1=CPU-time:; spalte2="$minuten:$sekunden" printf "| %-25s%-45s | \n" "$spalte1" "$spalte2" fi if [[ $do_compile = true ]] then printf "| | \n" printf "| Files to be compiled: | \n" zeile=$source_list while [[ "$zeile" != "" ]] do linestart=$(echo $zeile | cut -c-70) printf "| %-70s | \n" "$linestart" zeile=$(echo "$zeile" | cut -c71-) done fi printf "#------------------------------------------------------------------------#" # OUTPUT OF FILE CONNECTIONS IN CASE OF TRACEBACK if [[ $do_trace = true ]] then (( i = 0 )) while (( i < iin )) do (( i = i + 1 )) if (( i == 1 )) then printf "\n\n >>> INPUT-file assignments:\n" fi printf "\n ${localin[$i]} : ${absnamein[$i]}" done (( i = 0 )) while (( i < iout )) do (( i = i + 1 )) if (( i == 1 )) then printf "\n\n >>> OUTPUT-file assignments:\n" fi printf "\n ${localout[$i]} : ${pathout[$i]}" done (( i = 0 )) while (( i < iic )) do (( i = i + 1 )) if (( i == 1 )) then printf "\n\n >>> INPUT-commands:\n" fi printf "\n ${in_command[$i]}" done (( i = 0 )) while (( i < ioc )) do (( i = i + 1 )) if (( i == 1 )) then printf "\n\n >>> OUTPUT-commands:\n" fi printf "\n ${out_command[$i]}" done fi # QUERY FOR CONTINUE (ON LOCAL MACHINES ONLY) if [[ $remotecall = false && $silent = false && $jobmo != BATCH ]] then antwort=dummy printf "\n\n" printf " >>> everything o.k. (y/n) ? " while read antwort do if [[ "$antwort" != y && "$antwort" != Y && "$antwort" != n && "$antwort" != N ]] then printf " >>> everything o.k. (y/n) ? 
" else break fi done if [[ $antwort = n || $antwort = N ]] then locat=user_abort; (( iec = 0 )); exit fi if [[ $do_batch = true ]] then printf " >>> batch-job will be created and submitted" else printf " >>> MRUN will now continue to execute on this machine" fi fi # DETERMINE PATH FOR MAKE DEPOSITORY if [[ $do_batch = false || $create_executable_for_batch = true ]] then line="" grep "%depository_path" $config_file > tmp_mrun while read line do if [[ "$line" != "" && $(echo $line | cut -c1) != "#" ]] then if [[ "$(echo $line | cut -d" " -s -f3)" = "" ]] then global_depository_path=`echo $line | cut -d" " -s -f2` fi fi done < tmp_mrun line="" grep " $localhost" $config_file | grep "%depository_path" > tmp_mrun while read line do if [[ "$line" != "" && $(echo $line | cut -c1) != "#" ]] then if [[ "$(echo $line | cut -d" " -s -f4)" = "$cond1" && "$(echo $line | cut -d" " -s -f5)" = "$cond2" ]] then local_depository_path=`echo $line | cut -d" " -s -f2` fi fi done < tmp_mrun if [[ "$local_depository_path" = "" ]] then if [[ "$global_depository_path" != "" ]] then local_depository_path=$global_depository_path else printf "\n\n +++ no depository path found in configuration file" printf "\n for local host \"$localhost\" " printf "\n please set \"\%depository_path\" in configuration file\n" locat=config_file; exit fi fi eval local_depository_path=$local_depository_path [[ "$cond1" != "" ]] && local_depository_path=${local_depository_path}_$cond1 [[ "$cond2" != "" ]] && local_depository_path=${local_depository_path}_$cond2 basename=`echo $mainprog | cut -f1 -d"."` eval make_depository=${local_depository_path}/${basename}_current_version.tar if [[ ! 
-f $make_depository ]] then printf "\n" printf "\n *** WARNING: make depository \"$make_depository\" not found" printf "\n \"make\" will fail, if the Makefile or other source files are missing\n" fi fi # NOW PERFORM THOSE ACTIONS REQUIRED TO EXECUTE THE PROGRAM (PALM) ON THIS MACHINE # (COMPILING/LINKING, EXECUTING, COPYING I/O FILES) if [[ $do_batch = false ]] then # CREATE THE TEMPORARY WORKING DIRECTORY mkdir -p $TEMPDIR chmod go+rx $TEMPDIR tmpcreate=true # COPY EITHER THE COMPLETE SOURCE CODE FILES TO BE COMPILED OR THE EXECUTABLE # INTO THE TEMPORARY WORKING DIRECTORY if [[ $do_compile = true ]] then TEMPDIR_COMPILE=$TEMPDIR cp $make_depository $TEMPDIR_COMPILE cd $TEMPDIR_COMPILE tar -xf $make_depository > /dev/null 2>&1 cd - > /dev/null # DUE TO UNKNOWN REASONS, COPY WITH cp COMMAND CREATES CORRUPT # FILES ON CRAY XC30 SYSTEMS (HLRN III), rsync IS USED INSTEAD if [[ $(echo $host | cut -c1-6) = lccray ]] then rsync -av -t SOURCES_FOR_RUN_$fname/* $TEMPDIR_COMPILE > /dev/null else cp SOURCES_FOR_RUN_$fname/* $TEMPDIR_COMPILE > /dev/null fi else cp $executable ${TEMPDIR}/a.out fi # CHANGE TO THE TEMPORARY WORKING DIRECTORY cd $TEMPDIR printf "\n *** changed to temporary directory: $TEMPDIR" # IF REQUIRED, START WITH COMPILING if [[ $do_compile = true ]] then if [[ -f a.out ]] then # EXECUTABLE WAS CREATED DURING INTERACTIVE CALL OF MRUN printf "\n\n\n *** executable a.out found" printf "\n no compilation required \n" else # COMPILING WITH MAKE (ON NEC COMPILER IS CALLED ON HOST CROSS) printf "\n\n\n *** compilation starts \n$dashes\n" printf " *** compilation with make using following options:\n" printf " make depository: $make_depository" if [[ "$mopts" != "" ]] then printf " make options: $mopts\n" fi printf " compilername: $compiler_name\n" printf " compiler options: $fopts\n" printf " preprocessor directives: $cpp_options \n" printf " linker options: $lopts \n" if [[ "$modules" != "" ]] then printf " modules to be load: $modules \n" fi printf " source code 
files: $source_list \n" if [[ $localhost = ibmh ]] then printf " compiler is called via ssh on \"plogin1\" \n" ssh $SSH_PORTOPT plogin1 -l $usern "$init_cmds export PATH=/sw/ibm/xlf/13.1.0.8/usr/bin:$PATH; $module_calls cd $TEMPDIR; make $mopts -f Makefile PROG=a.out F90=$compiler_name COPT=\"$cpp_options\" F90FLAGS=\"$fopts\" LDFLAGS=\"$lopts\" " [[ ! -f a.out ]] && compile_error=true continue # STATUS=1, IF a.out EXISTS elif [[ $localhost = lcocean ]] then printf " compiler is called via ssh on \"ocean\" \n" echo $PWD ssh $SSH_PORTOPT ocean -l $usern "$init_cmds $module_calls cd $TEMPDIR; make $mopts -f Makefile PROG=a.out F90=$compiler_name COPT=\"$cpp_options\" F90FLAGS=\"$fopts\" LDFLAGS=\"$lopts\" " [[ ! -f a.out ]] && compile_error=true continue # STATUS=1, IF a.out EXISTS elif [[ $localhost = lcbullhh || $localhost = lccrayb || $localhost = lccrayf || $localhost = lccrayh ]] then make $mopts -f Makefile PROG=a.out F90=$compiler_name COPT="$cpp_options" F90FLAGS="$fopts" LDFLAGS="$lopts" elif [[ $localhost = lceddy ]] then which mpifc make $mopts -f Makefile PROG=a.out F90=$compiler_name COPT="$cpp_options" F90FLAGS="$fopts" LDFLAGS="$lopts" elif [[ $localhost = lcgeohu ]] then printf " compiler is called via ssh on \"cirrus\" \n" ssh $SSH_PORTOPT cirrus.geo.hu-berlin.de -l $usern "$init_cmds $module_calls cd $TEMPDIR; make $mopts -f Makefile PROG=a.out F90=$compiler_name COPT=\"$cpp_options\" F90FLAGS=\"$fopts\" LDFLAGS=\"$lopts\" " # this check did not work. Although a.out exists, it sets compile_error to true! # [[ ! -f a.out ]] && compile_error=true continue # STATUS=1, IF a.out EXISTS else [[ "$init_cmds" != "" ]] && eval $init_cmds [[ "$module_calls" != "" ]] && eval $module_calls make $mopts -f Makefile PROG=a.out F90=$compiler_name COPT="$cpp_options" F90FLAGS="$fopts" LDFLAGS="$lopts" fi if [[ $? 
!= 0 || "$compile_error" = true || "$module_compile_error" = true ]] then printf "\n +++ error occured while compiling or linking" locat=compile exit else printf "$dashes\n *** compilation finished \n" fi fi fi # PROVIDE THE INPUT FILES # LOOP OVER ALL ACTIVATED FILES (LISTED IN THE CONFIGURATION FILE) (( i = 0 )) while (( i < iin )) do (( i = i + 1 )) if (( i == 1 )) then printf "\n\n *** providing INPUT-files:\n$dashes" fi # SKIP OPTIONAL FILES, IF THEY DO NOT EXIST if [[ "${transin[$i]}" = unavailable ]] then if [[ "${extin[$i]}" = "" || "${extin[$i]}" = " " ]] then printf "\n *** INFORMATIVE: input file \"${pathin[$i]}/${afname}${endin[$i]}\" " printf "\n is not available!" else printf "\n *** INFORMATIVE: input file \"${pathin[$i]}/${afname}${endin[$i]}.${extin[$i]}\" " printf "\n is not available!" fi continue fi # CHECK FOR SINGLE FILE (SERIAL RUN) OR DIRECTORY (ONE FILE PER CORE FOR PARELLEL EXECUTION) files_for_pes=false; datentyp=file if [[ "${actionin[$i]}" = pe && -n $numprocs ]] then files_for_pes=true; datentyp=files actionin[$i]="" elif [[ "${actionin[$i]}" = pe && ! -n $numprocs ]] then actionin[$i]="" elif [[ "${actionin[$i]}" = lnpe && -n $numprocs ]] then files_for_pes=true; datentyp=files actionin[$i]="ln" elif [[ "${actionin[$i]}" = lnpe && ! -n $numprocs ]] then actionin[$i]="ln" fi if [[ $files_for_pes = true ]] then printf "\n >>> INPUT: ${absnamein[$i]}/.... to ${localin[$i]}" else printf "\n >>> INPUT: ${absnamein[$i]} to ${localin[$i]}" fi # INPUT-FILES TO BE LINKED if [[ "${actionin[$i]}" = ln ]] then printf "\n $datentyp will be linked" if [[ $files_for_pes = false ]] then if [[ -f "${absnamein[$i]}" ]] then ln ${absnamein[$i]} ${localin[$i]} got_tmp[$i]=true fi else if [[ -d "${absnamein[$i]}" ]] then mkdir ${localin[$i]} cd ${absnamein[$i]} for file in $(ls *) do ln $file $TEMPDIR/${localin[$i]} done >|/dev/null 2>&1 cd $TEMPDIR fi # IF "ln -f" HAS FAILED DO A NORMAL COPY "cp -r" if [[ ! 
-f "${localin[$i]}/_000000" ]] then printf "\n --- WARNING: ln failed, using cp instead (might be time consuming...)" cp -r ${absnamein[$i]}/* ${localin[$i]} fi got_tmp[$i]=true fi fi # FILE IS STORED IN THE RESPECTIVE DIRECTORY GIVEN IN THE CONFIGURATION FILE if [[ "${actionin[$i]}" = "" || "${actionin[$i]}" = "di" || "${actionin[$i]}" = "npe" ]] then if [[ "${actionin[$i]}" = "npe" && -n $numprocs ]] then # FILE COPIES ARE PROVIDED FOR ALL CORES # EACH FILE GETS A UNIQUE FILENAME WITH A FOUR DIGIT NUMBER printf "\n file will be provided for $numprocs processors" mkdir ${localin[$i]} ival=$numprocs (( ii = 0 )) while (( ii <= ival-1 )) do if (( ii < 10 )) then cp ${absnamein[$i]} ${localin[$i]}/_000$ii elif (( ii < 100 )) then cp ${absnamein[$i]} ${localin[$i]}/_00$ii elif (( ii < 1000 )) then cp ${absnamein[$i]} ${localin[$i]}/_0$ii else cp ${absnamein[$i]} ${localin[$i]}/_$ii fi (( ii = ii + 1 )) done else if [[ $files_for_pes = true ]] then # PROVIDE FILES FOR EACH CORE # FIRST CREATE THE LOCAL DIRECTORY, THEN COPY FILES # FROM THE PERMANENT DIRECTORY BY LINKING THEM TO THE LOCAL ONE printf "\n providing $numprocs files for the respective processors" mkdir ${localin[$i]} if [[ $link_local_input = true ]] then printf " files will be linked\n" cd ${absnamein[$i]} for file in $(ls *) do ln -f $file ${localin[$i]} done cd $TEMPDIR fi # IF "ln -f" FAILED OR IF "$link_local_input = false" DO A NORMAL "cp -r" if [[ ! -f "${localin[$i]}/_000000" ]] then if [[ $link_local_input = true ]] then printf "\n --- WARNING: ln failed, using cp instead (might be time consuming...)" fi cp -r ${absnamein[$i]}/* ${localin[$i]} fi else # PROVIDE FILE FOR RUNS ON A SINGLE CORE if [[ $link_local_input = true ]] then printf " file will be linked\n" ln -f ${absnamein[$i]} ${localin[$i]} fi # If "ln -f" fails of if "$link_local_input = false" do a normal "cp" if [[ ! 
-f "${localin[$i]}" ]] then if [[ $link_local_input = true ]] then printf "\n --- WARNING: ln failed, using cp instead (might be time consuming...)" fi cp ${absnamein[$i]} ${localin[$i]} fi fi fi fi done if (( i != 0 )) then printf "\n$dashes\n *** all INPUT-files provided \n" fi # EXECUTE INPUT-COMMANDS GIVEN IN THE CONFIGURATION FILE (( i = 0 )) while (( i < iic )) do (( i = i + 1 )) if (( i == 1 )) then printf "\n\n *** execution of INPUT-commands:\n$dashes" fi printf "\n >>> ${in_command[$i]}" eval ${in_command[$i]} if (( i == iic )) then printf "\n$dashes\n" fi done # SET THE REMAINING CPU-TIME cpurest=${cpumax}. # START DVR STREAMING SERVER if [[ $(echo $package_list | grep -c dvrp_graphics) != 0 ]] then if [[ "$dvr_server" != "" ]] then printf "\n\n *** preparing the dvr streaming server configuration file" # CHECK, IF A DVR SERVER IS ALREADY RUNNING running_dvrserver_id=`echo $(ps -edaf | grep .dvrserver.config | grep -v grep) | cut -d" " -f2` if [[ "$running_dvrserver_id" != "" ]] then printf "\n\n +++ WARNING: A dvr server with id=$running_dvrserver_id is already running!" printf "\n This server is used instead starting a new one!" printf "\n If required, script \"process_dvr_output\" has to be run manually." else # COPY CONFIGURATION FILE FOR STREAMING SERVER FROM REPOSITORY TO HERE if [[ -f ${PALM_BIN}/.dvrserver.config ]] then cp ${PALM_BIN}/.dvrserver.config . 
# ENTERING THE BASEDIR, UID AND GID INTO THIS FILE user_id=`id -u` group_id=`id -g` # "&" IS REQUIRED AS A SEPERATOR, BECAUSE TEMPDIR CONTAINS "/" sed "s&&${TEMPDIR}&g" .dvrserver.config > .dvrserver.1 sed "s//$user_id/g" .dvrserver.1 > .dvrserver.2 sed "s//$group_id/g" .dvrserver.2 > .dvrserver.3 mv .dvrserver.3 .dvrserver.config rm .dvrserver.1 .dvrserver.2 # START DVR SERVER IN BACKGROUND, GET HIS ID AND PRINT ON TERMINAL $dvr_server .dvrserver.config >> DVR_LOGFILE 2>&1 & dvrserver_id=`echo $(ps -edaf | grep .dvrserver.config) | cut -d" " -f2` printf "\n *** streaming server with id=$dvrserver_id is started in background" local_dvrserver_running=.TRUE. else printf "\n +++ missing file \".dvrserver.config\" in directory:" printf "\n \"$PALM_BIN\" " locat=dvr exit fi fi else printf "\n\n --- INFORMATIVE: no dvr streaming server will be started" fi fi # CREATE THE NAMELIST-FILE WITH VALUES OF ENVIRONMENT-VARIABLES REQUIRED BY PALM # (FILE ENVPAR WILL BE READ BY PALM) cat > ENVPAR << EOF &envpar run_identifier = '$fname', host = '$localhost', write_binary = .${write_binary}., tasks_per_node = $tasks_per_node, maximum_parallel_io_streams = $maximum_parallel_io_streams, maximum_cpu_time_allowed = ${cpumax}., revision = '$global_revision', local_dvrserver_running = $local_dvrserver_running, batch_job = $batch_job / EOF # STARTING THE EXECUTABLE printf "\n\n *** execution starts in directory\n \"`pwd`\"\n$dashes\n" PATH=$PATH:$TEMPDIR if [[ $execute_command != "none" ]] then printf "\n +++ branch still not realized" locat=execution exit else # MPI DEBUG OPTION (ARGUMENT CHECKING, SLOWS DOWN EXECUTION DUE TO INCREASED LATENCY) if [[ "$mpi_debug" = true ]] then export MPI_CHECK_ARGS=1 printf "\n +++ MPI_CHECK_ARGS=$MPI_CHECK_ARGS" fi if [[ "$totalview" = true ]] then printf "\n *** totalview debugger will be used" tv_opt="-tv" else tv_opt="" fi if [[ "$cond1" = debug || "$cond2" = debug ]] then #Interactive ALLINEA DEBUG seesion if [[ "$ENVIRONMENT" != BATCH ]] then if 
[[ $host = lccrayb || $host = lccrayh ]]
      then
         if [[ "$allinea" = true ]]
         then
            echo "--- aprun -n $ii -N $tasks_per_node a.out < runfile_atmos"
            ddt aprun -n $ii -N $tasks_per_node a.out
            wait
         fi
      fi
   fi

   if [[ $localhost = ibmh ]]
   then

         # SETUP THE IBM MPI ENVIRONMENT
      export MP_SHARED_MEMORY=yes
         # THE VARIABLE IS SPELLED AIXTHREAD_SCOPE (SAME SPELLING AS IN THE NORMAL
         # EXECUTION BRANCH BELOW). THE FORMER SPELLING AIXTHREADS_SCOPE DID NOT MATCH
         # IT, SO THE SETTING HAD NO EFFECT IN DEBUG RUNS
      export AIXTHREAD_SCOPE=S
      export OMP_NUM_THREADS=$threads_per_task
      export AUTHSTATE=files
      export XLFRTEOPTS="nlwidth=132:err_recovery=no"   # RECORD-LENGTH OF NAMELIST-OUTPUT

         # FOLLOWING OPTIONS ARE MANDATORY FOR TOTALVIEW
      export MP_ADAPTER_USE=shared
      export MP_CPU_USE=multiple
      export MP_TIMEOUT=1200

      unset MP_TASK_AFFINITY

      if [[ "$LOADLBATCH" = yes ]]
      then
         totalview poe a.out
      else
            # PRINT THE COMMAND BEFORE EXECUTING IT
         echo totalview poe -a a.out -procs $numprocs -rmpool 0 -nodes 1
         export TVDSVRLAUNCHCMD=ssh
         totalview poe -a a.out -procs $numprocs -rmpool 0 -nodes 1
      fi

   fi   # END DEBUG MODE

else

      # NORMAL EXECUTION
   if [[ -n $numprocs ]]
   then

         # RUNNING THE PROGRAM ON PARALLEL MACHINES
      if [[ $(echo $host | cut -c1-3) = ibm ]]
      then

            # SETUP THE IBM MPI ENVIRONMENT
         if [[ $host != ibmh && $host != ibmkisti ]]
         then
            export MP_SHARED_MEMORY=yes
            export AIXTHREAD_SCOPE=S
            export OMP_NUM_THREADS=$threads_per_task
            export XLSMPOPTS="spins=0:yields=0:stack=20000000"
            export AUTHSTATE=files
            export XLFRTEOPTS="nlwidth=132:err_recovery=no"   # RECORD-LENGTH OF NAMELIST-OUTPUT
            #  export MP_PRINTENV=yes

               # TUNING-VARIABLES TO IMPROVE COMMUNICATION SPEED
               # DO NOT SHOW SIGNIFICANT EFFECTS (SEP 04, FEDERATION-SWITCH)
            export MP_WAIT_MODE=poll
            [[ $node_usage = not_shared ]] && export MP_SINGLE_THREAD=yes
         fi

         if [[ $host = ibmkisti ]]
         then
            export LANG=en_US
            export MP_SHARED_MEMORY=yes
            if [[ $threads_per_task = 1 ]]
            then
               export MP_SINGLE_THREAD=yes
               export MEMORY_AFFINITY=MCM
            else
               export OMP_NUM_THREADS=$threads_per_task
            fi
         fi

         if [[ "$LOADLBATCH" = yes ]]
         then
            printf "\n--- Control: OMP_NUM_THREADS = \"$OMP_NUM_THREADS\" \n"
            if [[ "$cond1" = hpmcount || "$cond2" = hpmcount ]]
            then
               /opt/optibm/HPM_2_4_1/bin/hpmcount a.out
            elif [[ $localhost = ibmmuc* ]]
            then
ulimit -c unlimited # only for debgingg echo $MP_NODES > ~/job_queue/hostfile.$kennung echo $MP_PROCS >> ~/job_queue/hostfile.$kennung cat $LOADL_HOSTFILE >> ~/job_queue/hostfile.$kennung export MP_NODES=$nodes export MP_PROCS=$numprocs # export MPI_SINGLE_THREAD=no # LRZ NetCDF # export MP_TASKS_PER_NODE=$tasks_per_node echo "Resource Info: " echo "numprocs: " $numprocs " MP_PROCS " $MP_PROCS echo "nodes: " $nodes " MP_NODES " $MP_NODES echo "tasks_per_node: " $tasks_per_node echo "threads_per_task: " $threads_per_task export OMP_NUM_THREADS=1 source /lrz/sys/share/modules/init/bash module li echo "runfile_atmos" if [[ $run_vnested_model = true ]] then printf "\n Nested run ($numprocs_crse Coarse, $numprocs_fine Fine)" printf "\n using $nested_mode nesting" printf "\n\n" echo "$vnested_mode $numprocs_crse $numprocs_fine" > runfile_atmos poe ./a.out < runfile_atmos # mpiexec -n $numprocs ./a.out < runfile_atmos else echo "precursor_atmos" > runfile_atmos poe ./a.out -proc $numprocs -nodes $nodes < runfile_atmos # mpiexec -n $numprocs ./a.out < runfile_atmos fi else if [[ $run_coupled_model = false ]] then if [[ "$ocean_file_appendix" = true ]] then echo "precursor_ocean" > runfile_atmos else echo "precursor_atmos" > runfile_atmos fi else (( iia = $numprocs_atmos / $threads_per_task )) (( iio = $numprocs_ocean / $threads_per_task )) printf "\n coupled run ($iia atmosphere, $iio ocean)" printf "\n\n" echo "coupled_run $iia $iio" > runfile_atmos fi poe ./a.out < runfile_atmos fi else if [[ $localhost = ibmh || $localhost = ibms ]] then poe a.out -procs $numprocs -nodes 1 -rmpool 0 elif [[ $localhost = ibmkisti || $localhost = ibmku ]] then if [[ -f $hostfile ]] then cp $hostfile hostfile else (( ii = 1 )) while (( ii <= $numprocs )) do echo $localhost_realname >> hostfile (( ii = ii + 1 )) done fi export MP_HOSTFILE=hostfile if [[ $run_coupled_model = false ]] then if [[ "$ocean_file_appendix" = true ]] then echo "precursor_ocean" > runfile_atmos else echo 
"precursor_atmos" > runfile_atmos fi else (( iia = $numprocs_atmos / $threads_per_task )) (( iio = $numprocs_ocean / $threads_per_task )) printf "\n coupled run ($iia atmosphere, $iio ocean)" printf "\n\n" echo "coupled_run $iia $iio" > runfile_atmos fi poe ./a.out -procs $numprocs < runfile_atmos else if [[ "$host_file" = "" ]] then printf "\n +++ no hostfile given in configuration file" locat=config_file exit else eval host_file=$host_file fi export MP_HOSTFILE=$host_file poe a.out -procs $numprocs -tasks_per_node $numprocs fi fi elif [[ $(echo $host | cut -c1-2) = lc && $host != lckyoto && $host != lctit ]] then # COPY HOSTFILE FROM SOURCE DIRECTORY OR CREATE IT, IF IT # DOES NOT EXIST if [[ $host != lcbullhh && $host != lccrayb && $host != lccrayf && $host != lccrayh && $host != lckyuh && $host != lckyut && $host != lcocean && $host != lceddy ]] then if [[ -f $hostfile ]] then cp $hostfile hostfile (( ii = $numprocs / $threads_per_task )) [[ $ii = 0 ]] && (( ii = 1 )) else (( ii = 1 )) while (( ii <= $numprocs / $threads_per_task )) do echo $localhost_realname >> hostfile (( ii = ii + 1 )) done if (( $numprocs / $threads_per_task == 0 )) then echo $localhost_realname >> hostfile fi fi eval zeile=\"`head -n $ii hostfile`\" printf "\n *** running on: $zeile" fi (( ii = $numprocs / $threads_per_task )) [[ $ii = 0 ]] && (( ii = 1 )) export OMP_NUM_THREADS=$threads_per_task if [[ $threads_per_task != 1 ]] then # INCREASE STACK SIZE TO UNLIMITED, BECAUSE OTHERWISE LARGE RUNS # MAY ABORT ulimit -s unlimited printf "\n threads per task: $threads_per_task stacksize: unlimited" fi if [[ $run_coupled_model = false && $run_vnested_model = false ]] then if [[ "$ocean_file_appendix" = true ]] then echo "precursor_ocean" > runfile_atmos else echo "precursor_atmos" > runfile_atmos fi printf "\n\n" if [[ $host = lccrayb || $host = lccrayh ]] then echo "--- aprun -n $ii -N $tasks_per_node a.out < runfile_atmos" aprun -n $ii -N $tasks_per_node a.out < runfile_atmos elif [[ $host 
= lcbullhh ]] then export OMPI_MCA_pml=cm export OMPI_MCA_mtl=mxm export OMPI_MCA_coll=^ghc export OMPI_MCA_mtl_mxm_np=0 export MXM_RDMA_PORTS=mlx5_0:1 export MXM_LOG_LEVEL=ERROR export OMP_NUM_THREADS=$threads_per_task export KMP_AFFINITY=verbose,granularity=core,compact,1 export KMP_STACKSIZE=64m srun -n $ii --ntasks-per-node=$tasks_per_node ./a.out < runfile_atmos elif [[ $host = lccrayf ]] then aprun -j1 -n $ii -N $tasks_per_node -m ${memory}M a.out < runfile_atmos elif [[ $host = lcxe6 || $host = lcxt5m ]] then aprun -n $ii -N $tasks_per_node a.out < runfile_atmos elif [[ $host = lceddy ]] then echo $ii echo $tasks_per_node echo $nodes mpirun -n $ii a.out < runfile_atmos elif [[ $host = lcgeohu ]] then srun -n $ii a.out < runfile_atmos elif [[ $host = lcocean ]] then mpirun a.out $ROPTS < runfile_atmos elif [[ $host = lcsb ]] then mpirun_rsh -hostfile $PBS_NODEFILE -np `cat $PBS_NODEFILE | wc -l` a.out < runfile_atmos elif [[ $host = lclrz || $host = lcbwuni ]] then mpiexec -n $ii a.out < runfile_atmos $ROPTeS elif [[ $host = lckea* ]] then srun -n $ii a.out < runfile_atmos $ROPTeS elif [[ $host = lckiaps ]] then mpirun -np $ii -machinefile $PBS_NODEFILE ./a.out < runfile_atmos elif [[ $host = lckyu* ]] then mpiexec -n $ii --stdin runfile_atmos ./a.out else mpiexec -machinefile hostfile -n $ii a.out < runfile_atmos fi elif [[ $run_coupled_model = true ]] then # COUPLED RUN (( iia = $numprocs_atmos / $threads_per_task )) (( iio = $numprocs_ocean / $threads_per_task )) printf "\n coupled run ($iia atmosphere, $iio ocean)" printf "\n\n" echo "coupled_run $iia $iio" > runfile_atmos if [[ $host = lccrayf || $host = lcxe6 || $host = lcxt5m ]] then aprun -n $ii -N $tasks_per_node a.out < runfile_atmos elif [[ $host = lck || $host = lckordi ]] then mpiexec -n $ii ./a.out < runfile_atmos & elif [[ $host = lckyu* ]] then mpiexec -n $ii --stdin runfile_atmos ./a.out elif [[ $host = lcmuk ]] then mpiexec -machinefile hostfile -n $ii a.out < runfile_atmos fi wait elif [[ 
$run_vnested_model = true ]] then printf "\n Vertical Nested run ($numprocs_crse Coarse, $numprocs_fine Fine)" printf "\n using $vnested_mode vnesting" printf "\n\n" echo "$vnested_mode $numprocs_crse $numprocs_fine" > runfile_atmos if [[ $host = lcbwuni || $host = lclrz* ]] then mpiexec -n $ii a.out < runfile_atmos $ROPTeS elif [[ $host = lckea* ]] then srun -n $ii a.out < runfile_atmos $ROPTeS else mpirun -np $numprocs ./a.out $ROPTS < runfile_atmos fi wait fi elif [[ $host = lckyoto ]] then set -xv export P4_RSHCOMMAND=plesh echo " P4_RSHCOMMAND = $P4_RSHCOMMAND" if [[ "$ENVIRONMENT" = BATCH ]] then if [[ "$cond2" = fujitsu ]] then mpiexec -n $numprocs ./a.out # for fujitsu-compiler elif [[ "cond2" = pgi ]] then mpirun -np $numprocs -machinefile ${QSUB_NODEINF} ./a.out else mpirun_rsh -np $numprocs -hostfile ${QSUB_NODEINF} MV2_USE_SRQ=0 ./a.out || /bin/true fi else if [[ "$cond2" = "" ]] then mpiruni_rsh -np $numprocs ./a.out # for intel else mpirun -np $numprocs ./a.out fi fi set +xv elif [[ $host = lctit ]] then export OMP_NUM_THREADS=$threads_per_task echo "OMP_NUM_THREADS=$OMP_NUM_THREADS" if [[ "$threads_per_task" != 1 ]] then export MV2_ENABLE_AFFINITY=0 fi echo "----- PBS_NODEFILE content:" cat $PBS_NODEFILE echo "-----" (( ii = $numprocs / $threads_per_task )) echo "mpirun -np $ii -hostfile $PBS_NODEFILE ./a.out" mpirun -np $ii -hostfile $PBS_NODEFILE ./a.out else mpprun -n $numprocs a.out fi else a.out fi fi # end normal (non-debug) execution fi # end explicit execute_command or host-specific execute actions if [[ $? 
!= 0 ]] then # ABORT IN CASE OF RUNTIME ERRORS printf "\n +++ runtime error occured" locat=execution exit else printf "\n$dashes\n *** execution finished \n" # STOP THE DVR STREAMING SERVER AND PROCESS THE DVR OUTPUT IN ORDER # TO CREAT DVRS- AND HTML-FILES CONTAINING ALL STREAMS if [[ "$dvrserver_id" != "" ]] then kill $dvrserver_id printf "\n *** dvr server with id=$dvrserver_id has been stopped" # IF THERE IS A DIRECTORY, DATA HAVE BEEN OUTPUT BY THE # STREAMING SERVER. OTHERWISE, USER HAS CHOSEN dvrp_output=local if [[ -d DATA_DVR ]] then # ADD THE CURRENT DVR CONFIGURATION FILE TO THE DVR OUTPUT # DIRECTORY cp .dvrserver.config DATA_DVR # PROCESS THE DVR OUTPUT (OPTION -s FOR GENERATING # SEQUENCE MODE DATA TOO) process_dvr_output -d DATA_DVR -f $fname -s else # PROCESS THE LOCAL OUTPUT process_dvr_output -l -d DATA_DVR -f $fname fi elif [[ $(echo $package_list | grep -c dvrp_graphics) != 0 ]] then # PROCESS DVR OUTPUT GENERATD IN LOCAL MODE (dvrp_output=local) process_dvr_output -l -d DATA_DVR -f $fname fi fi # CALL OF combine_plot_fields IN ORDER TO MERGE SINGLE FILES WRITTEN # BY EACH CORE INTO ONE FILE if [[ ! -f ${PALM_BIN}/combine_plot_fields${block}.x ]] then printf "\n\n\n +++ WARNING: no combine_plot_fields found for given block \"$cond1 $cond2\"" printf "\n 2d- and/or 3d-data may be incomplete!" printf "\n Run \"mbuild -u -h $localhost\" to generate utilities for this block.\n" elif [[ "$combine_plot_fields" == true ]] then if [[ $localhost = lccrayh || $localhost = lccrayb ]] then printf "\n\n\n *** post-processing: now executing \"aprun -n 1 -N 1 combine_plot_fields${block}.x\" ..." aprun -n 1 -N 1 combine_plot_fields${block}.x else printf "\n\n\n *** post-processing: now executing \"combine_plot_fields${block}.x\" ..." combine_plot_fields${block}.x fi else # TEMPORARY SOLUTION TO SKIP combine_plot_fields. THIS IS REQUIRED IN CASE OF HUGE AMOUNT OF # DATA OUTPUT. TO DO: EXTEND THIS BRANCH BY CREATING A BATCH JOB for combine_plot_fields. # ??? 
koennen wir das streichen ??? printf "\n\n\n *** post-processing: skipping combine_plot_fields (-Z option set) ..." fi # EXECUTE OUTPUT-COMMANDS GIVEN IN THE CONFIGURATION FILE (( i = 0 )) while (( i < ioc )) do (( i = i + 1 )) if (( i == 1 )) then printf "\n\n *** execution of OUTPUT-commands:\n$dashes" fi printf "\n >>> ${out_command[$i]}" eval ${out_command[$i]} if (( i == ioc )) then printf "\n$dashes\n" fi done # IN TRACE-MODE PRINT CONTENTS OF THE CURRENT (TEMPORARY) WORKING DIRECTORY if [[ $do_trace = true ]] then printf "\n\n" ls -al fi # COPY LOCAL OUTPUT-FILES TO THEIR PERMANENT DESTINATIONS (( i = 0 )) while (( i < iout )) do (( i = i + 1 )) if (( i == 1 )) then printf "\n\n *** saving OUTPUT-files:" # GET RUN NUMBER ASSIGNED BY PALM if [[ -f RUN_NUMBER ]] then read run_number < RUN_NUMBER printf "\n *** PALM generated run_number = "$run_number" will be used as unified cycle number for all output files" usecycle_option="-U $run_number" else run_number=0 usecycle_option="" fi printf "\n$dashes" fi # ADD CYCLE NUMBER TO FILENAME if [[ ! ( $fromhost != $localhost && ( "${actionout[$i]}" = tr || "${actionout[$i]}" = tra || "${actionout[$i]}" = trpe ) ) ]] then # IN APPEND MODE, FILES KEEP THEIR CURRENT CYCLE NUMBER if [[ "${actionout[$i]}" != "a" ]] then # SET RUN NUMBER AS CYCLE NUMBER, IF THERE IS NOT A CONFLICT # WITH AN EXISTING CYCLE NUMBER if (( run_number >= cycnum[$i] )) then (( cycnum[$i] = run_number )) else if (( run_number > 0 )) then printf "\n --- INFORMATIVE: The following file cannot get a unified cycle number" fi fi fi if (( cycnum[$i] > 0 )) then pathout[$i]=${pathout[$i]}.${cycnum[$i]} fi fi # CHECK FOR SINGLE FILE (SERIAL RUN) OR DIRECTORY (ONE FILE PER CORE FOR PARELLEL EXECUTION) files_for_pes=false; filetyp=file link_local_output=false if [[ "${actionout[$i]}" = pe && -n $numprocs ]] then files_for_pes=true; filetyp=directory actionout[$i]="" elif [[ "${actionout[$i]}" = pe && ! 
-n $numprocs ]] then actionout[$i]="" elif [[ "${actionout[$i]}" = lnpe && -n $numprocs ]] then files_for_pes=true; filetyp=directory link_local_output=true actionout[$i]="" elif [[ "${actionout[$i]}" = lnpe && ! -n $numprocs ]] then link_local_output actionout[$i]="" elif [[ "${actionout[$i]}" = trpe && -n $numprocs ]] then files_for_pes=true; filetyp=directory actionout[$i]="tr" elif [[ "${actionout[$i]}" = trpe && ! -n $numprocs ]] then actionout[$i]="tr" fi if [[ ! -f ${localout[$i]} && $files_for_pes = false ]] then printf "\n +++ temporary OUTPUT-file ${localout[$i]} does not exist\n" elif [[ ! -d ${localout[$i]} && $files_for_pes = true ]] then printf "\n +++ temporary OUTPUT-file ${localout[$i]}/.... does not exist\n" else # COPY VIA SCP TO LOCAL HOST (ALWAYS IN BINARY MODE USING batch_scp option -m) # IF TARGET DIRECTORY DOES NOT EXISTS, TRY TO CREATE IT if [[ "${actionout[$i]}" = tr ]] then if [[ $localhost != $fromhost ]] then if [[ $files_for_pes = false ]] then cps="" cst="" else cps=-c cst="/" fi transfer_failed=false printf "\n >>> OUTPUT: ${localout[$i]}$cst by SCP to" printf "\n ${pathout[$i]}/${localhost}_${fname}${endout[$i]}$cst\n" if [[ $localhost = lccrayb ]] then ssh $usern@blogin1 ". \\$HOME/.bashrc; cd $TEMPDIR; batch_scp $PORTOPT $cps -b -m $usecycle_option -u $return_username $return_address ${localout[$i]} \"${pathout[$i]}\" ${localhost}_${fname}${endout[$i]} ${extout[$i]}" elif [[ $localhost = lccrayh ]] then ssh $usern@hlogin1 ". \\$HOME/.bashrc; cd $TEMPDIR; batch_scp $PORTOPT $cps -b -m $usecycle_option -u $return_username $return_address ${localout[$i]} \"${pathout[$i]}\" ${localhost}_${fname}${endout[$i]} ${extout[$i]}" elif [[ $localhost = lcbullhh ]] then ssh $usern@mlogin101 ". \\$HOME/.bashrc; cd $TEMPDIR; batch_scp $PORTOPT $cps -b -m $usecycle_option -u $return_username $return_address ${localout[$i]} \"${pathout[$i]}\" ${localhost}_${fname}${endout[$i]} ${extout[$i]}" elif [[ $localhost = lcxe6 ]] then ssh $usern@hexagon ". 
\\$HOME/.bashrc; cd $TEMPDIR; batch_scp $PORTOPT $cps -b -m $usecycle_option -u $return_username $return_address ${localout[$i]} \"${pathout[$i]}\" ${localhost}_${fname}${endout[$i]} ${extout[$i]}" else batch_scp $PORTOPT $cps -b -m $usecycle_option -u $return_username $return_address ${localout[$i]} "${pathout[$i]}" ${localhost}_${fname}${endout[$i]} ${extout[$i]} fi [[ $? != 0 ]] && transfer_failed=true # IF TRANSFER FAILED, CREATE BACKUP COPY ON THIS MACHINE if [[ $transfer_failed = true ]] then printf " +++ transfer failed. Trying to save a copy on this host under:\n" printf " ${pathout[$i]}/${localhost}_${fname}${endout[$i]}_$kennung\n" # FIRST CHECK, IF DIRECTORY EXISTS, AND CREATE IT, IF NECESSARY eval local_catalog=${pathout[$i]} if [[ ! -d $local_catalog ]] then printf " *** local directory does not exist. Trying to create:\n" printf " $local_catalog \n" mkdir -p $local_catalog fi eval cp ${localout[$i]} ${pathout[$i]}/${localhost}_${fname}${endout[$i]}_$kennung transfer_problems=true fi else # UNSET actionout. DUE TO THIS SETTING, FILE WILL LATER JUST BE COPIED ON THIS MACHINE actionout[$i]="" fi fi # APPEND VIA SCP TO LOCAL HOST (ALWAYS IN BINARY MODE USING batch_scp option -m) # IF TARGET DIRECTORY DOES NOT EXISTS, TRY TO CREATE IT if [[ "${actionout[$i]}" = tra ]] then if [[ $localhost != $fromhost ]] then if [[ $localhost = ibmh ]] then # TRANSFER IN SEPERATE JOB # FIRST COPY FILE TO TEMPORY DATA DIRECTORY [[ ! 
-d $tmp_data_catalog/TRANSFER ]] && mkdir -p $tmp_data_catalog/TRANSFER file_to_transfer=${fname}_${localout[$i]}_to_transfer_$kennung ln -f ${localout[$i]} $tmp_data_catalog/TRANSFER/$file_to_transfer echo "set -x" > transfer_${localout[$i]} echo "cd $tmp_data_catalog/TRANSFER" >> transfer_${localout[$i]} printf "\n >>> OUTPUT: ${localout[$i]} append by SCP in seperate job to" printf "\n ${pathout[$i]}/${localhost}_${fname}${endout[$i]}" printf "\n or higher cycle\n" echo "batch_scp $PORTOPT -A -b -m -u $return_username $return_address $file_to_transfer \"${pathout[$i]}\" ${localhost}_${fname}${endout[$i]} ${extout[$i]}" >> transfer_${localout[$i]} echo "[[ \$? = 0 ]] && rm $file_to_transfer" >> transfer_${localout[$i]} if [[ $LOGNAME = b323013 ]] then subjob -v -q c1 -X 0 -m 1000 -t 900 -c $job_catalog $PORTOPT transfer_${localout[$i]} else subjob -d -v -q c1 -X 0 -m 1000 -t 900 -c $job_catalog $PORTOPT transfer_${localout[$i]} fi else # TRANSFER WITHIN THIS JOB transfer_failed=false printf "\n >>> OUTPUT: ${localout[$i]} append by SCP to" printf "\n ${pathout[$i]}/${localhost}_${fname}${endout[$i]}\n" if [[ $localhost = lccrayb ]] then ssh $usern@blogin1 ". \\$HOME/.bashrc; cd $TEMPDIR; batch_scp $PORTOPT -A -b -m -u $return_username $return_address ${localout[$i]} \"${pathout[$i]}\" ${localhost}_${fname}${endout[$i]} ${extout[$i]}" elif [[ $localhost = lccrayh ]] then ssh $usern@hlogin1 ". \\$HOME/.bashrc; cd $TEMPDIR; batch_scp $PORTOPT -A -b -m -u $return_username $return_address ${localout[$i]} \"${pathout[$i]}\" ${localhost}_${fname}${endout[$i]} ${extout[$i]}" elif [[ $localhost = lcxe6 ]] then ssh $usern@hexagon ". \\$HOME/.bashrc; cd $TEMPDIR; batch_scp $PORTOPT -A -b -m -u $return_username $return_address ${localout[$i]} \"${pathout[$i]}\" ${localhost}_${fname}${endout[$i]} ${extout[$i]}" else batch_scp $PORTOPT -A -b -m -u $return_username $return_address ${localout[$i]} "${pathout[$i]}" ${localhost}_${fname}${endout[$i]} ${extout[$i]} fi [[ $? 
!= 0 ]] && transfer_failed=true # IF TRANSFER FAILED, CREATE BACKUP COPY ON THIS MACHINE if [[ $transfer_failed = true ]] then printf " +++ transfer failed. Trying to save a copy on this host under:\n" printf " ${pathout[$i]}/${localhost}_${fname}${endout[$i]}_$kennung\n" # FIRST CHECK, IF DIRECTORY EXISTS, AND CREATE IT, IF NECESSARY eval local_catalog=${pathout[$i]} if [[ ! -d $local_catalog ]] then printf " *** local directory does not exist. Trying to create:\n" printf " $local_catalog \n" mkdir -p $local_catalog fi eval cp ${localout[$i]} ${pathout[$i]}/${localhost}_${fname}${endout[$i]}_$kennung transfer_problems=true fi fi else # RESET actionout. DUE TO THIS SETTING, FILE WILL LATER JUST BE APPENDED ON THIS MACHINE actionout[$i]=a fi fi # APPEND ON THIS MACHINE if [[ "${actionout[$i]}" = "a" ]] then if [[ "${extout[$i]}" != " " && "${extout[$i]}" != "" ]] then printf "\n >>> OUTPUT: ${localout[$i]} append to ${pathout[$i]}.${extout[$i]}\n" cat ${localout[$i]} >> ${pathout[$i]}.${extout[$i]} else printf "\n >>> OUTPUT: ${localout[$i]} append to ${pathout[$i]}\n" cat ${localout[$i]} >> ${pathout[$i]} fi fi # COPY ON THIS MACHINE # COPY HAS TO BE USED, BECAUSE MOVE DOES NOT WORK IF FILE-ORIGIN AND TARGET ARE # ON DIFFERENT FILE-SYSTEMS if [[ "${actionout[$i]}" = "" && $files_for_pes = false ]] then # COPY IN CASE OF RUNS ON SINGLE CORES if [[ "${extout[$i]}" != " " && "${extout[$i]}" != "" ]] then printf "\n >>> OUTPUT: ${localout[$i]} to ${pathout[$i]}.${extout[$i]}\n" if [[ $link_local_output = true ]] then printf " file will be linked\n" ln -f ${localout[$i]} ${pathout[$i]}.${extout[$i]} fi # If "ln -f" fails of if "$link_local_output = false" do a normal "cp" if [[ ! 
-f "${pathout[$i]}.${extout[$i]}" ]] then if [[ $link_local_output = true ]] then printf " --- WARNING: ln failed, using cp instead (might be time consuming...)\n" fi cp ${localout[$i]} ${pathout[$i]}.${extout[$i]} else printf "+++ no copy because file ${pathout[$i]}.${extout[$i]} exists\n" fi else printf "\n >>> OUTPUT: ${localout[$i]} to ${pathout[$i]}\n" if [[ $link_local_output = true ]] then printf " file will be linked\n" ln -f ${localout[$i]} ${pathout[$i]} fi # If "ln -f" fails of if "$link_local_output = false" do a normal "cp" if [[ ! -f "${pathout[$i]}" ]] then if [[ $link_local_output = true ]] then printf " --- WARNING: ln failed, using cp instead (might be time consuming...)\n" fi cp ${localout[$i]} ${pathout[$i]} else printf "+++ no copy because file ${pathout[$i]} exists\n" fi fi elif [[ "${actionout[$i]}" = "" && $files_for_pes = true ]] then # FILES FROM THE DIFFERENT CORES ARE MOVED WITH ln-COMMAND TO THE PERMANENT DIRECTORY # AS A FIRST STEP, THE PERMANENT DIRECTORY IS CREATED printf "\n >>> OUTPUT: ${localout[$i]}/_.... to ${pathout[$i]}\n" if [[ $link_local_output = true ]] then printf " files will be linked\n" mkdir ${pathout[$i]} cd ${localout[$i]} for file in $(ls *) do ln -f $file ${pathout[$i]} done >|/dev/null 2>&1 cd $TEMPDIR fi # IF "ln -f" HAS FAILED OR IF "$link_local_output = false" DO A NORMAL COPY "cp -r" if [[ ! -f "${pathout[$i]}/_000000" ]] then if [[ $link_local_output = true ]] then printf " --- WARNING: ln failed, using cp instead (might be time consuming...)\n" fi cp -r ${localout[$i]}/* ${pathout[$i]} fi fi fi done if (( i != 0 )) then if [[ $transfer_problems = true ]] then printf "\n$dashes\n *** OUTPUT-files saved" printf "\n +++ WARNING: some data transfers failed! 
\n" else printf "\n$dashes\n *** all OUTPUT-files saved \n" fi fi # IF REQUIRED, START A RESTART-JOB # FILE CONTINUE_RUN MUST HAVE BEEN CREATED BY THE EXECUTABLE (PALM) if [[ -f CONTINUE_RUN ]] then # ADD RESTART-OPTIONS TO THE MRUN-CALL (IF THEY ARE NOT USED ALREADY): # -C TELLS MRUN THAT IT IS A RESTART-RUN # -v SILENT MODE WITHOUT INTERACTIVE QUERIES # -n BATCH-MODE (IMPORTANT ONLY IN CASE OF BATCH JOBS ON THE LOCAL MACHINE) [[ $(echo $mc | grep -c "\-C") = 0 ]] && mc="$mc -C" [[ $(echo $mc | grep -c "\-v") = 0 ]] && mc="$mc -v" [[ $(echo $mc | grep -c "\-b") = 0 ]] && mc="$mc -b" if [[ $(echo $mc | grep -c "#") != 0 ]] then mc=`echo $mc | sed 's/#/f/g'` fi # START THE RESTART-JOB printf "\n\n *** initiating restart-run on \"$return_address\" using command:\n" echo " $mc" printf "\n$dashes\n" if [[ $localhost != $fromhost ]] then if [[ $localhost = lcbullhh || $localhost = lccrayb || $localhost = lccrayh || $localhost = ibmh || $localhost = ibmkisti || $localhost = ibmku || $localhost = ibms || $localhost = lceddy || $localhost = lckyu* || $localhost = lcxe6 || $localhost = lcgeohu ]] then echo "*** ssh will be used to initiate restart-runs!" echo " return_address=\"$return_address\" " echo " return_username=\"$return_username\" " if [[ $(echo $return_address | grep -c "130.75.105") = 1 ]] then if [[ $localhost = ibmh ]] then ssh $SSH_PORTOPT $usern@136.172.40.15 "ssh $SSH_PORTOPT $return_address -l $return_username \". \\\$HOME/.profile; module load intel-compiler hdf5 netcdf; PATH=\\\$PATH:$LOCAL_MRUN_PATH;export PALM_BIN=$LOCAL_MRUN_PATH;cd $LOCAL_PWD; $mc\" " elif [[ $localhost = lcbullhh ]] then ssh $SSH_PORTOPT $usern@mlogin101 "ssh $SSH_PORTOPT $return_address -l $return_username \". \\\$HOME/.profile; module load intel-compiler hdf5 netcdf; PATH=\\\$PATH:$LOCAL_MRUN_PATH;export PALM_BIN=$LOCAL_MRUN_PATH;cd $LOCAL_PWD; $mc\" " elif [[ $localhost = lccrayb ]] then ssh $usern@blogin1 "ssh $SSH_PORTOPT $return_address -l $return_username \". 
\\\$HOME/.profile; module load intel-compiler hdf5 netcdf; PATH=\\\$PATH:$LOCAL_MRUN_PATH; export PALM_BIN=$LOCAL_MRUN_PATH;cd $LOCAL_PWD; $mc\" "
   elif [[ $localhost = lccrayh ]]
   then
      ssh $usern@hlogin1 "ssh $SSH_PORTOPT $return_address -l $return_username \". \\\$HOME/.profile; module load intel-compiler hdf5 netcdf; PATH=\\\$PATH:$LOCAL_MRUN_PATH; export PALM_BIN=$LOCAL_MRUN_PATH;cd $LOCAL_PWD; $mc\" "
   elif [[ $localhost = lcxe6 ]]
   then
      ssh $usern@hexagon "ssh $SSH_PORTOPT $return_address -l $return_username \". \\\$HOME/.profile; module load intel-compiler hdf5 netcdf; PATH=\\\$PATH:$LOCAL_MRUN_PATH; export PALM_BIN=$LOCAL_MRUN_PATH;cd $LOCAL_PWD; $mc\" "
   else
         # DIRECT (SINGLE-HOP) ssh: THE COMMAND STRING IS PARSED ONLY ONCE ON THE REMOTE
         # SIDE, SO ONE LEVEL OF ESCAPING (\$) IS REQUIRED, AS ALREADY USED FOR PATH IN
         # THIS COMMAND. THE FORMER \\\$HOME REACHED THE REMOTE SHELL AS \$HOME, I.E. AS
         # THE LITERAL FILE NAME "$HOME/.profile", WHICH CANNOT BE SOURCED
      ssh $SSH_PORTOPT $return_address -l $return_username ". \$HOME/.profile; module load intel-compiler hdf5 netcdf; PATH=\$PATH:$LOCAL_MRUN_PATH;export PALM_BIN=$LOCAL_MRUN_PATH;cd $LOCAL_PWD; $mc "
   fi
else
      # RETURN ADDRESS OUTSIDE THE 130.75.105 SUBNET: NO PROFILE / MODULES ARE LOADED,
      # ONLY PATH AND PALM_BIN ARE SET BEFORE THE MRUN-COMMAND IS EXECUTED
   if [[ $localhost = ibmkisti ]]
   then
      ssh $SSH_PORTOPT $usern@gaiad "ssh $SSH_PORTOPT $return_address -l $return_username \"PATH=\\\$PATH:$LOCAL_MRUN_PATH;export PALM_BIN=$LOCAL_MRUN_PATH;cd $LOCAL_PWD; $mc\" "
   elif [[ $localhost = lceddy ]]
   then
      /usr/bin/ssh $SSH_PORTOPT $return_address -l $return_username "PATH=\$PATH:$LOCAL_MRUN_PATH;export PALM_BIN=$LOCAL_MRUN_PATH;cd $LOCAL_PWD; $mc "
      # NOTE(review): lcocean is not contained in the host list of the enclosing
      # if-statement, so this branch looks unreachable - confirm
   elif [[ $localhost = lcocean ]]
   then
      /usr/bin/ssh $SSH_PORTOPT $return_address -l $return_username "PATH=\$PATH:$LOCAL_MRUN_PATH;export PALM_BIN=$LOCAL_MRUN_PATH;cd $LOCAL_PWD; $mc "
   elif [[ $localhost = lccrayb ]]
   then
      ssh $usern@blogin1 "ssh $SSH_PORTOPT $return_address -l $return_username \"PATH=\\\$PATH:$LOCAL_MRUN_PATH;export PALM_BIN=$LOCAL_MRUN_PATH;cd $LOCAL_PWD; $mc\" "
   elif [[ $localhost = lccrayh ]]
   then
      ssh $usern@hlogin1 "ssh $SSH_PORTOPT $return_address -l $return_username \"PATH=\\\$PATH:$LOCAL_MRUN_PATH;export PALM_BIN=$LOCAL_MRUN_PATH;cd $LOCAL_PWD; $mc\" "
      # HOST IDENTIFIER CORRECTED (WAS MISSPELLED lccgeohu). THE ENCLOSING HOST LIST
      # USES lcgeohu, SO THE RESTART VIA cirrus COULD NEVER BE SELECTED BEFORE
   elif [[ $localhost = lcgeohu ]]
   then
      ssh $usern@cirrus.geo.hu-berlin.de "ssh $SSH_PORTOPT $return_address -l $return_username
\"PATH=\\\$PATH:$LOCAL_MRUN_PATH;export PALM_BIN=$LOCAL_MRUN_PATH;cd $LOCAL_PWD; $mc\" " else ssh $SSH_PORTOPT $return_address -l $return_username "PATH=\$PATH:$LOCAL_MRUN_PATH;export PALM_BIN=$LOCAL_MRUN_PATH;cd $LOCAL_PWD; $mc " fi fi else printf "\n +++ no restart mechanism available for host \"$localhost\" " locat=restart; exit fi # WAIT TO ALLOW THE RESTART-JOB TO BE QUEUED, BEFORE THE CURRENT JOB IS FINISHED if [[ $queue = special1q ]] then sleep 120 else sleep 30 fi else # JOBS RUNNING (AND STARTED) ON THE LOCAL MACHINE CAN DIRECTLY CALL MRUN (WITHOUT # USING SSH) cd $LOCAL_PWD if [[ $localhost = lckyuh ]] then printf "\n +++ WARNING: no restart mechanism available for host \"$localhost\" " printf "\n please restart job manually using command:\n" printf "\n \"$mc\" " else eval $mc # THE ' MUST BE EVALUATED fi cd - > /dev/null fi if [[ $localhost = lckyuh ]] then printf "\n$dashes\n *** restart-run to be initiated manually\n" else printf "\n$dashes\n *** restart-run initiated \n" fi # DELETE INPUT-(RESTART)FILES, WHICH HAVE BEEN FETCHED FROM THE TEMPORARY DATA # DIRECTORY, BACAUSE THEY ARE NOT REQUIRED BY THE RESTART-JOB. 
# THIS IS DONE IN ORDER TO AVOID EXCEEDING DISC QUOTAS OR DISC SPACE (RESTART-FILES # MAY BE VERY HUGE) (( i = 0 )) while (( i < iin )) do (( i = i + 1 )) if [[ "${got_tmp[$i]}" = true && $keep_data_from_previous_run = false ]] then rm -r ${absnamein[$i]} fi done fi # SEND EMAIL NOTIFICATION ABOUT THE FINISHED RUN if [[ "$email_notification" != "none" ]] then if [[ $localhost != $fromhost ]] then if [[ -f CONTINUE_RUN ]] then echo "PALM restart run necessary" > email_text echo "description header of actual run:" >> email_text cat CONTINUE_RUN >> email_text echo "mrun-command to restart:" >> email_text echo "$mc" >> email_text else echo "PALM run with base filename \"$fname\" on host \"$localhost\" finished" > email_text fi mail $email_notification < email_text printf "\n *** email notification sent to \"$email_notification\" " fi fi # ALL ACTIONS FINISHED, TEMPORARY WORKING-DIRECTORY CAN BE DELETED cd $HOME [[ $delete_temporary_catalog = true ]] && rm -rf $TEMPDIR else # PREPARING ACTIONS, # IF A BATCH-JOB IS TO BE GENERATED AND STARTED ON A REMOTE-MACHINE GERECHNET # BUILD THE MRUN-COMMAND TO BE CALLED IN THE BATCH-JOB ON THE REMOTE-MACHINE mrun_com="$mrun_script_name -a $afname -c $config_file -d $fname -h $host -H $fromhost -m $memory -t $cpumax -q $queue -R $return_address -U $return_username -u $remote_username" [[ "$global_revision" != "" ]] && mrun_com=${mrun_com}" -G \"$global_revision\"" [[ $group_number != none ]] && mrun_com=${mrun_com}" -g $group_number" [[ $do_compile = true ]] && mrun_com=${mrun_com}" -s \"$source_list\"" [[ "$input_list" != "" ]] && mrun_com=${mrun_com}" -i \"$input_list\"" [[ $keep_data_from_previous_run = true ]] && mrun_com=${mrun_com}" -k" [[ "$additional_conditions" != "" ]] && mrun_com=${mrun_com}" -K \"$additional_conditions\"" [[ "$output_list" != "" ]] && mrun_com=${mrun_com}" -o \"$output_list\"" [[ "$read_from_config" = false ]] && mrun_com=${mrun_com}" -S" [[ $do_trace = true ]] && mrun_com=${mrun_com}" -x" [[ "$numprocs" 
!= "" ]] && mrun_com=${mrun_com}" -X $numprocs" if [[ $use_openmp = true ]] then mrun_com=${mrun_com}" -O $threads_per_task" fi [[ "$tasks_per_node" != "" ]] && mrun_com=${mrun_com}" -T $tasks_per_node" [[ $package_list != "" ]] && mrun_com=${mrun_com}" -p \"$package_list\"" [[ $return_password != "" ]] && mrun_com=${mrun_com}" -P $return_password" [[ $delete_temporary_catalog = false ]] && mrun_com=${mrun_com}" -B" [[ $node_usage != default && "$(echo $node_usage | cut -c1-3)" != "sla" && $node_usage != novice ]] && mrun_com=${mrun_com}" -n $node_usage" [[ "$ocean_file_appendix" = true ]] && mrun_com=${mrun_com}" -y" [[ $run_coupled_model = true ]] && mrun_com=${mrun_com}" -Y \"$coupled_dist\"" [[ $run_vnested_model = true ]] && mrun_com=${mrun_com}" -N \"$vnested_dist\"" [[ "$check_namelist_files" = false ]] && mrun_com=${mrun_com}" -z" [[ "$combine_plot_fields" = false ]] && mrun_com=${mrun_com}" -Z" [[ "$max_par_io_str" != "" ]] && mrun_com=${mrun_com}" -w $max_par_io_str" if [[ $do_remote = true ]] then printf "\n>>>> MRUN-command on execution host:\n>>>> $mrun_com \n" fi # CREATE EXECUTABLE FOR BATCH JOB if [[ $create_executable_for_batch = true && $restart_run != true ]] then printf "\n *** creating the executable for batch job\n" # METHOD ONLY WORKS FOR BATCH JOBS ON LOCAL HOSTS if [[ $host != $localhost ]] then printf "\n +++ creation of executables is only allowed for batch jobs on local hosts." printf "\n Please set create_executable_for_batch = false in the config-file.\n" locat=create_executable; exit fi mkdir $working_directory/SOURCES_FOR_RUN_${fname}/TMPDIR_FOR_CREATING_EXECUTABLE cd $working_directory/SOURCES_FOR_RUN_${fname}/TMPDIR_FOR_CREATING_EXECUTABLE cp $make_depository . tar -xf $make_depository > /dev/null 2>&1 cp ../* . > /dev/null 2>&1 make $mopts -f Makefile PROG=a.out F90=$compiler_name COPT="$cpp_options" F90FLAGS="$fopts" LDFLAGS="$lopts" if [[ $? 
!= 0 || "$compile_error" = true || "$module_compile_error" = true ]] then printf "\n +++ error occured while compiling or linking" locat=compile exit fi mv a.out .. cd - > /dev/null 2>&1 rm -rf $working_directory/SOURCES_FOR_RUN_${fname}/TMPDIR_FOR_CREATING_EXECUTABLE printf " *** executable created in \"$working_directory/SOURCES_FOR_RUN_${fname}\"\n " fi # BUILD THE JOB-SCRIPTS ON FILE jobfile jobfile=jobfile.$RANDOM # CREATE TEMPORARY DIRECTORY AND SWITCH TO IT echo "mkdir $TEMPDIR" >> $jobfile echo "cd $TEMPDIR" >> $jobfile # ACTIVATE ERROR-TRACEBACK if [[ $do_trace = true ]] then echo "set -x" >> $jobfile else echo "set +vx" >> $jobfile fi # INITIALIZE THE ENVIRONMENT AND LOAD MODULES if [[ "$init_cmds" != "" ]] then echo "$init_cmds" >> $jobfile fi if [[ "$module_calls" != "" ]] then echo "$module_calls" >> $jobfile fi # PROVIDE SOURCE-CODE FILES, MRUN-SCRIPT AND CONFIGURATION-FILE FOR THE JOB # then-CLAUSE: FILES ARE COLLECTED IN THE SOURCES_FOR_RUN_... DIRECTORY ON THE LOCAL HOST, # WHICH IS THEN FETCHED FROM THE BATCH-JOB BY USING cp/scp # THE SOURCE-CODE FILES ALREADY EXIST IN THIS DIRECTORY # else-CLAUSE: FILE-CONTENTS ARE PUT INTO THE JOB-FILE AS HERE-DOCUMENTS # THIS MAY CREATE A QUITE LARGE JOB-FILE, WHICH CAN CAUSE PROBLEMS WITH SOME # QUEUEING-SYSTEMS if [[ $host = ibmkisti || $host = lcbullhh || $host = lccrayb || $host = lccrayf || $host = lccrayh || $host = lcocean || $host = lcgeohu ]] then # COPY CONFIGURATION-FILE AND MRUN-SCRIPT INTO THE SOURCES_FOR_RUN... DIRECTORY if [[ $restart_run != true ]] then cp $config_file $working_directory/SOURCES_FOR_RUN_$fname cp ${PALM_BIN}/$mrun_script_name $working_directory/SOURCES_FOR_RUN_$fname fi # COPY THE SOURCES_FOR_RUN... 
DIRECTORY FROM THE LOCAL HOST TO THE JOB VIA scp # (then-CLAUSE: JOBS ON THE LOCAL HOST CAN JUST USE cp) echo "set -x" >> $jobfile if [[ $host = $localhost ]] then # DUE TO UNKNOWN REASONS, COPY WITH cp COMMAND CREATES CORRUPT # FILES ON CRAY XC30 SYSTEMS (CSC HELSINKI), rsync IS USED INSTEAD if [[ $(echo $host | cut -c1-6) = lccray ]] then echo "rsync -av -t $working_directory/SOURCES_FOR_RUN_$fname ." >> $jobfile else echo "cp -r $working_directory/SOURCES_FOR_RUN_$fname ." >> $jobfile fi else if [[ $host = ibmkisti ]] then # ON KISTI'S IBM FIREWALL IS ONLY OPENED ON INTERACTIVE NODE echo "localdir=\`pwd\`" >> $jobfile echo "ssh $SSH_PORTOPT $remote_username@gaiad \"cd \$localdir; scp $PORTOPT -r $return_username@$return_address:$working_directory/SOURCES_FOR_RUN_$fname .\" " >> $jobfile elif [[ $host = lcbullhh ]] then echo "localdir=\`pwd\`" >> $jobfile echo "ssh $SSH_PORTOPT $remote_username@mlogin101 \"cd \$localdir; scp $PORTOPT -r $return_username@$return_address:$working_directory/SOURCES_FOR_RUN_$fname .\" " >> $jobfile elif [[ $host = lccrayb ]] then echo "localdir=\`pwd\`" >> $jobfile echo "ssh $SSH_PORTOPT $remote_username@blogin1 \"cd \$localdir; scp $PORTOPT -r $return_username@$return_address:$working_directory/SOURCES_FOR_RUN_$fname .\" " >> $jobfile elif [[ $host = lccrayh ]] then echo "localdir=\`pwd\`" >> $jobfile echo "ssh $SSH_PORTOPT $remote_username@hlogin1 \"cd \$localdir; scp $PORTOPT -r $return_username@$return_address:$working_directory/SOURCES_FOR_RUN_$fname .\" " >> $jobfile else echo "scp $PORTOPT -r $return_username@$return_address:$working_directory/SOURCES_FOR_RUN_$fname ." >> $jobfile fi fi echo "export SOURCES_COMPLETE=true" >> $jobfile # MOVE MRUN-SCRIPT AND CONFIGURATION-FILE FROM THE SOURCES_FOR_RUN... DIRECTORY TO THE # WORKING DIRECTORY OF THE JOB echo "mv SOURCES_FOR_RUN_$fname/$config_file . " >> $jobfile echo "mv SOURCES_FOR_RUN_$fname/$mrun_script_name . 
" >> $jobfile echo "chmod u+rwx $mrun_script_name" >> $jobfile echo "execute_mrun=true" >> $jobfile echo " " >> $jobfile else # PROVIDE SOURCE-CODE FILES AND MAKEFILE AS HERE DOCUMENT if [[ $do_compile = true ]] then source_catalog=SOURCES_FOR_RUN_$fname # CREATE SOURCES_FOR_RUN... DIRECTORY TO STORE THE SOURCE CODE FILES AND THE MAKEFILE echo "mkdir SOURCES_FOR_RUN_$fname" >> $jobfile echo "export SOURCES_COMPLETE=true" >> $jobfile echo "cd SOURCES_FOR_RUN_$fname" >> $jobfile for filename in $source_list do # BACKSLASH IS USED FOR MASKING echo "cat > $filename << \"%END%\"" >> $jobfile cat $source_catalog/$filename >> $jobfile echo " " >> $jobfile echo "%END%" >> $jobfile echo " " >> $jobfile done # BACKSLASH IS USED FOR MASKING echo "cat > Makefile << \"%END%\"" >> $jobfile cat $source_catalog/Makefile >> $jobfile echo " " >> $jobfile echo "%END%" >> $jobfile echo " " >> $jobfile echo "cd - > /dev/null" >> $jobfile fi # PROVIDE THE CONFIGURATION-FILE AS HERE-DOCUMENT # BACKSLASH IS USED FOR MASKING # LINES WITH #$ IN THE CONFIGURATION-FILE, COMING FROM THE SVN KEYWORD SUBSTITUTION, # ARE REMOVED FROM THE FILE IN ORDER TO AVOID PROBLEMS WITH THE SGE BATCH SYSTEM echo "cat > $config_file << \"%END%\"" >> $jobfile if [[ $host = lckyuh ]] then # NO CROSS-COMPILER ON COMPUTE NODE sed 's/frtpx/frt/g' $config_file >> $jobfile else sed 's/#$.*//g' $config_file >> $jobfile fi echo "%END%" >> $jobfile echo " " >> $jobfile # PROVIDE THE MRUN-SCRIPTS AS HERE-DOCUMENT # BACKSLASH IS USED FOR MASKING echo "cat > $mrun_script_name <<\"%END%\"" >> $jobfile if [[ $host = lckyuh ]] then sed 's/\/bin\/ksh/\/bin\/bash/g' ${PALM_BIN}/$mrun_script_name >> $jobfile else cat ${PALM_BIN}/$mrun_script_name >> $jobfile fi echo "%END%" >> $jobfile echo "chmod u+x $mrun_script_name" >> $jobfile echo "execute_mrun=true" >> $jobfile echo " " >> $jobfile fi # GET REQUIRED INPUT-FILES BY SCP OR BY SENDING THEM WITH THE JOB AS HERE-DOCUMENT # PUT THESE FILES INTO THE USER'S RESPECTIVE PERMANENT 
DIRECTORIES ON THE REMOTE-HOST # IF THE DIRECTORIES DO NOT EXIST, TRY TO CREATE THEM if [[ $do_remote = true ]] then (( i = 0 )) while (( i < iin )) do (( i = i + 1 )) echo "[[ ! -d ${pathin[$i]} ]] && mkdir -p ${pathin[$i]}" >> $jobfile if [[ "${transin[$i]}" = job ]] then echo "cat > ${remotepathin[$i]} <<\"%END%\"" >> $jobfile eval cat ${pathin[$i]}/${frelin[$i]} >> $jobfile echo " " >> $jobfile echo "%END%" >> $jobfile else echo "batch_scp $PORTOPT -b -o -g -s -u $return_username $return_address ${remotepathin[$i]} \"${pathin[$i]}\" ${frelin[$i]}" >> $jobfile fi # CHECK, IF FILE COULD BE CREATED echo "if [[ \$? = 1 ]]" >> $jobfile echo "then" >> $jobfile echo " echo \" \" " >> $jobfile echo " echo \"+++ file ${remotepathin[$i]} could not be created\" " >> $jobfile echo " echo \" please check, if directory exists on $host!\" " >> $jobfile echo " echo \"+++ MRUN will not be continued\" " >> $jobfile echo " execute_mrun=false" >> $jobfile echo "fi" >> $jobfile done fi # PROVIDE NAME OF THE CURRENT WORKING-DIRECTORY ON THE LOCAL MACHINE (FROM WHERE THE JOB IS # STARTED) BY SETTING AN ENVIRONMENT-VARIABLE. 
THIS INFORMATION IS USED IN THE JOB BY MRUN # IN CASE THAT RESTART-RUNS HAVE TO BE GENERATED echo "LOCAL_PWD=$working_directory" >> $jobfile echo "export LOCAL_PWD" >> $jobfile # PROVIDE THE PATH OF THE LOCAL MRUN-SCRIPT FOR THE SAME REASON echo "LOCAL_MRUN_PATH=$PALM_BIN" >> $jobfile echo "export LOCAL_MRUN_PATH" >> $jobfile # lceddy ALSO REQUIRES TO PROVIDE PATH FOR THE PALM-SCRIPTS if [[ $host = lceddy || $localhost = lceddy ]] then echo "export PALM_BIN=$PALM_BIN" | sed -e 's:'$HOME':$HOME:' >> $jobfile echo "export PATH=\$PATH:\$PALM_BIN" >> $jobfile fi # CALL MRUN WITHIN THE JOB (SETTING QUEUE IS A WORKAROUND FOR ibmkisti) # AS FINAL ACTION, REMOVE THE TEMPORARY DIRECTORY CREATED AT THE BEGINNING OF THE JOB echo "set -x" >> $jobfile echo "queue=$queue" >> $jobfile echo "[[ \$execute_mrun = true ]] && ./$mrun_com" >> $jobfile echo 'ls -al; echo `pwd`' >> $jobfile echo "cd \$HOME" >> $jobfile echo "rm -rf $TEMPDIR" >> $jobfile # START THE JOB USING SUBJOB-COMMAND if [[ $silent = false ]] then printf "\n " else printf "\n\n" fi subjob $job_on_file -h $host -u $remote_username -g $group_number -q $queue -m $memory -N $node_usage -t $cpumax $XOPT $TOPT $OOPT -n $fname -v -c $job_catalog -e $email_notification $PORTOPT $jobfile rm -rf $jobfile fi # END OF REMOTE-PART