#!/bin/bash
# palmrun - script for running PALM jobs
#--------------------------------------------------------------------------------#
# This file is part of PALM.
#
# PALM is free software: you can redistribute it and/or modify it under the terms
# of the GNU General Public License as published by the Free Software Foundation,
# either version 3 of the License, or (at your option) any later version.
#
# PALM is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
# A PARTICULAR PURPOSE. See the GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along with
# PALM. If not, see <http://www.gnu.org/licenses/>.
#
# Copyright 2017 Leibniz Universitaet Hannover
#--------------------------------------------------------------------------------#
#
# Current revisions:
# ------------------
#
#
# Former revisions:
# -----------------
# $Id: palmrun 2512 2017-10-04 08:26:59Z suehring $
# bugfix for determining cycle numbers of NetCDF input files
#
# 2506 2017-09-29 08:30:37Z raasch
# option -V added to check for an existing SOURCES_FOR_RUN_... folder
# host configuration added to SOURCES_FOR_RUN_... folder name
# host_identifier renamed host_configuration
# option -W added to allow for job dependencies
#
# 2501 2017-09-26 11:41:55Z raasch
# default value for number of cores (option -X) set to 1
# bugfix for mechanism which overwrites configuration file settings with values
# provided by palmrun options
#
# 2499 2017-09-22 16:47:58Z kanani
# option -h named configuration identifier
#
# 2480 2017-09-19 06:24:14Z maronga
# bugfix for last revision
#
# 2479 2017-09-19 06:12:16Z raasch
# option -A (project account number) added
#
# 2422 2017-09-08 08:25:41Z raasch
# initial revision
#
#--------------------------------------------------------------------------------#
# palmrun - script for running PALM jobs on local and remote hosts
#--------------------------------------------------------------------------------#
# DECLARATION OF VARIABLES AND THEIR DEFAULT VALUES
#
# Every setting that may later be overwritten by a palmrun option or by an
# entry of the configuration file receives its default here. The configuration
# file reader treats "" (or 0) as "not yet assigned", so the defaults below
# decide which variables the configuration file is allowed to set.

   # SUPPRESS EXPORT OF ALL VARIABLES, SINCE IN THE PAST THIS LED TO PROBLEMS
   # IN ROUTINES CALLED BY PALMRUN (TOO MANY ARGUMENTS - PROBLEM)
set +o allexport

   # EXISTING FILES ARE ALLOWED TO BE OVERWRITTEN
set +o noclobber

   # COUNTERS FOR THE ENTRIES READ FROM THE CONFIGURATION FILE AND THE
   # FILE CONNECTION FILE
typeset -i  ibd=0 ibdt=0 iec=0 iic=0 iin=0 ioc=0 iout=0

   # HAS TO BE EXPORTED HERE, OTHERWISE AN UNKNOWN SIDE EFFECT MAY CAUSE DATA
   # LOSS WHEN GETOPTS IS READING THE SCRIPT-OPTION ARGUMENTS
typeset -x -i  memory=0

typeset -i  cores cputime cpu_hours cpu_minutes cpu_seconds i ii iia iii iio
typeset -i  icycle inode ival maxcycle mpi_tasks
typeset -i  nodes remaining_cores run_number tasks_per_node threads_per_task

activation_string_list=""
AddFilenames=""
combine_plot_fields=true
compiler_name=""
compiler_name_ser=""
compiler_options=""
cores=1
cores_atmos=0
cores_ocean=0
coupled_dist=""
cpp_options=""
cpumax=0
create_batch_job=false
create_jobfile_only=false
create_remote_batch_job=false
dashes=" ----------------------------------------------------------------------------"
defaultqueue=""
delete_temporary_catalog=true
do_compile=true
do_trace=false
executable=""
execute_command=""
execution_error=false
fileconnection_file=.palm.iofiles
fname=test
global_revision=""
host_configuration="default"
hostfile=""
hp=""
keep_data_from_previous_run=false
link_local_input=false
link_local_output=false
linker_options=""
local_jobcatalog=""
locat=normal
makefile=""
max_par_io_str=""

   # NAME OF THIS SCRIPT WITHOUT ITS LEADING PATH. THE OPTION PARSER APPENDS
   # THE GIVEN OPTIONS TO prc. THE EXPANSION REMOVES EVERYTHING UP TO THE LAST
   # "/" WITHOUT STARTING EXTERNAL PROCESSES AND WITHOUT WORD SPLITTING.
prc=${0##*/}

module_calls=""
palmrun_memory=""
palmrun_script_name=$prc
openmp=false
previous_job=""
project_account=""
queue=none
restart_run=false
return_address=""
remote_jobcatalog=""
remote_username=""
running_in_batch_mode=false
run_coupled_model=false
run_id=""
silent=false
source_list=""
source_path=""
tasks_per_node=0
threads_per_task=1
tmpcreate=false
transfer_problems=false
user_source_path=""
use_existing_sources_folder=""
use_openmp=false
version="palmrun 1.0 Rev$Rev: 2303 $"
working_directory=$(pwd)
write_binary=""
TOPT=""
# ERROR HANDLING IN CASE OF EXIT
#
# The exit trap inspects $locat:
#   - any value other than "normal" / "control_c" means that palmrun is left
#     because of an error: the error commands read from the configuration
#     file (err_command[1..iec]) are executed and the temporary directory is
#     removed, unless its deletion has been switched off
#   - "normal" prints the regular closing message
#   - "control_c" prints nothing here (the ^C trap below already did)
# Error commands and user names are handed to printf as arguments instead of
# being part of the format string, so that a "%" in them is printed literally.
# The temporary directory is only removed if TEMPDIR is non-empty, and its
# name is quoted.
trap 'if [[ $locat != normal && $locat != control_c ]]
      then
            # CARRY OUT ERROR-COMMANDS GIVEN IN THE CONFIGURATION FILE (EC:)
         (( i = 0 ))
         while (( i < iec ))
         do
            (( i = i + 1 ))
            printf "\n *** Execution of ERROR-command:\n"
            printf " >>> %s\n" "${err_command[$i]}"
            eval ${err_command[$i]}
         done
         [[ $delete_temporary_catalog = true && -n $TEMPDIR ]] && (cd; rm -rf -- "$TEMPDIR")
         printf "\n\n+++ palmrun killed \n\n"
      elif [[ $locat != control_c ]]
      then
         printf "\n\n --> palmrun finished\n\n"
         if [[ $running_on_remote = true ]]
         then
            printf " Bye, bye %s !!\n\n" "$remote_username"
         else
            printf " Bye, bye %s !!\n\n" "$local_username"
         fi
      fi' exit

# ACTIONS IN CASE OF TERMINAL-BREAK (CONTROL-C):
# remove the temporary directory, mark the exit as caused by ^C (so that the
# exit trap stays silent) and leave the script
trap 'cd
      [[ -n $TEMPDIR ]] && rm -rf -- "$TEMPDIR"
      printf "\n+++ palmrun killed by \"^C\" \n\n"
      locat=control_c
      exit
     ' 2
# READ SHELLSCRIPT-OPTIONS AND REBUILD THE PALMRUN-COMMAND STRING (prc),
# WHICH WILL BE USED TO START RESTART-JOBS
#
# The option string starts with ":" (silent error reporting). In this mode
# getopts reports an unknown option as "?" and an option with a missing
# argument as ":", both with the option character in OPTARG. Both cases are
# handled explicitly below; without the ":" arm an option given without its
# argument would be silently ignored.
# Options that do not append to prc (-F, -j, -r, -R, -s, -V, -W) are not part
# of the rebuilt command string.
while  getopts  :a:A:bBCd:FG:h:jkm:M:O:q:r:R:s:t:T:u:U:vVw:W:xX:yY:Z  option
do
   case  $option  in
      (a)   activation_string_list=$OPTARG; prc="$prc -a'$OPTARG'";;
      (A)   project_account=$OPTARG; prc="$prc -A'$OPTARG'";;
      (b)   create_batch_job=true; prc="$prc -b";;
      (B)   delete_temporary_catalog=false; prc="$prc -B";;
      (C)   restart_run=true; prc="$prc -C";;
      (d)   fname=$OPTARG; prc="$prc -d$OPTARG";;
      (F)   create_jobfile_only=true;;
      (G)   global_revision=$OPTARG; prc="$prc -G'$OPTARG'";;
      (h)   host_configuration=$OPTARG; prc="$prc -h$OPTARG";;
      (j)   running_in_batch_mode=true;;
      (k)   keep_data_from_previous_run=true; prc="$prc -k";;
      (m)   palmrun_memory=$OPTARG; prc="$prc -m$OPTARG";;
      (M)   makefile=$OPTARG; prc="$prc -M$OPTARG";;
      (O)   use_openmp=true; threads_per_task=$OPTARG; prc="$prc -O$OPTARG";;
      (q)   queue=$OPTARG; prc="$prc -q$OPTARG";;
      (r)   run_id=$OPTARG;;
      (R)   return_address=$OPTARG;;
      (s)   source_list=$OPTARG;;
      (t)   palmrun_cpumax=$OPTARG; prc="$prc -t$OPTARG";;
      (T)   palmrun_tasks_per_node=$OPTARG; prc="$prc -T$OPTARG";;
      (u)   remote_username=$OPTARG; prc="$prc -u$OPTARG";;
      (U)   return_username=$OPTARG; prc="$prc -U$OPTARG";;
      (v)   silent=true; prc="$prc -v";;
      (V)   use_existing_sources_folder="-V";;
      (w)   max_par_io_str=$OPTARG; prc="$prc -w$OPTARG";;
      (W)   previous_job=$OPTARG;;
      (x)   do_trace=true;set -x; prc="$prc -x";;
      (X)   palmrun_cores=$OPTARG; prc="$prc -X$OPTARG";;
      (y)   ocean_file_appendix=true; prc="$prc -y";;
      (Y)   run_coupled_model=true; coupled_dist=$OPTARG; prc="$prc -Y'$OPTARG'";;
      (Z)   combine_plot_fields=false; prc="$prc -Z";;
      (:)   # OPTION REQUIRES AN ARGUMENT, BUT NONE HAS BEEN GIVEN
            printf "\n +++ option -%s requires an argument \n" "$OPTARG"
            printf "\n --> type \"%s ?\" for available options \n" "$0"
            locat=parameter;exit;;
      (\?)  printf "\n +++ unknown option %s \n" "$OPTARG"
            printf "\n --> type \"%s ?\" for available options \n" "$0"
            locat=parameter;exit;;
   esac
done
# SKIP GIVEN OPTIONS TO READ POSITIONAL PARAMETER, IF GIVEN
# CURRENTLY ONLY PARAMETER ? (TO OUTPUT A SHORT COMMAND INFO) IS ALLOWED
(( to_shift = $OPTIND - 1 ))
shift $to_shift

# PRINT SHORT DESCRIPTION OF PALMRUN OPTIONS
# "?" prints the option overview (paged through "more") and leaves the script.
# Any other positional parameter is an error. The parameter is passed to
# printf as an argument, not as part of the format string, so that a "%" or
# "\" typed by the user is printed literally.
if [[ "$1" = "?" ]]
then
   (printf "\n *** Description of available palmrun options:\n"
    printf "\n Option Description Default-Value"
    printf "\n -a activation string list \"\" "
    printf "\n -A project account number ---"
    printf "\n -b batch-job on local machine ---"
    printf "\n -B do not delete temporary directory at end ---"
    printf "\n -d base name of files attached to program test"
    printf "\n -F create batch job file only ---"
    printf "\n -h host configuration \"default\" "
    printf "\n -k keep data from previous run"
    printf "\n -m memory demand in MB (batch-jobs) 0 MB"
    printf "\n -M Makefile name Makefile"
    printf "\n -O threads per openMP task ---"
    printf "\n -q queue \"$queue\" "
    printf "\n -s filenames of routines to be compiled \"\" "
    printf "\n must end with .f, .f90, .F, or .c !"
    printf "\n use \"..\" for more than one file and wildcards"
    printf "\n -s LM compiles all locally modified files"
    printf "\n -t allowed cpu-time in seconds (batch) 0"
    printf "\n -T tasks per node ---"
    printf "\n -u username on remote machine \"\" "
    printf "\n -v no prompt for confirmation ---"
    printf "\n -V check if SOURCES_FOR_RUN_... exists ---"
    printf "\n -w maximum parallel io streams as given by -X"
    printf "\n -W name of job to wait for ---"
    printf "\n -x tracing of palmrun for debug purposes ---"
    printf "\n -X # of processors (on parallel machines) 1"
    printf "\n -y add appendix \"_O\" to all local output"
    printf "\n files (ocean precursor runs followed by"
    printf "\n coupled atmosphere-ocean runs) ---"
    printf "\n -Y run coupled model, \"#1 #2\" with"
    printf "\n #1 atmosphere and #2 ocean processors \"#/2 #/2\" depending on -X"
    printf "\n -Z skip combine_plot_fields at the end of "
    printf "\n the simulation ---"
    printf "\n "
    printf "\n Possible values of positional parameter :"
    printf "\n \"?\" - this outline \n\n") | more
   exit
elif [[ "$1" != "" ]]
then
   printf "\n +++ positional parameter %s unknown \n" "$1"
   locat=parameter; exit
fi
# SHORT STARTING MESSAGE
printf "\n*** $version "
printf "\n will be executed. Please wait ..."

# BUILD THE CONFIGURATION-FILE NAME AND THE SOURCES_FOR_RUN-FOLDER NAME
# (both depend on the host configuration, the folder also on the run name)
sources_for_run_catalog=SOURCES_FOR_RUN_${host_configuration}_$fname
config_file=.palm.config.$host_configuration

# THE CONFIGURATION FILE MUST EXIST IN THE CURRENT DIRECTORY
[[ -f $config_file ]]  ||  {
   printf "\n\n +++ configuration file: "
   printf "\n $config_file"
   printf "\n does not exist"
   locat=connect; exit
}

# THE FILE CONNECTION FILE MUST EXIST, TOO
[[ -f $fileconnection_file ]]  ||  {
   printf "\n\n +++ file connection file: "
   printf "\n $fileconnection_file"
   printf "\n does not exist"
   locat=connect; exit
}

# WITHOUT AN ACTIVATION STRING LIST (OPTION -a) NO FILE CONNECTION CAN BE
# ACTIVATED, SO THE RUN IS REFUSED
if [[ -z "$activation_string_list" ]]
then
   printf "\n\n +++ no activation string list given: "
   printf "\n please set palmrun option \"-a\" "
   locat=palmrun_option; exit
fi
# SET VARIABLE TO ACTIVATE PALM BINARY OUTPUT FOR RESTARTS
# (binary output is requested as soon as the string "restart" appears
# anywhere in the activation string list)
if  echo $activation_string_list | grep -q "restart"
then
   write_binary=true
else
   write_binary=false
fi
# READ AND EVALUATE THE CONFIGURATION-FILE
#
# The file is processed line by line. The first characters of a line decide
# how it is interpreted:
#   %<name> <value>   variable setting; exported unless the variable already
#                     has a value other than "" or 0 in this script
#   BD:<text>         batch directive, stored in batch_directive[1..ibd]
#   BDT:<text>        batch directive for sending back the jobfile,
#                     stored in batch_directive_transfer[1..ibdt]
#   EC:<text>         error command, stored in err_command[1..iec]
#   IC:<text>         input command, stored in in_command[1..iic]
#   OC:<text>         output command, stored in out_command[1..ioc]
# Empty lines, lines starting with "#" and all other lines are skipped.
[[ $silent = false ]] && printf "\n\n Reading the configuration file... "
# READ VARIABLE SETTINGS FROM CONFIG FILE LINE BY LINE
# (read is called without -r, so backslashes in the file are processed by read)
while read line
do
# FIRST REPLACE ENVIRONMENT-VARIABLES BY THEIR RESPECTIVE VALUES
# The line is re-evaluated inside double quotes, so $variables and command
# substitutions contained in the file are expanded here.
# NOTE(review): the file content is executed by eval - the configuration
# file has to be trusted; a line containing an unbalanced double quote
# presumably makes eval fail - confirm.
eval line=\"$line\"
# INTERPRET THE LINE
# ($line is deliberately left unquoted in the echo calls below, i.e. it is
# subject to word splitting before cut inspects its first characters)
if [[ "$(echo $line)" = "" ]]
then
# EMPTY LINE, NO ACTION
continue
elif [[ "$(echo $line | cut -c1)" = "#" ]]
then
# LINE IS A COMMENT LINE
continue
elif [[ "$(echo $line | cut -c1)" = "%" ]]
then
# LINE DEFINES AN ENVIRONMENT-VARIABLE
# var:   first blank-separated field without the leading "%"
# value: everything after the first blank
# (cut -s yields an empty result if the line contains no blank at all)
var=`echo $line | cut -d" " -s -f1 | cut -c2-`
value=`echo $line | cut -d" " -s -f2-`
# VALUE FROM THE CONFIGURATION-FILE IS ASSIGNED TO THE
# ENVIRONMENT-VARIABLE, BUT ONLY IF NO VALUE HAS BEEN ALREADY
# ASSIGNED WITHIN THIS SCRIPT (E.G. BY SCRIPT-OPTIONS).
# NON-ASSIGNED VARIABLES HAVE VALUE "" OR 0 (IN CASE OF INTEGER).
# HENCE THE GENERAL RULE IS: SCRIPT-OPTION OVERWRITES THE
# CONFIGURATION-FILE.
if [[ "$(eval echo \$$var)" = "" || "$(eval echo \$$var)" = "0" ]]
then
eval export $var="\$value"
# TERMINAL OUTPUT OF ENVIRONMENT-VARIABLES, IF TRACEBACK IS SWITCHED ON
if [[ $do_trace = true ]]
then
printf "\n*** ENVIRONMENT-VARIABLE $var = $value"
fi
fi
elif [[ "$(echo $line | cut -c1-3)" = "BD:" ]]
then
# LINE DEFINES BATCH-DIRECTIVE
# (the counter is incremented first, so the array is filled from index 1;
# the "BD:" prefix is cut off before the text is stored)
(( ibd = ibd + 1 ))
line=$(echo $line | cut -c4-)
batch_directive[$ibd]="$line"
elif [[ "$(echo $line | cut -c1-4)" = "BDT:" ]]
then
# LINE DEFINES BATCH-DIRECTIVE FOR SENDING BACK THE JOBFILE FROM A
# REMOTE TO A LOCAL HOST
(( ibdt = ibdt + 1 ))
line=$(echo $line | cut -c5-)
batch_directive_transfer[$ibdt]="$line"
elif [[ "$(echo $line | cut -c1-3)" = "EC:" ]]
then
# LINE DEFINES ERROR-COMMAND
(( iec = iec + 1 ))
line=$(echo $line | cut -c4-)
err_command[$iec]="$line"
elif [[ "$(echo $line | cut -c1-3)" = "IC:" ]]
then
# LINE DEFINES INPUT-COMMAND
(( iic = iic + 1 ))
line=$(echo $line | cut -c4-)
in_command[$iic]="$line"
elif [[ "$(echo $line | cut -c1-3)" = "OC:" ]]
then
# LINE DEFINES OUTPUT-COMMAND
(( ioc = ioc + 1 ))
line=$(echo $line | cut -c4-)
out_command[$ioc]="$line"
else
# SKIP ALL OTHER LINES
continue
fi
done < $config_file
# CHECK SETTING OF REQUIRED PARAMETERS
#
# Each list entry has the form "<variable name>:<wording used in the error
# message>". The variables are tested in the given order; the first one that
# is still empty stops palmrun with locat=config_file.
for  required  in  "compiler_name:compiler name" \
                   "compiler_name_ser:compiler name for non-paralle compilation" \
                   "compiler_options:compiler options" \
                   "linker_options:linker options" \
                   "execute_command:execute command"
do
   required_var=${required%%:*}
   required_text=${required#*:}
   if [[ "${!required_var}" = "" ]]
   then
      printf "\n +++ no $required_text found in $config_file"
      printf "\n Please add line \"$required_var ...\" to that file."
      locat=config_file; exit
   fi
done

# A HOSTFILE IS OPTIONAL. IF ONE IS GIVEN, IT MUST EITHER BE THE KEYWORD
# "auto" OR THE NAME OF AN EXISTING FILE
if [[ "$hostfile" != ""  &&  $hostfile != auto  &&  ! -f $hostfile ]]
then
   printf "\n +++ no hostfile \"$hostfile\" found"
   printf "\n Please check line \"hostfile ...\" in $config_file"
   locat=config_file; exit
fi
# DETERMINE THE CALL STATUS
if [[ "$return_address" = "" ]]
then
      # NO RETURN ADDRESS GIVEN: I HAVE BEEN CALLED INTERACTIVELY ON THIS HOST
   running_on_remote=false

      # A REMOTE IP SETTING MEANS THAT I HAVE TO CREATE A BATCH JOB TO RUN
      # PALM ON THE REMOTE HOST
   [[ "$remote_ip" != "" ]]  &&  create_remote_batch_job=true
else
      # A RETURN ADDRESS HAS BEEN GIVEN: I AM RUNNING ON A REMOTE HOST, WHICH
      # ALSO MEANS THAT I AM RUNNING IN BATCH MODE
   running_on_remote=true
fi
# READ AND EVALUATE THE I/O-FILE LIST
#
# Each non-comment line of the file connection file has the blank-separated
# fields
#    <local name>  <in|out>[:<loc-field>[:<tr/ar-field>]]  <activation strings>  <path>  <file ending>  [<extension>]
# A connection is stored if one of its activation strings (separated by ":")
# matches an entry of activation_string_list, or if it carries the
# activation string "-".
# Input files are stored in  localin/transin/actionin/typein/pathin/endin/extin[1..iin],
# output files in            localout/actionout/typeout/pathout/endout/extout[1..iout].
[[ $silent = false ]]  &&  printf "\n Reading the I/O files... "

# READ THE FILE CONNECTION FILE LINE BY LINE
while  read line
do

      # FIRST REPLACE ENVIRONMENT-VARIABLES BY THEIR RESPECTIVE VALUES
   eval  line=\"$line\"

      # INTERPRET THE LINE
   if [[ "$(echo $line)" = "" ]]
   then

         # EMPTY LINE, NO ACTION
      continue

   elif [[ "$(echo $line | cut -c1)"  =  "#" ]]
   then

         # LINE IS A COMMENT LINE
      true

   else

         # LINE DEFINES FILE CONNECTION. READ THE FILE ATTRIBUTES.
         # s2a: in/out - field
         # s2b: loc    - field (optional)
         # s2c: tr/ar  - field (optional)
      s1=$(echo $line | cut -d" " -f1)
      s2=$(echo $line | cut -d" " -s -f2)
      s2a=$(echo $s2 | cut -d":" -f1)
      if [[ $(echo $s2 | grep -c ":") = 0 ]]
      then
         s2b=""
         s2c=""
      else
         s2b=$(echo $s2 | cut -d":" -f2 | sed 's/:/ /g')
         s2c=$(echo $s2 | cut -d":" -s -f3 | sed 's/:/ /g')
      fi
      s3=$(echo $line | cut -d" " -f3)
      s4=$(echo $line | cut -d" " -s -f4)
      s5=$(echo $line | cut -d" " -s -f5)
      s6=$(echo $line | cut -d" " -s -f6)

         # STORE FILE CONNECTION, IF ACTIVATED BY ACTIVATION-STRING FROM
         # INPUT- OR OUTPUT-LIST.
         # VARIABLE S3 MAY CONTAIN A LIST OF ACTIVATION STRINGS (FIELD-SEPARATOR ":").
         # IF EXECUTION IS SCHEDULED FOR A REMOTE-MACHINE AND THE FILE IS ONLY
         # LOCALLY REQUIRED ON THAT MACHINE (I.E. s2b = loc), THE FILE CONNECTION
         # IS NOT CHECKED AND STORED.
         # ":" IS TEMPORARILY ADDED TO IFS, SO THAT THE UNQUOTED LISTS BELOW ARE
         # SPLIT AT BLANKS AS WELL AS AT COLONS.
      IFSALT="$IFS"; IFS="$IFS:"
      if [[ "$s2a" = in  &&  ! ( $create_remote_batch_job = true  &&  ( "$s2b" = loc  ||  "$s2b" = locopt ) ) ]]
      then
         found=false
         for  actual  in  $activation_string_list
         do
            for  formal  in  $s3
            do
               [[ $actual = $formal  ||  "$formal" = "-" ]]  &&  found=true
            done
         done
         if [[ $found = true ]]
         then
            (( iin = iin + 1 ))
            localin[$iin]=$s1; transin[$iin]=$s2b; actionin[$iin]=$s2c;
            typein[$iin]=$s3;  pathin[$iin]=$s4;   endin[$iin]=$s5;
            extin[$iin]=$s6
         fi
      elif [[ "$s2a" = out  &&  ! ( $create_remote_batch_job = true  &&  "$s2b" = loc ) ]]
      then
         found=false
         for  actual  in  $activation_string_list
         do
            for  formal  in  $s3
            do
               [[ $actual = $formal  ||  "$formal" = "-" ]]  &&  found=true
            done
         done
         if [[ $found = true ]]
         then
            (( iout = iout + 1 ))
            localout[$iout]=$s1; actionout[$iout]=$s2c; typeout[$iout]=$s3;
            pathout[$iout]=$s4;  endout[$iout]=$s5;     extout[$iout]=$s6
         fi
      elif [[ "$s2a" != in  &&  "$s2a" != out ]]
      then
            # THE OFFENDING LINE STEMS FROM THE FILE CONNECTION FILE, SO THAT
            # FILE (AND NOT THE CONFIGURATION FILE) IS NAMED IN THE MESSAGE
         printf "\n +++ I/O-attribute in file connection file $fileconnection_file has the invalid"
         printf "\n value \"$s2\". Only \"in\" and \"out\" are allowed!"
         locat=connect; exit
      fi
      IFS="$IFSALT"
   fi
done < $fileconnection_file
# VALUES OF PALMRUN-OPTIONS OVERWRITE THOSE FROM THE CONFIGURATION-FILE
# (an option that has not been given leaves its palmrun_... variable empty,
# and the value found so far is kept)
if [[ $palmrun_memory != "" ]]
then
   memory=$palmrun_memory
fi
if [[ $palmrun_cpumax != "" ]]
then
   cpumax=$palmrun_cpumax
fi
if [[ -n "$palmrun_cores" ]]
then
   cores=$palmrun_cores
fi
if [[ -n "$max_par_io_str" ]]
then
   maximum_parallel_io_streams=$max_par_io_str
fi
if [[ -n "$palmrun_tasks_per_node" ]]
then
   tasks_per_node=$palmrun_tasks_per_node
fi

# EVALUATE MODEL COUPLING FEATURES (OPTION -Y)
# THE OPTION ARGUMENT HOLDS TWO BLANK-SEPARATED NUMBERS (ATMOSPHERE CORES,
# OCEAN CORES), WHICH MUST ADD UP TO THE TOTAL NUMBER OF CORES
if [[ $run_coupled_model = true ]]
then
   cores_atmos=$(echo $coupled_dist | cut -d" " -s -f1)
   cores_ocean=$(echo $coupled_dist | cut -d" " -s -f2)

   if (( $cores_ocean + $cores_atmos != $cores ))
   then
      printf "\n +++ number of processors does not fit to specification by \"-Y\"."
      printf "\n PEs (total) : $cores"
      printf "\n PEs (atmosphere): $cores_atmos"
      printf "\n PEs (ocean) : $cores_ocean"
      locat=coupling; exit
   fi
fi
# IF I AM IN BATCH MODE, CHECK IF EXECUTABLE AND OTHER REQUIRED FILES
# HAVE BEEN GENERATED BY PALMBUILD AND STORED IN THE SOURCES_FOR_RUN_...
# FOLDER
#
# Overview of this section:
#  - batch mode:       only the existence of
#                      ${fast_io_catalog}/${sources_for_run_catalog} is checked
#  - restart run:      nothing is done (the folder of the initial run is reused)
#  - all other calls:  the folder $sources_for_run_catalog is re-created in
#                      the current directory and filled with the source files
#                      selected by option -s, the Makefile, the files found in
#                      $user_source_path, both configuration files and the
#                      scripts palmrun and batch_scp
if [[ $running_in_batch_mode = true ]]
then
if [[ ! -d ${fast_io_catalog}/${sources_for_run_catalog} ]]
then
printf "\n +++ directory ${fast_io_catalog}/${sources_for_run_catalog} is missing"
printf "\n Please check the output of the palmrun-call"
printf "\n that you did on your local host."
locat=SOURCES_FOR_RUN; exit
fi
else
# CREATE THE SOURCES_FOR_RUN_... FOLDER, BUT NOT IF I AM PART OF AN
# AUTOMATIC RESTART RUN
# AUTOMATIC RESTART RUNS JUST ACCESS THE DIRECTORY CREATED BY THE INITIAL RUN
if [[ $restart_run = false ]]
then
# COLLECT FILES TO BE COMPILED IN THE SOURCES_FOR_RUN_... FOLDER ON
# THE LOCAL HOST
if [[ ! -d $source_path ]]
then
printf "\n\n +++ source path \"$source_path\" on local host"
printf "\n \"$(hostname)\" does not exist"
locat=source_path; exit
fi
# AN EXISTING FOLDER OF A PREVIOUS CALL IS REMOVED COMPLETELY BEFORE THE
# NEW ONE IS CREATED (RELATIVE TO THE CURRENT DIRECTORY)
rm -rf $sources_for_run_catalog
mkdir -p $sources_for_run_catalog
if [[ "$source_list" = LM ]]
then
# DETERMINE MODIFIED FILES OF THE SVN WORKING COPY
source_list=""
cd $source_path
# CHECK, IF TRUNK-DIRECTORY IS UNDER SVN CONTROL
# NOTE(review): only a message is printed here; the script neither sets
# locat nor exits, so "svn status" below is still executed - confirm that
# continuing is intended.
if [[ ! -d ../.svn ]]
then
printf "\n\n +++ source directory"
printf "\n \"$source_path\" "
printf "\n is not under control of \"subversion\"."
printf "\n Please do not use palmrun-option \"-s LM\"\n"
fi
# LIST ALL MODIFIED SOURCE CODE FILES
# (status lines starting with "M" or "?" are considered; the file name is
# taken from column 8 onwards of the status line)
Filenames=""
svn status > tmp_svnstatus
while read line
do
firstc=`echo $line | cut -c1`
if [[ $firstc = M || $firstc = "?" ]]
then
Name=`echo "$line" | cut -c8-`
# THE EXTENSION IS THE SECOND "."-SEPARATED FIELD OF THE NAME, I.E. ONLY
# NAMES CONTAINING EXACTLY ONE DOT ARE CLASSIFIED BY THEIR REAL EXTENSION
extension=`echo $Name | cut -d. -f2`
if [[ "$extension" = f90 || "$extension" = F90 || "$extension" = f || "$extension" = F || "$extension" = c ]]
then
Filenames="$Filenames "$Name
fi
fi
done < tmp_svnstatus
rm -rf tmp_svnstatus
# COPY FILES TO SOURCES_FOR_RUN_...
for filename in $Filenames
do
cp $filename ${working_directory}/${sources_for_run_catalog}
source_list=$source_list"$filename "
done
cd - > /dev/null
# COPY FILES GIVEN BY OPTION -s TO DIRECTORY SOURCES_FOR_RUN_...
elif [[ "$source_list" != "" ]]
then
cd $source_path
for filename in $source_list
do
# SOURCE CODE FILE IS NOT ALLOWED TO INCLUDE PATH
if [[ $(echo $filename | grep -c "/") != 0 ]]
then
printf "\n +++ source code file: $filename"
printf "\n must not contain (\"/\") "
locat=source; exit
fi
if [[ ! -f $filename ]]
then
printf "\n +++ source code file: $filename"
printf "\n does not exist"
locat=source; exit
else
cp $filename ${working_directory}/${sources_for_run_catalog}
fi
done
cd - > /dev/null
fi
# CHECK, IF MAKEFILE EXISTS AND COPY IT TO THE SOURCES_FOR_RUN... DIRECTORY
# (WITHOUT OPTION -M THE FILE "Makefile" FROM THE SOURCE PATH IS USED)
[[ "$makefile" = "" ]] && makefile=$source_path/Makefile
if [[ ! -f $makefile ]]
then
printf "\n +++ file \"$makefile\" does not exist"
locat=make; exit
else
cp $makefile ${sources_for_run_catalog}/Makefile
fi
# COPY FILES FROM OPTIONAL SOURCE PATH GIVEN IN THE CONFIGURATION FILE
if [[ "$user_source_path" != "" ]]
then
# DOES THE DIRECTORY EXIST?
# (A MISSING DIRECTORY IS NOT AN ERROR: THE PATH IS RESET AND THE RUN GOES ON)
if [[ ! -d $user_source_path ]]
then
printf "\n\n *** INFORMATIVE: additional source code directory"
printf "\n \"$user_source_path\" "
printf "\n does not exist or is not a directory."
printf "\n No source code will be used from this directory!\n"
user_source_path=""
if [[ $silent == false ]]
then
sleep 2
fi
else
cd $user_source_path
found=false
# COLLECT THE USER FILES PER EXTENSION. THE OUTPUT OF ls INCLUDES ITS ERROR
# MESSAGE (2>&1); IF THAT OUTPUT CONTAINS THE UNEXPANDED PATTERN, NO FILE
# WITH THIS EXTENSION EXISTS AND NOTHING IS ADDED.
Names=$(ls -1 *.f90 2>&1)
[[ $(echo $Names | grep -c '*.f90') = 0 ]] && AddFilenames="$Names"
Names=$(ls -1 *.F90 2>&1)
[[ $(echo $Names | grep -c '*.F90') = 0 ]] && AddFilenames="$AddFilenames $Names"
Names=$(ls -1 *.F 2>&1)
[[ $(echo $Names | grep -c '*.F') = 0 ]] && AddFilenames="$AddFilenames $Names"
Names=$(ls -1 *.f 2>&1)
[[ $(echo $Names | grep -c '*.f') = 0 ]] && AddFilenames="$AddFilenames $Names"
Names=$(ls -1 *.c 2>&1)
[[ $(echo $Names | grep -c '*.c') = 0 ]] && AddFilenames="$AddFilenames $Names"
cd - > /dev/null
cd $sources_for_run_catalog
# COPY MAKEFILE IF EXISTING
# (IT REPLACES THE Makefile THAT HAS BEEN COPIED ABOVE)
if [[ -f $user_source_path/Makefile ]]
then
printf "\n\n *** user Makefile from directory"
printf "\n \"$user_source_path\" is used \n"
if [[ $silent == false ]]
then
sleep 1
fi
cp $user_source_path/Makefile .
fi
for filename in $AddFilenames
do
# A FILE OF THE SAME NAME ALREADY EXISTS IN THE SOURCES_FOR_RUN_... FOLDER
# (THE CURRENT DIRECTORY), I.E. IT HAS BEEN COPIED THERE DUE TO OPTION -s
if [[ -f $filename ]]
then
printf "\n +++ source code file \"$filename\" found in additional"
printf "\n source code directory \"$user_source_path\" "
printf "\n but was also given with option \"-s\" which means that it should be taken"
printf "\n from directory \"$source_path\"."
locat=source; exit
fi
cp $user_source_path/$filename .
source_list="$source_list $filename"
# CHECK IF FILE IS CONTAINED IN MAKEFILE
if [[ $(grep -c $filename Makefile) = 0 ]]
then
printf "\n\n +++ user file \"$filename\" "
printf "\n is not listed in Makefile \n"
locat=source; exit
else
# THE HEADLINE IS PRINTED ONLY ONCE, IN FRONT OF THE FIRST FILE NAME
if [[ $found = false ]]
then
found=true
printf "\n\n *** following user file(s) added to the"
printf " files to be translated:\n "
fi
printf "$filename "
if [[ $silent == false ]]
then
sleep 0.5
fi
fi
done
[[ $found = true ]] && printf "\n"
cd - > /dev/null
fi
fi
# COPY CONFIGURATION FILES
cp $config_file $sources_for_run_catalog
cp $fileconnection_file $sources_for_run_catalog
# COPY SHELLSCRIPTS
cp ${source_path}/../SCRIPTS/palmrun $sources_for_run_catalog
cp ${source_path}/../SCRIPTS/batch_scp $sources_for_run_catalog
fi
fi
# GET THE GLOBAL REVISION-NUMBER OF THE SVN-REPOSITORY
# (HANDED OVER TO RESTART-RUNS USING OPTION -G)
# A REVISION THAT HAS ALREADY BEEN GIVEN IS KEPT UNCHANGED. OTHERWISE THE
# OUTPUT OF svnversION FOR THE SOURCE PATH IS USED; ITS ERROR MESSAGES ARE
# SUPPRESSED, SO THE RESULT MAY CONSIST OF THE PREFIX "Rev: " ONLY.
if [[ -z "$global_revision" ]]
then
   global_revision="Rev: $(svnversion $source_path 2>/dev/null)"
fi
# IN CASE OF PARALLEL EXECUTION, CHECK SOME SPECIFICATIONS CONCERNING PROCESSOR NUMBERS
if [[ -n $cores ]]
then
# CHECK, IF THE NUMBER OF CORES PER NODE HAS BEEN GIVEN UND IF IT IS AN
# INTEGRAL DIVISOR OF THE TOTAL NUMBER OF CORES GIVEN BY OPTION -X
if [[ $tasks_per_node = 0 ]]
then
printf "\n"
printf "\n +++ option \"-T\" (tasks per node) is missing"
printf "\n set -T option or define tasks_per_node in the config file"
locat=tasks_per_node; (( iec = 0 )); exit
fi
if (( cores < tasks_per_node ))
then
printf "\n"
printf "\n +++ tasks per node (-T) cannot exceed total number of cores (-X)"
printf "\n given values: -T $tasks_per_node -X $cores"
locat=tasks_per_node; (( iec = 0 )); exit
fi
(( nodes = cores / ( tasks_per_node * threads_per_task ) ))
(( mpi_tasks = cores / threads_per_task ))
[[ $mpi_tasks = 0 ]] && (( mpi_tasks = 1 ))
(( ii = cores / tasks_per_node ))
(( remaining_cores = cores - ii * tasks_per_node ))
if (( remaining_cores > 0 ))
then
printf "\n"
printf "\n +++ WARNING: tasks per node (option \"-T\") is not an integral"
printf "\n divisor of the total number of cores (option \"-X\")"
printf "\n values of this palmrun-call: \"-T $tasks_per_node\" \"-X $cores\""
printf "\n One of the nodes is filled with $remaining_cores instead of $tasks_per_node tasks"
(( nodes = nodes + 1 ))
fi
# SET THE TOTAL NUMBER OF NODES, REQUIRED FOR THE SUBJOB-COMMAND (SEE FURTHER BELOW)
if [[ "$tasks_per_node" != "" ]]
then
TOPT="-T $tasks_per_node"
fi
fi
# SET DEFAULT VALUE FOR THE MAXIMUM NUMBER OF PARALLEL IO STREAMS
if [[ "$maximum_parallel_io_streams" = "" ]]
then
maximum_parallel_io_streams=$cores
fi
# SET PORT NUMBER OPTION FOR CALLS OF SSH/SCP AND batch_scp SCRIPT
# (the same port is stored once as upper-case -P and once as lower-case -p
# option string)
if [[ -n "$scp_port" ]]
then
   SSH_PORTOPT="-p $scp_port"
   PORTOPT="-P $scp_port"
fi

# DETERMINE THE SSH-OPTION IN CASE THAT AN SSH-KEY IS EXPLICITLY GIVEN IN THE
# CONFIG-FILE
# (the key name is replaced by the complete -i option; the key is looked up
# in $HOME/.ssh)
[[ -z "$ssh_key" ]]  ||  ssh_key="-i $HOME/.ssh/$ssh_key"

# SET QUEUE, IF NOT GIVEN
# (only relevant if a batch job is created; a missing default queue is
# reported, but does not stop the script)
if [[ $create_batch_job = true  ||  $create_remote_batch_job = true ]]
then
   if [[ $queue = none ]]
   then
      if [[ "$defaultqueue" = "" ]]
      then
         printf "\n"
         printf "\n +++ no default queue given in configuration file and no queue"
         printf "\n given with option -q"
      fi
      queue=$defaultqueue
   fi
fi
# GENERATE FULL FILENAMES OF INPUT-FILES, INCLUDING THEIR PATH
# CHECK, IF INPUT-FILES EXIST, AND DETERMINE HIGHEST CYCLE NUMBER (IF CYCLES EXIST)
(( i = 0 ))
while (( i < iin ))
do
(( i = i + 1 ))
(( maxcycle = 0 ))
# GENERATE PATH AND FULL FILE NAME (then-BRANCH: FIXED FULL NAME IS GIVEN, I.E. THE
# FILE IDENTIFIER IS NOT PART OF THE FILENAME))
if [[ "${actionin[$i]}" = di ]]
then
remotepathin[$i]=${pathin[$i]}/${endin[$i]} # EVALUATE REMOTE-PATH ON THE REMOTE
# HOST ONLY
eval filename=${pathin[$i]}/${endin[$i]}
else
remotepathin[$i]=${pathin[$i]}/${fname}${endin[$i]} # EVALUATE REMOTE-PATH ON THE REMOTE
# HOST ONLY
eval filename=${pathin[$i]}/${fname}${endin[$i]}
fi
eval pathname=${pathin[$i]}
# CHECK IF FILE EXISTS
if ! ls $filename* 1>/dev/null 2>&1
then
# FILES WITH ATTRIBUTE locopt ARE OPTIONAL. NO ABORT, IF THEY DO NOT EXIST.
if [[ "${transin[$i]}" != "locopt" ]]
then
printf "\n\n +++ INPUT-file: "
if [[ "${extin[$i]}" = "" || "${extin[$i]}" = " " ]]
then
printf "\n $filename"
else
printf "\n $filename.${extin[$i]}"
fi
printf "\n does not exist\n"
locat=input; exit
else
transin[$i]="unavailable"
fi
else
# DETERMINE THE FILE'S CYCLE NUMBER
ls -1 -d $filename > filelist 2>/dev/null
ls -1 -d $filename.* >> filelist 2>/dev/null
while read line
do
# filename without path (i.e. after the last "/")
basefilename=$(basename ${line})
# check if there is an extension
extension=${basefilename##*.}
if [[ "$extension" = "${extin[$i]}" ]]
then
basefilename=${basefilename%.*}
fi
# check for an existing cycle number
cycle=${basefilename##*.}
if [[ $cycle =~ ^-?[0-9]+$ ]]
then
(( icycle = $cycle ))
else
(( icycle = 0 ))
fi
if (( icycle > maxcycle ))
then
(( maxcycle = icycle ))
fi
done 0 ))
then
if [[ "${extin[$i]}" != " " && "${extin[$i]}" != "" ]]
then
filename=${filename}.$maxcycle.${extin[$i]}
else
filename=${filename}.$maxcycle
fi
else
if [[ "${extin[$i]}" != " " && "${extin[$i]}" != "" ]]
then
filename=${filename}.${extin[$i]}
fi
fi
# STORE FILENAME WITHOUT PATH BUT WITH CYCLE NUMBER,
# IS LATER USED FOR TRANSFERRING FILES WIHIN THE JOB (SEE END OF FILE)
absnamein[$i]=$filename
if (( maxcycle > 0 ))
then
if [[ "${actionin[$i]}" = di ]]
then
frelin[$i]=${endin[$i]}.$maxcycle
else
frelin[$i]=${fname}${endin[$i]}.$maxcycle
fi
else
if [[ "${actionin[$i]}" = di ]]
then
frelin[$i]=${endin[$i]}
else
frelin[$i]=${fname}${endin[$i]}
fi
fi
fi
done
# GENERATE FULL FILENAMES OF OUTPUT-FILES (WITHOUT $ OR ~),
# CHECK, IF OUTPUT-FILES EXIST, AND DETERMINE HIGHEST CYCLE NUMBER (IF CYCLES EXIST),
# OR, IN CASE THAT FILE DOES NOT EXIST, CHECK, IF IT CAN BE CREATED
# THESE ACTIONS ARE NOT CARRIED OUT, IF FILES SHALL BE TRANSFERRED FROM THE REMOTE TO
# THE LOCAL HOST (BECAUSE THERE IS NO DIRECT ACCESS TO THE LOCAL DIRECTORIES FROM THE
# REMOTE HOST)
#
# For every output file treated here (index 1..iout) the loop stores the
# cycle number in cycnum[i] and replaces pathout[i] by the full file name
# (path + run name + file ending, without cycle number).
(( i = 0 ))
while (( i < iout ))
do
(( i = i + 1 ))
if [[ ! ( $running_on_remote = true && ( "${actionout[$i]}" = tr || "${actionout[$i]}" = tra || "${actionout[$i]}" = trpe ) ) ]]
then
# NO TRANSFER IS REQUIRED IN THIS CASE, SO THE TRANSFER PART IS REMOVED
# FROM THE ACTION: tr -> "", trpe -> pe, tra -> a
if [[ "${actionout[$i]}" = tr ]]
then
actionout[$i]=""
elif [[ "${actionout[$i]}" = trpe ]]
then
actionout[$i]=pe
elif [[ "${actionout[$i]}" = tra ]]
then
actionout[$i]=a
fi
(( maxcycle = 0 ))
eval filename=${pathout[$i]}/${fname}${endout[$i]}
eval catalogname=${pathout[$i]}
if ! ls $filename* 1>/dev/null 2>&1
then
# IF OUTPUT-FILE DOES NOT EXIST CHECK, IF IT CAN BE CREATED
# (THE FILE IS CREATED EMPTY AS A TEST AND REMOVED AGAIN IMMEDIATELY)
if cat /dev/null > $filename
then
rm $filename
else
# CHECK, IF THE DIRECTORY WHERE FILE SHALL BE COPIED TO EXISTS
# IF IT DOES NOT EXIST, TRY TO CREATE IT
if [[ ! -d $catalogname ]]
then
if mkdir -p $catalogname
then
printf "\n\n *** directory:"
printf "\n $catalogname"
printf "\n was created\n"
else
printf "\n\n +++ OUTPUT-file:"
printf "\n $filename"
printf "\n cannot be created, because directory does not exist"
printf "\n and cannot be created either"
printf "\n"
locat=output ; exit
fi 2>/dev/null
else
printf "\n\n +++ OUTPUT-file:"
printf "\n $filename"
printf "\n cannot be created, although directory exists"
printf "\n"
locat=output ; exit
fi
fi 2>/dev/null
else
# DETERMINE THE CYCLE NUMBER
# (THE CANDIDATES ARE THE FILE ITSELF AND ALL FILES <filename>.*; FOR OUTPUT
# FILES icycle IS THE CYCLE NUMBER FOLLOWING THE ONE FOUND, AND 1 FOR A FILE
# WITHOUT CYCLE NUMBER)
ls -1 -d $filename > filelist 2>/dev/null
ls -1 -d $filename.* >> filelist 2>/dev/null
while read line
do
# filename without path (i.e. after the last "/")
basefilename=$(basename ${line})
# check if there is an extension
extension=${basefilename##*.}
if [[ "$extension" = "${extout[$i]}" ]]
then
basefilename=${basefilename%.*}
fi
# check for an existing cycle number
cycle=${basefilename##*.}
if [[ $cycle =~ ^-?[0-9]+$ ]]
then
(( icycle = $cycle + 1 ))
else
(( icycle = 1 ))
fi
if (( icycle > maxcycle ))
then
(( maxcycle = icycle ))
fi
# NOTE(review): the following line is truncated in this copy of the script
# and is not valid shell syntax. Text seems to be missing between "done" and
# "0 ))": presumably the input redirection of the loop from "filelist", the
# removal of that file, and the header of the condition that belongs to the
# "then" below (apparently ending in "> 0 ))"). The "else" / "fi" further
# below, which have no visible matching "if", suggest that a closing "fi" and
# a further "if ... then" were lost as well. Restore this passage from the
# version control system; do not guess the lost condition.
done 0 ))
then
# TEST WHETHER A FILE CARRYING THE NEW CYCLE NUMBER CAN BE CREATED
# (IT IS CREATED EMPTY AND REMOVED AGAIN IMMEDIATELY)
filename_tmp=${filename}.$maxcycle
if cat /dev/null > $filename_tmp
then
rm $filename_tmp
else
printf "\n +++ OUTPUT-file:"
printf "\n $filename_tmp"
printf "\n cannot be created"
locat=output ; exit
fi
fi
else
(( maxcycle = maxcycle - 1 ))
fi
# STORE THE CYCLE NUMBER AND REPLACE THE PATH BY THE FULL FILE NAME
(( cycnum[$i] = maxcycle ))
pathout[$i]=$filename
fi
done
# DETERMINE THE NAME OF PALMRUN'S TEMPORARY WORKING DIRECTORY
# (only if palmrun is not running in batch mode: the run id is a random
# number, the job id is the run name followed by that number, and the
# directory of that name is located below $fast_io_catalog)
case  "$running_in_batch_mode"  in
   (false)  run_id=$RANDOM
            job_id=${fname}.$run_id
            TEMPDIR=$fast_io_catalog/$job_id
            ;;
esac
# CHECK SETTINGS REQUIRED FOR BATCH JOBS
#
# If a batch job is created, batch directives must have been found in the
# configuration file, and cpu time, memory demand and (for remote jobs) the
# remote user name are requested interactively as long as they are undefined.
# Otherwise only the cpu time is set: to a practically unlimited value for
# interactive runs, or to cpumax if palmrun is running in batch mode.
if [[ $create_batch_job != true  &&  $create_remote_batch_job != true ]]
then

   if [[ $running_in_batch_mode = false ]]
   then
      cputime=10000000                   # NO LIMIT FOR INTERACTIVE RUNS
      cpumax=$cputime
   else
      cputime=$cpumax
   fi

else

      # CHECK, IF JOB DIRECTIVES HAVE BEEN GIVEN IN CONFIGURATION FILE
   if [[ $ibd = 0 ]]
   then
      printf "\n"
      printf "\n +++ no batch directives found in configuration file"
      locat=config_file_batch_directives; (( iec = 0 )); exit
   fi

      # CHECK IF CPUTIME IS GIVEN FOR JOB
      # (the user is asked again and again until a non-zero value is typed)
   cputime=$cpumax
   done=false
   until [[ $done != false ]]
   do
      if (( cputime == 0 ))
      then
         printf "\n +++ cpu-time is undefined"
         printf "\n >>> Please type CPU-time in seconds as INTEGER:"
         printf "\n >>> "
         read cputime 1>/dev/null 2>&1
      else
         done=true
      fi
   done
   cpumax=$cputime

      # CHECK THE MEMORY DEMAND
      # (same procedure as for the cpu time)
   done=false
   until [[ $done != false ]]
   do
      if (( memory == 0 ))
      then
         printf "\n +++ memory demand is undefined"
         printf "\n >>> Please type memory in MByte per process as INTEGER:"
         printf "\n >>> "
         read memory 1>/dev/null 2>&1
      else
         done=true
      fi
   done

      # IN CASE OF REMOTE-JOBS CHECK, IF A USERNAME FOR THE REMOTE HOST IS GIVEN
   if [[ $create_remote_batch_job = true ]]
   then
      while [[ -z $remote_username ]]
      do
         printf "\n +++ username on remote host with IP \"$remote_ip\" is undefined"
         printf "\n >>> Please type username:"
         printf "\n >>> "
         read remote_username
      done
   fi

fi
# CALCULATE HOURS/MINUTES/SECONDS, E.G. FOR BATCH-DIRECTIVES
# (the cpu time given in seconds is split into full hours, the full minutes
# of the remainder and the seconds left over; the three numbers are joined
# by ":" without leading zeros)
cpu_hours=$(( cputime / 3600 ))
resttime=$(( cputime - cpu_hours * 3600 ))
cpu_minutes=$(( resttime / 60 ))
cpu_seconds=$(( resttime - cpu_minutes * 60 ))
timestring="${cpu_hours}:${cpu_minutes}:${cpu_seconds}"
# OUTPUT OF THE PALMRUN-HEADER, FIRST PART:
# VERSION AND CALL TIME, CALLING HOST, EXECUTION HOST, RUN MODE AND (FOR BATCH
# RELATED RUNS ONLY) THE PROJECT ACCOUNT NUMBER
calltime=$(date)
printf "\n"
printf "#------------------------------------------------------------------------# \n"
printf "| %-35s%35s | \n" "$version" "$calltime"
printf "| | \n"

column1="called on:"
column2=$(hostname)
printf "| %-25s%-45s | \n" "$column1" "$column2"

# THE EXECUTION HOST IS SHOWN WITH THE REMOTE USERNAME, IF A REMOTE JOB IS
# CREATED, OTHERWISE WITH THE REMOTE OR LOCAL IP ADDRESS
column1="execution on:"
if [[ $create_remote_batch_job = true ]]; then
   column2="$host_configuration (username: $remote_username)"
elif [[ $running_on_remote = true ]]; then
   column2="$host_configuration (IP:$remote_ip)"
else
   column2="$host_configuration (IP:$local_ip)"
fi
printf "| %-25s%-45s | \n" "$column1" "$column2"

column1="running in:"
if [[ $running_in_batch_mode = true ]]; then
   column2="batch job mode"
elif [[ $create_batch_job = true || $create_remote_batch_job = true ]]; then
   column2="job creation mode"
else
   column2="interactive run mode"
fi
printf "| %-25s%-45s | \n" "$column1" "$column2"

if [[ $running_in_batch_mode = true || $create_batch_job = true || $create_remote_batch_job = true ]] && [[ -n "$project_account" ]]; then
   column1="project account number:"
   column2="$project_account"
   printf "| %-25s%-45s | \n" "$column1" "$column2"
fi
# PALMRUN-HEADER, SECOND PART: RESOURCES REQUESTED FOR THE RUN

# NUMBER OF CORES (FOR COUPLED RUNS INCLUDING THE ATMOSPHERE/OCEAN SPLIT)
if [[ -n $cores ]]; then
   column1="number of cores:"
   column2=$cores
   [[ $run_coupled_model = false ]] || column2="$cores (atmosphere: $cores_atmos, ocean: $cores_ocean)"
   printf "| %-25s%-45s | \n" "$column1" "$column2"
fi

# TASKS PER NODE, WITH AN EXTRA ROW IF remaining_cores IS POSITIVE
if [[ -n $tasks_per_node ]]; then
   column1="tasks per node:"
   column2="$tasks_per_node (number of nodes: $nodes)"
   printf "| %-25s%-45s | \n" "$column1" "$column2"
   if (( remaining_cores > 0 )); then
      column1=" "
      column2="one of the nodes only filled with $remaining_cores tasks"
      printf "| %-25s%-45s | \n" "$column1" "$column2"
   fi
fi

# PARALLEL IO STREAMS ARE ONLY LISTED, IF THEY DIFFER FROM THE NUMBER OF CORES
if [[ $maximum_parallel_io_streams != $cores ]]; then
   column1="max par io streams:"
   column2="$maximum_parallel_io_streams"
   printf "| %-25s%-45s | \n" "$column1" "$column2"
fi

if [[ $use_openmp = true ]]; then
   column1="threads per task:"
   column2="$threads_per_task"
   printf "| %-25s%-45s | \n" "$column1" "$column2"
fi

# MEMORY AND CPU TIME ARE ONLY LISTED FOR BATCH RELATED RUNS
if [[ $create_batch_job = true || $create_remote_batch_job = true || $running_in_batch_mode = true ]]; then
   column1="memory demand / PE:"
   column2="$memory MB"
   printf "| %-25s%-45s | \n" "$column1" "$column2"
   column1="job cpu time (h:m:s):"
   column2="$timestring"
   printf "| %-25s%-45s | \n" "$column1" "$column2"
fi
# PALMRUN-HEADER, THIRD PART: OPTIONS USED FOR BUILDING AND FOR THE ENVIRONMENT

# PRINT ONE HEADER ENTRY. $1 IS THE LABEL, $2 THE VALUE. THE VALUE IS CUT INTO
# PIECES OF 45 CHARACTERS; CONTINUATION ROWS ARE PRINTED WITH AN EMPTY LABEL.
# THE GLOBAL VARIABLES column1, column2 AND line ARE USED AS WORK VARIABLES
palmrun_print_wrapped_row() {
   column1="$1"
   column2=$(echo "$2" | cut -c-45)
   printf "| %-25s%-45s | \n" "$column1" "$column2"
   line=$(echo "$2" | cut -c46-)
   until [[ -z "$line" ]]; do
      column1=""
      column2=$(echo "$line" | cut -c-45)
      printf "| %-25s%-45s | \n" "$column1" "$column2"
      line=$(echo "$line" | cut -c46-)
   done
}

printf "| | \n"

# MAKE OPTIONS ARE ONLY LISTED, IF SOURCE FILES ARE GIVEN
if [[ -n "$source_list" && -n "$make_options" ]]; then
   palmrun_print_wrapped_row "make options:" "$make_options"
fi

# THESE THREE ENTRIES ARE ALWAYS LISTED, EVEN IF THEY ARE EMPTY
palmrun_print_wrapped_row "cpp directives:" "$cpp_options"
palmrun_print_wrapped_row "compiler options:" "$compiler_options"
palmrun_print_wrapped_row "linker options:" "$linker_options"

if [[ -n "$login_init_cmd" ]]; then
   palmrun_print_wrapped_row "login init commands:" "$login_init_cmd"
fi

if [[ -n "$module_commands" ]]; then
   palmrun_print_wrapped_row "module commands:" "$module_commands"
fi
# PALMRUN-HEADER, LAST PART: FILE NAME INFORMATION AND LIST OF SOURCE FILES
printf "| | \n"

column1="base name of files:"
column2=$fname
printf "| %-25s%-45s | \n" "$column1" "$column2"

# THE UNQUOTED echo IS KEPT: WORD SPLITTING REDUCES EVERY RUN OF WHITESPACE IN
# THE LIST TO A SINGLE BLANK
column1="activation string list:"
column2=$(echo $activation_string_list)
printf "| %-25s%-45s | \n" "$column1" "$column2"

[[ "$ocean_file_appendix" = true ]] && printf "| %-35s%-35s | \n" "suffix \"_O\" is added to local files" " "

# THE SOURCE FILE LIST IS PRINTED IN ROWS OF 70 CHARACTERS
if [[ -n "$source_list" ]]; then
   printf "| | \n"
   printf "| Files to be compiled: | \n"
   line=$source_list
   until [[ -z "$line" ]]; do
      linestart=$(echo $line | cut -c-70)
      printf "| %-70s | \n" "$linestart"
      line=$(echo "$line" | cut -c71-)
   done
fi

# CLOSING LINE OF THE HEADER (NO TRAILING NEWLINE)
printf "#------------------------------------------------------------------------#"
# OUTPUT OF FILE CONNECTIONS IN CASE OF TRACEBACK
# FOUR LISTS ARE PRINTED: INPUT FILES, OUTPUT FILES, INPUT COMMANDS AND OUTPUT
# COMMANDS. EACH LIST GETS ITS TITLE ONLY IF IT HAS AT LEAST ONE ENTRY
if [[ $do_trace = true ]]; then

   (( i = 0 ))
   (( i < iin )) && printf "\n\n >>> INPUT-file assignments:\n"
   while (( i < iin )); do
      (( i += 1 ))
      printf "\n ${localin[$i]} : ${absnamein[$i]}"
   done

   (( i = 0 ))
   (( i < iout )) && printf "\n\n >>> OUTPUT-file assignments:\n"
   while (( i < iout )); do
      (( i += 1 ))
      printf "\n ${localout[$i]} : ${pathout[$i]}"
   done

   (( i = 0 ))
   (( i < iic )) && printf "\n\n >>> INPUT-commands:\n"
   while (( i < iic )); do
      (( i += 1 ))
      printf "\n ${in_command[$i]}"
   done

   (( i = 0 ))
   (( i < ioc )) && printf "\n\n >>> OUTPUT-commands:\n"
   while (( i < ioc )); do
      (( i += 1 ))
      printf "\n ${out_command[$i]}"
   done

fi
# QUERY FOR CONTINUE
# THE QUERY IS SKIPPED IN SILENT MODE AND WHEN RUNNING INSIDE A BATCH JOB
if [[ $silent = false && $running_in_batch_mode = false ]]; then
   antwort=dummy
   printf "\n\n"
   printf " >>> everything o.k. (y/n) ? "
   # REPEAT THE QUESTION UNTIL ONE OF y, Y, n, N HAS BEEN TYPED. THE LOOP ALSO
   # ENDS, IF NO MORE INPUT CAN BE READ
   while read antwort; do
      case "$antwort" in
         y|Y|n|N) break ;;
         *)       printf " >>> everything o.k. (y/n) ? " ;;
      esac
   done
   case "$antwort" in
      n|N) locat=user_abort; (( iec = 0 )); exit ;;
   esac
   if [[ $create_batch_job = true || $create_remote_batch_job = true ]]; then
      printf " *** batch-job will be created and submitted"
   else
      printf " *** PALMRUN will now continue to execute on this machine"
   fi
fi
# PROVIDE FILES TO EXECUTE PALM AND CREATE THE EXECUTABLE
# THIS IS ONLY DONE FOR INITIAL RUNS (NO RESTART RUNS) WHICH ARE NOT ALREADY
# RUNNING INSIDE A BATCH JOB
if [[ $restart_run = false && $running_in_batch_mode = false ]]; then

   if [[ $create_batch_job = true || $create_remote_batch_job = true ]]; then
      printf "\n\n *** creating executable and other sources for the remote host"
   else
      printf "\n\n *** creating executable and other sources for the local host"
   fi

   # FIRST CHECK, IF A MAKE DEPOSITORY EXISTS, AND IF NOT, ASK THE USER IF
   # IT SHALL BE CREATED
   ask_for_make_depository=false
   if [[ $create_remote_batch_job = true ]]; then
      # THE REMOTE BASE DIRECTORY IS TAKEN FROM THE SECOND BLANK-SEPARATED FIELD
      # OF THE %base_directory LINE OF THE CONFIGURATION FILE. THE TEST ITSELF IS
      # SENT TO THE REMOTE HOST VIA SSH; ITS OUTPUT IS SHOWN AND ALSO STORED IN
      # A PROTOCOL FILE, WHICH IS SEARCHED FOR THE "not found" MESSAGE
      line=$(grep %base_directory $config_file)
      make_depository=$(echo $line | cut -d" " -s -f2)/MAKE_DEPOSITORY_${host_configuration}
      echo "[[ ! -d ${make_depository} ]] && echo depository not found" | ssh -q $ssh_key ${remote_username}@${remote_ip} 2>&1 | tee ${host_configuration}_last_make_protokoll
      if [[ $(grep -c "depository not found" ${host_configuration}_last_make_protokoll) != 0 ]]; then
         printf "\n\n +++ make depository \"${make_depository}\""
         printf "\n on remote host not found!"
         ask_for_make_depository=true
      fi
      rm ${host_configuration}_last_make_protokoll
   else
      # CHECK FOR MAKE_DEPOSITORY ON THE LOCAL HOST
      make_depository=${base_directory}/MAKE_DEPOSITORY_${host_configuration}
      if [[ ! -d ${make_depository} ]]; then
         printf "\n\n +++ make depository \"${make_depository}\""
         printf "\n on local host not found!"
         ask_for_make_depository=true
      fi
   fi

   if [[ $ask_for_make_depository = true ]]; then
      antwort=dummy
      printf "\n\n"
      printf " >>> Create a new one (y/n) ? "
      # REPEAT THE QUESTION UNTIL ONE OF y, Y, n, N HAS BEEN TYPED
      while read antwort; do
         case "$antwort" in
            y|Y|n|N) break ;;
            *)       printf " >>> Create a new one (y/n) ? " ;;
         esac
      done
      case "$antwort" in
         n|N) locat=user_abort; (( iec = 0 )); exit ;;
      esac

      if palmbuild -v -h $host_configuration; then
         echo " *** now continue with creating executable and other sources"
      else
         # ABORT IN CASE OF COMPILATION PROBLEMS
         printf "\n +++ error while compiling for the MAKE_DEPOSITORY"
         locat=make_depository
         exit
      fi
   fi

   # CREATE THE EXECUTABLE AND THE OTHER SOURCES FOR THIS RUN. THE FOLDER NAMED
   # BY sources_for_run_catalog IS REMOVED AFTERWARDS IN BOTH CASES
   if palmbuild -v $use_existing_sources_folder -h $host_configuration -d $fname; then
      printf "\n$dashes\n *** executable and other sources created\n"
      rm -rf $sources_for_run_catalog
   else
      # ABORT IN CASE OF COMPILATION PROBLEMS
      printf "\n +++ error while creating executable and/or other sources"
      locat=execution
      rm -rf $sources_for_run_catalog
      exit
   fi

fi
# NOW PERFORM THOSE ACTIONS REQUIRED TO EXECUTE THE PROGRAM (PALM) ON THIS MACHINE
# (COMPILING/LINKING, EXECUTING, COPYING I/O FILES)
if [[ $create_batch_job = false && $create_remote_batch_job = false ]]
then
# CHANGE TO THE TEMPORARY WORKING DIRECTORY
if [[ $running_in_batch_mode != false ]]; then
   # IN BATCH MODE PALMRUN IS CALLED FROM TEMPDIR
   printf "\n *** running in temporary directory: $TEMPDIR"
else
   # CREATE THE DIRECTORY (READABLE AND SEARCHABLE FOR GROUP AND OTHERS), CHANGE
   # TO IT, AND COPY ALL FILES, INCLUDING HIDDEN ONES, FROM SOURCES_FOR_RUN_...
   # TO THAT FOLDER
   mkdir -p $TEMPDIR
   chmod go+rx $TEMPDIR
   tmpcreate=true
   cd $TEMPDIR
   cp ${fast_io_catalog}/${sources_for_run_catalog}/{*,.[!.]*} $TEMPDIR
   printf "\n *** changed to temporary directory: $TEMPDIR"
fi
# PROVIDE THE INPUT FILES
# LOOP OVER ALL ACTIVATED FILES (LISTED IN THE CONFIGURATION FILE)
# DEPENDING ON THE FILE ACTION, A FILE (OR A DIRECTORY WITH ONE FILE PER CORE) IS
# HARD-LINKED OR COPIED TO ITS LOCAL NAME IN THE TEMPORARY WORKING DIRECTORY.
# RELATIVE LOCAL NAMES REFER TO $TEMPDIR, WHICH IS THE CURRENT DIRECTORY HERE
(( i = 0 ))
while (( i < iin ))
do

   (( i = i + 1 ))
   if (( i == 1 ))
   then
      printf "\n\n *** providing INPUT-files:\n$dashes"
   fi

   # SKIP OPTIONAL FILES, IF THEY DO NOT EXIST
   if [[ "${transin[$i]}" = unavailable ]]
   then
      if [[ "${extin[$i]}" = "" || "${extin[$i]}" = " " ]]
      then
         printf "\n *** INFORMATIVE: input file \"${pathin[$i]}/${fname}${endin[$i]}\" "
         printf "\n is not available!"
      else
         printf "\n *** INFORMATIVE: input file \"${pathin[$i]}/${fname}${endin[$i]}.${extin[$i]}\" "
         printf "\n is not available!"
      fi
      continue
   fi

   # CHECK FOR SINGLE FILE (SERIAL RUN) OR DIRECTORY (ONE FILE PER CORE FOR PARALLEL EXECUTION)
   # THE ACTIONS "pe" AND "lnpe" ARE REDUCED TO "" AND "ln". THE PER-CORE HANDLING
   # IS ONLY SWITCHED ON, IF A NUMBER OF CORES IS GIVEN
   files_for_cores=false; filetype=file
   if [[ "${actionin[$i]}" = pe && -n $cores ]]
   then
      files_for_cores=true; filetype=files
      actionin[$i]=""
   elif [[ "${actionin[$i]}" = pe && ! -n $cores ]]
   then
      actionin[$i]=""
   elif [[ "${actionin[$i]}" = lnpe && -n $cores ]]
   then
      files_for_cores=true; filetype=files
      actionin[$i]="ln"
   elif [[ "${actionin[$i]}" = lnpe && ! -n $cores ]]
   then
      actionin[$i]="ln"
   fi

   if [[ $files_for_cores = true ]]
   then
      printf "\n >>> INPUT: ${absnamein[$i]}/.... to ${localin[$i]}"
   else
      printf "\n >>> INPUT: ${absnamein[$i]} to ${localin[$i]}"
   fi

   # INPUT-FILES TO BE LINKED
   if [[ "${actionin[$i]}" = ln ]]
   then

      printf "\n $filetype will be linked"
      if [[ $files_for_cores = false ]]
      then
         if [[ -f "${absnamein[$i]}" ]]
         then
            ln ${absnamein[$i]} ${localin[$i]}
            got_tmp[$i]=true
         fi
      else
         if [[ -d "${absnamein[$i]}" ]]
         then
            mkdir -p ${localin[$i]}
            cd ${absnamein[$i]}
            # A GLOB IS USED INSTEAD OF PARSING THE OUTPUT OF ls. IF NOTHING
            # MATCHES, THE PATTERN STAYS LITERAL AND IS SKIPPED
            for file in *
            do
               [[ -e "$file" ]] || continue
               ln "$file" $TEMPDIR/${localin[$i]}
            done >|/dev/null 2>&1
            cd $TEMPDIR
         fi

         # IF "ln" HAS FAILED DO A NORMAL COPY "cp -r"
         if [[ ! -f "${localin[$i]}/_000000" ]]
         then
            printf "\n --- WARNING: ln failed, using cp instead (might be time consuming...)"
            cp -r ${absnamein[$i]}/* ${localin[$i]}
         fi

         got_tmp[$i]=true
      fi
   fi

   # FILE IS STORED IN THE RESPECTIVE DIRECTORY GIVEN IN THE CONFIGURATION FILE
   if [[ "${actionin[$i]}" = "" || "${actionin[$i]}" = "di" || "${actionin[$i]}" = "npe" ]]
   then

      if [[ "${actionin[$i]}" = "npe" && -n $cores ]]
      then

         # FILE COPIES ARE PROVIDED FOR ALL CORES
         # EACH FILE GETS A UNIQUE FILENAME WITH A FOUR DIGIT NUMBER
         printf "\n file will be provided for $cores processors"
         mkdir -p ${localin[$i]}
         ival=$cores
         (( ii = 0 ))
         while (( ii <= ival-1 ))
         do
            # ZERO-PADDED TO AT LEAST FOUR DIGITS (_0000, _0001, ...)
            printf -v core_suffix "%04d" $ii
            cp ${absnamein[$i]} ${localin[$i]}/_$core_suffix
            (( ii = ii + 1 ))
         done

      else

         if [[ $files_for_cores = true ]]
         then

            # PROVIDE FILES FOR EACH CORE
            # FIRST CREATE THE LOCAL DIRECTORY, THEN COPY FILES
            # FROM THE PERMANENT DIRECTORY BY LINKING THEM TO THE LOCAL ONE
            printf "\n providing $cores files for the respective cores"
            mkdir -p ${localin[$i]}
            if [[ $link_local_input = true ]]
            then
               printf " files will be linked\n"
               cd ${absnamein[$i]}
               # THE LINK TARGET MUST BE GIVEN RELATIVE TO $TEMPDIR, BECAUSE THE
               # CURRENT DIRECTORY IS NOW THE PERMANENT ONE. OTHERWISE THE LINKS
               # WOULD BE CREATED INSIDE THE PERMANENT DIRECTORY
               for file in *
               do
                  [[ -e "$file" ]] || continue
                  ln -f "$file" $TEMPDIR/${localin[$i]}
               done
               cd $TEMPDIR
            fi

            # IF "ln -f" FAILED OR IF "$link_local_input = false" DO A NORMAL "cp -r"
            if [[ ! -f "${localin[$i]}/_000000" ]]
            then
               if [[ $link_local_input = true ]]
               then
                  printf "\n --- WARNING: ln failed, using cp instead (might be time consuming...)"
               fi
               cp -r ${absnamein[$i]}/* ${localin[$i]}
            fi

         else

            # PROVIDE FILE FOR RUNS ON A SINGLE CORE
            if [[ $link_local_input = true ]]
            then
               printf " file will be linked\n"
               ln -f ${absnamein[$i]} ${localin[$i]}
            fi
            # IF "ln -f" FAILS OR IF "$link_local_input = false" DO A NORMAL "cp"
            if [[ ! -f "${localin[$i]}" ]]
            then
               if [[ $link_local_input = true ]]
               then
                  printf "\n --- WARNING: ln failed, using cp instead (might be time consuming...)"
               fi
               cp ${absnamein[$i]} ${localin[$i]}
            fi
         fi
      fi
   fi

done
if (( i != 0 ))
then
   printf "\n$dashes\n *** all INPUT-files provided \n"
fi
# EXECUTE INPUT-COMMANDS GIVEN IN THE CONFIGURATION FILE
# EACH COMMAND IS PRINTED AND THEN EVALUATED IN THE CURRENT SHELL. A TITLE IS
# PRINTED BEFORE THE FIRST AND A DASHED LINE AFTER THE LAST COMMAND
(( i = 0 ))
(( i < iic )) && printf "\n\n *** execution of INPUT-commands:\n$dashes"
while (( i < iic )); do
   (( i += 1 ))
   printf "\n >>> ${in_command[$i]}"
   eval ${in_command[$i]}
   (( i == iic )) && printf "\n$dashes\n"
done
# CREATE THE NAMELIST-FILE WITH VALUES OF ENVIRONMENT-VARIABLES REQUIRED BY PALM
# (FILE ENVPAR WILL BE READ BY PALM)
# THE FILE IS WRITTEN TO THE CURRENT DIRECTORY. THE HERE-DOCUMENT DELIMITER IS
# UNQUOTED, SO ALL SHELL VARIABLES BELOW ARE EXPANDED WHILE THE FILE IS WRITTEN.
# write_binary AND running_in_batch_mode ARE ENCLOSED IN DOTS (E.G. .true.) AND
# cpumax GETS A TRAILING DOT
# NOTE(review): presumably this yields Fortran logical and real constants - confirm
cat > ENVPAR << EOF
&envpar run_identifier = '$fname', host = '$host_configuration',
write_binary = .${write_binary}., tasks_per_node = $tasks_per_node,
maximum_parallel_io_streams = $maximum_parallel_io_streams,
maximum_cpu_time_allowed = ${cpumax}.,
revision = '$global_revision',
batch_job = .${running_in_batch_mode}. /
EOF
# STARTING THE EXECUTABLE
printf "\n\n *** execution starts in directory\n \"$(pwd)\"\n$dashes\n"
PATH=$PATH:$TEMPDIR

# REPLACE PARAMETERS IN THE EXECUTION COMMAND WITH REAL VALUES
# sed FIRST TURNS THE {{...}} PLACEHOLDERS INTO REFERENCES TO SHELL VARIABLES,
# WHICH ARE EXPANDED BY THE eval FURTHER BELOW. BOTH COMMANDS ARE DERIVED FROM
# THE ORIGINAL execute_command BEFORE IT IS OVERWRITTEN
line=$(echo "${execute_command}" | sed -e 's/{{MPI_TASKS}}/$mpi_tasks/g' -e 's/{{TASKS_PER_NODE}}/$tasks_per_node/g')

# EXECUTION COMMAND FOR COMBINE_PLOT_FIELDS: ONE TASK, ONE TASK PER NODE, AND
# EVERY OCCURRENCE OF "palm" REPLACED BY THE NAME OF THAT EXECUTABLE
line2=$(echo "${execute_command}" | sed -e 's/{{MPI_TASKS}}/1/g' -e 's/{{TASKS_PER_NODE}}/1/g' -e 's/palm/combine_plot_fields.x/g')

eval line=\"$line\"
execute_command="$line"

eval line2=\"$line2\"
execute_command_for_combine="$line2"
# PROVIDE A HOSTFILE, IF REQUIRED
# THE FILE IS ALWAYS NAMED "hostfile" AND IS LOCATED IN THE CURRENT DIRECTORY
if [[ "$hostfile" != "" ]]
then
   if [[ $hostfile = auto ]]
   then
      # CREATE A NEW HOSTFILE
      # THE NAME OF THIS HOST IS APPENDED cores / threads_per_task TIMES, BUT AT
      # LEAST ONCE
      (( ii = 1 ))
      while (( ii <= cores / threads_per_task ))
      do
         echo $(hostname) >> hostfile
         (( ii = ii + 1 ))
      done
      if (( cores / threads_per_task == 0 ))
      then
         echo $(hostname) >> hostfile
      fi
   else
      cp $hostfile hostfile
      # ii IS ONLY SET BY THE LOOP OF THE "auto" BRANCH. FOR A HOSTFILE GIVEN
      # BY THE USER IT WOULD BE UNDEFINED (OR A LEFTOVER FROM AN EARLIER LOOP),
      # SO THE NUMBER OF LINES OF THE COPIED FILE IS USED INSTEAD
      ii=$(grep -c "" hostfile)
   fi
   # SHOW THE HOSTS THAT WILL BE USED
   eval line=\"`head -n $ii hostfile`\"
   printf "\n *** running on: $line"
fi
# SET THE NUMBER OF OPENMP-THREADS
# ONE THREAD IS THE DEFAULT. WITH OPENMP SWITCHED ON, threads_per_task IS USED
# AND REPORTED
OMP_NUM_THREADS=1
if [[ $use_openmp = true ]]; then
   OMP_NUM_THREADS=$threads_per_task
   printf "\n *** number of OpenMP threads per MPI-task: $OMP_NUM_THREADS"
fi
export OMP_NUM_THREADS
# PROVIDE THE FILE coupling_steering, WHICH IS PASSED TO THE EXECUTABLE VIA
# STANDARD INPUT. FOR COUPLED RUNS IT CONTAINS THE NUMBER OF ATMOSPHERE AND OCEAN
# CORES, EACH DIVIDED BY threads_per_task. OTHERWISE IT ONLY NAMES THE KIND OF
# PRECURSOR RUN
if [[ $run_coupled_model != false ]]; then
   (( iia = $cores_atmos / $threads_per_task ))
   (( iio = $cores_ocean / $threads_per_task ))
   printf "\n coupled run ($iia atmosphere, $iio ocean)"
   printf "\n\n"
   echo "coupled_run $iia $iio" > coupling_steering
elif [[ "$ocean_file_appendix" = true ]]; then
   echo "precursor_ocean" > coupling_steering
else
   echo "precursor_atmos" > coupling_steering
fi

# RUN THE EXECUTABLE. THE COMMAND IS DELIBERATELY NOT QUOTED, SO THAT IT IS
# SPLIT INTO THE PROGRAM NAME AND ITS ARGUMENTS
printf "\n *** execute command:"
printf "\n \"$execute_command\" \n\n"
$execute_command < coupling_steering
if (( $? == 0 )); then
   printf "\n$dashes\n *** execution finished \n"
else
   # ABORT IN CASE OF RUNTIME ERRORS
   printf "\n +++ runtime error occured"
   locat=execution
   exit
fi
# CALL OF combine_plot_fields IN ORDER TO MERGE SINGLE FILES WRITTEN
# BY EACH CORE INTO ONE FILE
if [[ -f combine_plot_fields.x ]]; then
   if [[ "$combine_plot_fields" == true ]]; then
      printf "\n\n\n *** post-processing: now executing \"$execute_command_for_combine\" ..."
      $execute_command_for_combine
   else
      # TEMPORARY SOLUTION TO SKIP combine_plot_fields. THIS IS REQUIRED IN CASE OF HUGE AMOUNT OF
      # DATA OUTPUT. TO DO: EXTEND THIS BRANCH BY CREATING A BATCH JOB for combine_plot_fields.
      # ??? can we remove this ???
      printf "\n\n\n *** post-processing: skipping combine_plot_fields (-Z option set) ..."
   fi
else
   # THE EXECUTABLE IS MISSING IN THE CURRENT DIRECTORY
   printf "\n\n\n +++ WARNING: no combine_plot_fields found"
   printf "\n 2d- and/or 3d-data may be incomplete!"
   printf "\n Your previous palmbuild may have failed. Please check.\n"
fi
# EXECUTE OUTPUT-COMMANDS GIVEN IN THE CONFIGURATION FILE
# EACH COMMAND IS PRINTED AND THEN EVALUATED IN THE CURRENT SHELL. A TITLE IS
# PRINTED BEFORE THE FIRST AND A DASHED LINE AFTER THE LAST COMMAND
(( i = 0 ))
(( i < ioc )) && printf "\n\n *** execution of OUTPUT-commands:\n$dashes"
while (( i < ioc )); do
   (( i += 1 ))
   printf "\n >>> ${out_command[$i]}"
   eval ${out_command[$i]}
   (( i == ioc )) && printf "\n$dashes\n"
done
# IN TRACE-MODE PRINT CONTENTS OF THE CURRENT (TEMPORARY) WORKING DIRECTORY
[[ $do_trace = true ]] && { printf "\n\n"; ls -al; }
# COPY LOCAL OUTPUT-FILES TO THEIR PERMANENT DESTINATIONS
# FOR EVERY OUTPUT FILE THE ACTION GIVEN IN actionout DECIDES, IF THE FILE IS
# TRANSFERRED VIA SCP (tr, tra, trpe), APPENDED (a), OR COPIED/LINKED (ALL OTHER
# CASES) TO THE PATH GIVEN IN pathout
(( i = 0 ))
while (( i < iout ))
do

   (( i = i + 1 ))
   if (( i == 1 ))
   then
      printf "\n\n *** saving OUTPUT-files:"

      # GET RUN NUMBER ASSIGNED BY PALM
      if [[ -f RUN_NUMBER ]]
      then
         read run_number < RUN_NUMBER
         printf "\n *** PALM generated run_number = $run_number will be used as unified cycle number for all output files"
         usecycle_option="-U $run_number"
      else
         run_number=0
         usecycle_option=""
      fi

      if [[ $running_on_remote = true && "$remote_loginnode" != "" ]]
      then
         printf "\n *** in case of SCP transfers to local host"
         printf "\n they will be done via remote login-node \"$remote_loginnode\" "
      fi

      printf "\n$dashes"
   fi

   # ADD CYCLE NUMBER TO FILENAME
   # (NOT DONE HERE FOR FILES WHICH ARE TRANSFERRED FROM A REMOTE HOST)
   if [[ ! ( $running_on_remote = true && ( "${actionout[$i]}" = tr || "${actionout[$i]}" = tra || "${actionout[$i]}" = trpe ) ) ]]
   then

      # IN APPEND MODE, FILES KEEP THEIR CURRENT CYCLE NUMBER
      if [[ "${actionout[$i]}" != "a" ]]
      then
         # SET RUN NUMBER AS CYCLE NUMBER, IF THERE IS NOT A CONFLICT
         # WITH AN EXISTING CYCLE NUMBER
         if (( run_number >= cycnum[$i] ))
         then
            (( cycnum[$i] = run_number ))
         else
            if (( run_number > 0 ))
            then
               printf "\n --- INFORMATIVE: The following file cannot get a unified cycle number"
            fi
         fi
      fi
      if (( cycnum[$i] > 0 ))
      then
         pathout[$i]=${pathout[$i]}.${cycnum[$i]}
      fi
   fi

   # CHECK FOR SINGLE FILE (SERIAL RUN) OR DIRECTORY (ONE FILE PER CORE FOR PARALLEL EXECUTION)
   # THE ACTIONS "pe" AND "lnpe" ARE REDUCED TO "", "trpe" IS REDUCED TO "tr".
   # THE PER-CORE HANDLING IS ONLY SWITCHED ON, IF A NUMBER OF CORES IS GIVEN
   files_for_cores=false; filetype=file
   link_local_output=false
   if [[ "${actionout[$i]}" = pe && -n $cores ]]
   then
      files_for_cores=true; filetype=directory
      actionout[$i]=""
   elif [[ "${actionout[$i]}" = pe && ! -n $cores ]]
   then
      actionout[$i]=""
   elif [[ "${actionout[$i]}" = lnpe && -n $cores ]]
   then
      files_for_cores=true; filetype=directory
      link_local_output=true
      actionout[$i]=""
   elif [[ "${actionout[$i]}" = lnpe && ! -n $cores ]]
   then
      # THE ASSIGNMENT WAS MISSING HERE: THE BARE WORD WAS EXECUTED AS A COMMAND
      # AND THE FILE WAS NEVER LINKED
      link_local_output=true
      actionout[$i]=""
   elif [[ "${actionout[$i]}" = trpe && -n $cores ]]
   then
      files_for_cores=true; filetype=directory
      actionout[$i]="tr"
   elif [[ "${actionout[$i]}" = trpe && ! -n $cores ]]
   then
      actionout[$i]="tr"
   fi

   if [[ ! -f ${localout[$i]} && $files_for_cores = false ]]
   then
      printf "\n +++ temporary OUTPUT-file ${localout[$i]} does not exist\n"
   elif [[ ! -d ${localout[$i]} && $files_for_cores = true ]]
   then
      printf "\n +++ temporary OUTPUT-file ${localout[$i]}/.... does not exist\n"
   else

      # COPY VIA SCP TO LOCAL HOST (ALWAYS IN BINARY MODE USING batch_scp option -m)
      # IF TARGET DIRECTORY DOES NOT EXISTS, TRY TO CREATE IT
      if [[ "${actionout[$i]}" = tr || "${actionout[$i]}" = tra ]]
      then
         if [[ $running_on_remote = true ]]
         then

            # SET OPTIONS FOR TRANSFER
            if [[ "${actionout[$i]}" = tr ]]
            then
               if [[ $files_for_cores = false ]]
               then
                  catalog_option=""
                  catalog_string=""
               else
                  catalog_option="-c"
                  catalog_string="/"
               fi
               append_option=""
               append_string=""
            else
               # RESET THE CATALOG SETTINGS, SO THAT VALUES FROM A PREVIOUS FILE
               # ARE NOT REUSED FOR THIS TRANSFER
               catalog_option=""
               catalog_string=""
               append_option="-A"
               append_string="append"
            fi

            transfer_failed=false
            printf "\n >>> OUTPUT: ${localout[$i]}$catalog_string $append_string by SCP to"
            printf "\n ${pathout[$i]}/${host_configuration}_${fname}${endout[$i]}$catalog_string\n"

            # TRANSFER VIA SCP
            if [[ "$remote_loginnode" != "" ]]
            then
               echo "cd $TEMPDIR; ${fast_io_catalog}/${sources_for_run_catalog}/batch_scp $PORTOPT $catalog_option $append_option -b -m $usecycle_option -u $local_username $return_address ${localout[$i]} \"${pathout[$i]}\" ${host_configuration}_${fname}${endout[$i]} ${extout[$i]}" | ssh -q $remote_username@$remote_loginnode
            else
               batch_scp $PORTOPT $catalog_option $append_option -b -m $usecycle_option -u $local_username $return_address ${localout[$i]} "${pathout[$i]}" ${host_configuration}_${fname}${endout[$i]} ${extout[$i]}
            fi
            [[ $? != 0 ]] && transfer_failed=true

            # IF TRANSFER FAILED, CREATE BACKUP COPY ON THIS MACHINE
            if [[ $transfer_failed = true ]]
            then
               printf " +++ transfer failed. Trying to save a copy on this host under:\n"
               printf " ${pathout[$i]}/${host_configuration}_${fname}${endout[$i]}_$run_id\n"

               # FIRST CHECK, IF DIRECTORY EXISTS, AND CREATE IT, IF NECESSARY
               eval local_catalog=${pathout[$i]}
               if [[ ! -d $local_catalog ]]
               then
                  printf " *** local directory does not exist. Trying to create:\n"
                  printf " $local_catalog \n"
                  mkdir -p $local_catalog
               fi
               eval cp ${localout[$i]} ${pathout[$i]}/${host_configuration}_${fname}${endout[$i]}_$run_id
               transfer_problems=true
            fi

         else

            # UNSET actionout. DUE TO THIS SETTING, FILE WILL LATER JUST
            # BE COPIED OR APPENDED ON THIS MACHINE
            if [[ "${actionout[$i]}" = tr ]]
            then
               actionout[$i]=""
            else
               actionout[$i]="a"
            fi
         fi
      fi

      # APPEND ON THIS MACHINE
      if [[ "${actionout[$i]}" = "a" ]]
      then
         if [[ "${extout[$i]}" != " " && "${extout[$i]}" != "" ]]
         then
            printf "\n >>> OUTPUT: ${localout[$i]} append to"
            printf "\n ${pathout[$i]}.${extout[$i]}\n"
            cat ${localout[$i]} >> ${pathout[$i]}.${extout[$i]}
         else
            printf "\n >>> OUTPUT: ${localout[$i]} append to"
            printf "\n ${pathout[$i]}\n"
            cat ${localout[$i]} >> ${pathout[$i]}
         fi
      fi

      # COPY ON THIS MACHINE
      # COPY HAS TO BE USED, BECAUSE MOVE DOES NOT WORK IF FILE-ORIGIN AND TARGET ARE
      # ON DIFFERENT FILE-SYSTEMS
      if [[ "${actionout[$i]}" = "" && $files_for_cores = false ]]
      then

         # COPY IN CASE OF RUNS ON SINGLE CORES
         if [[ "${extout[$i]}" != " " && "${extout[$i]}" != "" ]]
         then
            printf "\n >>> OUTPUT: ${localout[$i]} to"
            printf "\n ${pathout[$i]}.${extout[$i]}\n"
            if [[ $link_local_output = true ]]
            then
               printf " file will be linked\n"
               ln -f ${localout[$i]} ${pathout[$i]}.${extout[$i]}
            fi
            # IF "ln -f" FAILS OR IF "$link_local_output = false" DO A NORMAL "cp"
            # AN ALREADY EXISTING TARGET FILE IS NEVER OVERWRITTEN BY THE COPY
            if [[ ! -f "${pathout[$i]}.${extout[$i]}" ]]
            then
               if [[ $link_local_output = true ]]
               then
                  printf " --- WARNING: ln failed, using cp instead (might be time consuming...)\n"
               fi
               cp ${localout[$i]} ${pathout[$i]}.${extout[$i]}
            else
               printf "+++ no copy because file ${pathout[$i]}.${extout[$i]} exists\n"
            fi
         else
            printf "\n >>> OUTPUT: ${localout[$i]} to"
            printf "\n ${pathout[$i]}\n"
            if [[ $link_local_output = true ]]
            then
               printf " file will be linked\n"
               ln -f ${localout[$i]} ${pathout[$i]}
            fi
            # IF "ln -f" FAILS OR IF "$link_local_output = false" DO A NORMAL "cp"
            # AN ALREADY EXISTING TARGET FILE IS NEVER OVERWRITTEN BY THE COPY
            if [[ ! -f "${pathout[$i]}" ]]
            then
               if [[ $link_local_output = true ]]
               then
                  printf " --- WARNING: ln failed, using cp instead (might be time consuming...)\n"
               fi
               cp ${localout[$i]} ${pathout[$i]}
            else
               printf "+++ no copy because file ${pathout[$i]} exists\n"
            fi
         fi

      elif [[ "${actionout[$i]}" = "" && $files_for_cores = true ]]
      then

         # FILES FROM THE DIFFERENT CORES ARE MOVED WITH ln-COMMAND TO THE PERMANENT DIRECTORY
         # AS A FIRST STEP, THE PERMANENT DIRECTORY IS CREATED
         printf "\n >>> OUTPUT: ${localout[$i]}/_.... to"
         printf "\n ${pathout[$i]}\n"
         if [[ $link_local_output = true ]]
         then
            printf " files will be linked\n"
            mkdir -p ${pathout[$i]}
            cd ${localout[$i]}
            # A GLOB IS USED INSTEAD OF PARSING THE OUTPUT OF ls. IF NOTHING
            # MATCHES, THE PATTERN STAYS LITERAL AND IS SKIPPED
            # NOTE(review): a relative pathout would be resolved inside the local
            # directory here - presumably pathout is always absolute, confirm
            for file in *
            do
               [[ -e "$file" ]] || continue
               ln -f "$file" ${pathout[$i]}
            done >|/dev/null 2>&1
            cd $TEMPDIR
         fi

         # IF "ln -f" HAS FAILED OR IF "$link_local_output = false" DO A NORMAL COPY "cp -r"
         if [[ ! -f "${pathout[$i]}/_000000" ]]
         then
            if [[ $link_local_output = true ]]
            then
               printf " --- WARNING: ln failed, using cp instead (might be time consuming...)\n"
            fi
            cp -r ${localout[$i]}/* ${pathout[$i]}
         fi

      fi
   fi
done

if (( i != 0 ))
then
   if [[ $transfer_problems = true ]]
   then
      printf "\n$dashes\n *** OUTPUT-files saved"
      printf "\n +++ WARNING: some data transfers failed! \n"
   else
      printf "\n$dashes\n *** all OUTPUT-files saved \n"
   fi
fi
# IF REQUIRED, START A RESTART-JOB
# FILE CONTINUE_RUN MUST HAVE BEEN CREATED BY THE EXECUTABLE (PALM)
if [[ -f CONTINUE_RUN ]]
then

   # ADD RESTART-OPTIONS TO THE PALMRUN-CALL (IF THEY ARE NOT USED ALREADY):
   # -C TELLS PALMRUN THAT IT IS A RESTART-RUN
   # -v SILENT MODE WITHOUT INTERACTIVE QUERIES
   # -b START A BATCH JOB
   [[ $(echo $prc | grep -c "\-C") = 0 ]] && prc="$prc -C"
   [[ $(echo $prc | grep -c "\-v") = 0 ]] && prc="$prc -v"
   [[ $(echo $prc | grep -c "\-b") = 0 ]] && prc="$prc -b"

   # REPLACE THE HASH IN THE ACTIVATION STRINGS (GIVEN WITH OPTION -a)
   # SO THAT RESTARTS ACCESS DIFFERENT FILES THAN THE INITIAL RUN
   if [[ $(echo $prc | grep -c "#") != 0 ]]
   then
      prc=$(echo $prc | sed 's/#/r/g')
   fi

   # START THE RESTART-JOB
   printf "\n\n *** initiating restart-run on \"$local_ip\" using command:\n"
   echo " $prc"
   printf "\n$dashes\n"
   if [[ $running_on_remote = true ]]
   then

      echo "*** ssh will be used to initiate restart-runs!"
      echo " return_address=\"$return_address\" "
      echo " return_username=\"$local_username\" "

      if [[ "$remote_loginnode" != "" ]]
      then
         # THE COMMAND IS SENT TO THE LOGIN-NODE, WHICH IN TURN SENDS THE
         # PALMRUN-CALL VIA SSH TO THE RETURN ADDRESS
         echo "echo \" PATH=\\\$PATH:$LOCAL_PALMRUN_PATH; cd $LOCAL_PWD; $prc\" | ssh -q $SSH_PORTOPT $local_username@$return_address " | ssh -q $remote_username@$remote_loginnode
      else
         # THE WHOLE COMMAND SEQUENCE MUST BE ONE QUOTED STRING. WITH ESCAPED
         # QUOTES THE SEMICOLONS ENDED THE echo, SO THAT "cd" AND THE
         # PALMRUN-CALL WERE EXECUTED ON THIS HOST INSTEAD OF BEING SENT VIA SSH
         echo " PATH=\$PATH:$LOCAL_PALMRUN_PATH; cd $LOCAL_PWD; $prc" | ssh -q $SSH_PORTOPT $local_username@$return_address
      fi

      # WAIT TO ALLOW THE RESTART-JOB TO BE QUEUED, BEFORE THE CURRENT JOB IS FINISHED
      sleep 30

   else

      # START THE RESTART JOB ON THE LOCAL HOST
      eval $prc # THE ' MUST BE EVALUATED
      cd - > /dev/null

   fi
   printf "\n$dashes\n *** restart-run initiated \n"

   # DELETE INPUT-(RESTART)FILES, WHICH HAVE BEEN FETCHED FROM THE TEMPORARY DATA
   # DIRECTORY, BECAUSE THEY ARE NOT REQUIRED BY THE RESTART-JOB.
   # THIS IS DONE IN ORDER TO AVOID EXCEEDING DISC QUOTAS OR DISC SPACE (RESTART-FILES
   # MAY BE VERY HUGE)
   (( i = 0 ))
   while (( i < iin ))
   do
      (( i = i + 1 ))
      if [[ "${got_tmp[$i]}" = true && $keep_data_from_previous_run = false ]]
      then
         rm -r ${absnamein[$i]}
      fi
   done

fi
# ALL ACTIONS FINISHED, TEMPORARY WORKING-DIRECTORY CAN BE DELETED
# THE DIRECTORY IS LEFT FIRST, AND IT IS ONLY REMOVED IF THIS IS REQUESTED
cd $HOME
if [[ $delete_temporary_catalog = true ]]; then
   rm -rf $TEMPDIR
fi
else
# PREPARING ACTIONS,
# IF A BATCH-JOB IS TO BE GENERATED AND TO BE STARTED ON A LOCAL OR REMOTE-MACHINE

# BUILD THE PALMRUN-COMMAND TO BE CALLED IN THE BATCH-JOB
# THE MANDATORY OPTIONS COME FIRST, THE OPTIONAL ONES ARE APPENDED ONE BY ONE
palmrun_com="$palmrun_script_name -d $fname -h $host_configuration -m $memory -t $cpumax -q $queue -r $run_id -U $local_username"
[[ -n "$activation_string_list" ]] && palmrun_com+=" -a \"$activation_string_list\""
[[ -n "$global_revision" ]] && palmrun_com+=" -G \"$global_revision\""
[[ $keep_data_from_previous_run = true ]] && palmrun_com+=" -k"
[[ $do_trace = true ]] && palmrun_com+=" -x"
[[ -n "$cores" ]] && palmrun_com+=" -X $cores"
[[ $use_openmp = true ]] && palmrun_com+=" -O $threads_per_task"
[[ $tasks_per_node != 0 ]] && palmrun_com+=" -T $tasks_per_node"
[[ $delete_temporary_catalog = false ]] && palmrun_com+=" -B"
[[ "$ocean_file_appendix" = true ]] && palmrun_com+=" -y"
[[ $run_coupled_model = true ]] && palmrun_com+=" -Y \"$coupled_dist\""
[[ "$combine_plot_fields" = false ]] && palmrun_com+=" -Z"
[[ -n "$max_par_io_str" ]] && palmrun_com+=" -w $max_par_io_str"
[[ -n "$project_account" ]] && palmrun_com+=" -A $project_account"

# OPTION -j IS ADDED FOR BOTH KINDS OF BATCH JOBS. JOBS FOR A REMOTE HOST
# ADDITIONALLY GET THE REMOTE USERNAME AND THE LOCAL IP ADDRESS
if [[ $create_remote_batch_job = true ]]; then
   palmrun_com+=" -j -u $remote_username -R $local_ip"
   [[ $do_trace = true ]] && printf "\n *** PALMRUN-command on remote host:\n $palmrun_com \n"
elif [[ $create_batch_job = true ]]; then
   palmrun_com+=" -j"
   [[ $do_trace = true ]] && printf "\n *** PALMRUN-command on local host:\n $palmrun_com \n"
fi
# DETERMINE THE FULL PATHS FOR THE JOB PROTOCOL FILES ON THE LOCAL AND
# REMOTE HOST
# job_protocol_file POINTS TO THE REMOTE FILE FOR REMOTE JOBS AND TO THE LOCAL
# ONE OTHERWISE. THE TRANSFER PROTOCOL AND THE SCP JOB FILE ARE ONLY DEFINED FOR
# REMOTE JOBS
job_protocol_file_local="${local_jobcatalog}/${host_configuration}_${job_id}"
if [[ $create_remote_batch_job = true ]]; then
   job_protocol_file_remote="${remote_jobcatalog}/${host_configuration}_${job_id}"
   job_protocol_file=$job_protocol_file_remote
   job_transfer_protocol_file="${remote_jobcatalog}/last_job_transfer_protocol"
   scpjob_file="${remote_jobcatalog}/scpjob.$run_id"
else
   job_protocol_file=$job_protocol_file_local
fi
# BUILD THE JOB-SCRIPTS ON FILE jobfile
jobfile=jobfile.$run_id

# FIRST CREATE THE BATCH DIRECTIVES
# sed TURNS EVERY {{...}} PLACEHOLDER OF A DIRECTIVE INTO A REFERENCE TO THE
# RESPECTIVE SHELL VARIABLE. THE FOLLOWING eval EXPANDS THESE REFERENCES, AND
# THE RESULT IS APPENDED TO THE JOBFILE
(( i = 0 ))
while (( i < ibd )); do
   (( i += 1 ))
   line=$(echo "${batch_directive[$i]}" | sed \
      -e 's/{{JOB_ID}}/$job_id/g' \
      -e 's/{{JOBFILE}}/$job_protocol_file/g' \
      -e 's/{{CPU_HOURS}}/$cpu_hours/g' \
      -e 's/{{CPU_MINUTES}}/$cpu_minutes/g' \
      -e 's/{{CPU_SECONDS}}/$cpu_seconds/g' \
      -e 's/{{NODES}}/$nodes/g' \
      -e 's/{{CORES}}/$cores/g' \
      -e 's/{{TASKS_PER_NODE}}/$tasks_per_node/g' \
      -e 's/{{HOST_CONFIGURATION}}/${host_configuration}/g' \
      -e 's/{{FNAME}}/$fname/g' \
      -e 's/{{QUEUE}}/$queue/g' \
      -e 's/{{MEMORY}}/$memory/g' \
      -e 's/{{PROJECT_ACCOUNT}}/$project_account/g' \
      -e 's/{{PREVIOUS_JOB}}/$previous_job/g')
   eval line=\"$line\"
   echo "$line" >> $jobfile
done
echo " " >> $jobfile
# FOR BATCH JOBS ON REMOTE HOSTS, ADD THE JOBFILE TO SEND BACK THE JOB
# PROTOCOL
# THE LINES WRITTEN HERE FORM A trap ON exit INSIDE THE JOB. THE TRAP CREATES
# THE FILE scpjob.$run_id FROM A HERE-DOCUMENT (BATCH DIRECTIVES PLUS A
# batch_scp CALL), SUBMITS IT AND REMOVES IT AGAIN
if [[ $create_remote_batch_job = true ]]; then

   {
      echo "set +vx"
      echo "trap '"
      echo "set +vx"
      echo "cd ${remote_jobcatalog}"
      echo "cat > scpjob.$run_id << %%END%%"
   } >> $jobfile

   # ADD THE BATCH DIRECTIVES
   # PLACEHOLDERS ARE TURNED INTO VARIABLE REFERENCES BY sed AND EXPANDED BY eval
   (( i = 0 ))
   while (( i < ibdt )); do
      (( i += 1 ))
      line=$(echo "${batch_directive_transfer[$i]}" | sed \
         -e 's/{{JOB_ID}}/$job_id/g' \
         -e 's/{{JOBFILE}}/$job_protocol_file/g' \
         -e 's/{{JOB_TRANSFER_PROTOCOL_FILE}}/$job_transfer_protocol_file/g' \
         -e 's/{{CPU_HOURS}}/$cpu_hours/g' \
         -e 's/{{CPU_MINUTES}}/$cpu_minutes/g' \
         -e 's/{{CPU_SECONDS}}/$cpu_seconds/g' \
         -e 's/{{NODES}}/$nodes/g' \
         -e 's/{{TASKS_PER_NODE}}/$tasks_per_node/g' \
         -e 's/{{HOST_CONFIGURATION}}/${host_configuration}/g' \
         -e 's/{{FNAME}}/$fname/g' \
         -e 's/{{PROJECT_ACCOUNT}}/$project_account/g')
      eval line=\"$line\"
      echo "$line" >> $jobfile
   done

   {
      echo " "
      echo "set -x"
      echo "${fast_io_catalog}/${sources_for_run_catalog}/batch_scp $PORTOPT -d -w 10 -u $local_username $local_ip $job_protocol_file_remote \"$local_jobcatalog\" ${host_configuration}_${fname}"
      echo "%%END%%"
      echo "echo \" *** submitting job for transfering the job protocol file to $local_ip\" "
      echo "$submit_command $scpjob_file"
      echo "rm $scpjob_file"
      echo "rm -rf $job_transfer_protocol_file"
      echo "set -x"
      echo " ' exit"
   } >> $jobfile

fi
# ACTIVATE ERROR-TRACEBACK
# THE JOB EITHER SWITCHES COMMAND TRACING ON (TRACE MODE) OR SWITCHES IT OFF
if [[ $do_trace != true ]]; then
   echo "set +vx" >> $jobfile
else
   echo "set -x" >> $jobfile
fi

# INITIALIZE THE ENVIRONMENT AND LOAD MODULES
# BOTH COMMAND STRINGS ARE COPIED UNCHANGED TO THE JOBFILE, IF THEY ARE NOT EMPTY
[[ -n "$login_init_cmd" ]] && echo "$login_init_cmd" >> $jobfile
[[ -n "$module_commands" ]] && echo "$module_commands" >> $jobfile
# CREATE TEMPORARY DIRECTORY AND SWITCH TO IT
# THE JOB CREATES THE DIRECTORY, CHANGES TO IT, COPIES ALL FILES (INCLUDING
# HIDDEN ONES) FROM THE SOURCES_FOR_RUN_... FOLDER, PUTS THE CURRENT DIRECTORY
# AT THE FRONT OF PATH AND SETS THE FLAG execute_palmrun
{
   echo "mkdir $TEMPDIR"
   echo "cd $TEMPDIR"
   echo "export TEMPDIR=$TEMPDIR"
   echo "cp ${fast_io_catalog}/${sources_for_run_catalog}/{*,.[!.]*} ."
   echo "export PATH=.:\$PATH"
   echo "export execute_palmrun=true"
} >> $jobfile
# GET REQUIRED INPUT-FILES BY SCP OR BY SENDING THEM WITH THE JOB AS HERE-DOCUMENT
# PUT THESE FILES INTO THE USER'S RESPECTIVE PERMANENT DIRECTORIES ON THE REMOTE-HOST
# IF THE DIRECTORIES DO NOT EXIST, TRY TO CREATE THEM
if [[ $create_remote_batch_job = true ]]; then
   (( i = 0 ))
   while (( i < iin )); do
      (( i += 1 ))
      echo "[[ ! -d ${pathin[$i]} ]] && mkdir -p ${pathin[$i]}" >> $jobfile

      if [[ "${transin[$i]}" != job ]]; then
         # THE JOB FETCHES THE FILE WITH batch_scP FROM THE RETURN ADDRESS
         echo "batch_scp $PORTOPT -b -o -g -s -u $local_username $return_address ${remotepathin[$i]} \"${pathin[$i]}\" ${frelin[$i]}" >> $jobfile
      else
         # THE FILE CONTENT IS EMBEDDED IN THE JOB AS A QUOTED HERE-DOCUMENT
         echo "cat > ${remotepathin[$i]} <<\"%END%\"" >> $jobfile
         eval cat ${pathin[$i]}/${frelin[$i]} >> $jobfile
         echo " " >> $jobfile
         echo "%END%" >> $jobfile
      fi

      # CHECK, IF FILE COULD BE CREATED
      # IF THE PRECEDING COMMAND RETURNED 1, THE JOB PRINTS A MESSAGE AND
      # SWITCHES THE PALMRUN CALL OFF
      {
         echo "if [[ \$? = 1 ]]"
         echo "then"
         echo " echo \" \" "
         echo " echo \"+++ file ${remotepathin[$i]} could not be created\" "
         echo " echo \" please check, if directory exists on $host_configuration!\" "
         echo " echo \"+++ PALMRUN will not be continued\" "
         echo " execute_palmrun=false"
         echo "fi"
      } >> $jobfile
   done
fi
# PASS TWO LOCATIONS OF THE LOCAL MACHINE TO THE JOB AS EXPORTED ENVIRONMENT VARIABLES:
# LOCAL_PWD          - THE WORKING DIRECTORY FROM WHERE THE JOB HAS BEEN STARTED
# LOCAL_PALMRUN_PATH - THE SCRIPTS DIRECTORY NEXT TO THE SOURCE PATH
# PALMRUN USES THIS INFORMATION WITHIN THE JOB, IN CASE THAT RESTART-RUNS HAVE TO BE GENERATED
printf '%s\n' \
    "LOCAL_PWD=$working_directory" \
    "export LOCAL_PWD" \
    "LOCAL_PALMRUN_PATH=${source_path}/../SCRIPTS" \
    "export LOCAL_PALMRUN_PATH" >> $jobfile
# LAST LINES OF THE JOB: SWITCH ON COMMAND TRACING AND CALL PALMRUN, PROVIDED THAT NONE OF THE
# PRECEDING CHECKS WITHIN THE JOB HAS SET execute_palmrun TO false
{
    echo "set -x"
    echo "[[ \$execute_palmrun = true ]] && $palmrun_com"
} >> $jobfile
# TRANSFER JOBFILE TO THE TARGET HOST AND SUBMIT IT, UNLESS ONLY THE JOBFILE HAS BEEN REQUESTED
# REMOTE JOBS ARE COPIED VIA scp AND SUBMITTED VIA ssh, LOCAL BATCH JOBS ARE COPIED TO THE LOCAL
# JOB CATALOG AND SUBMITTED FROM THERE. IN BOTH CASES THE JOB IS ONLY SUBMITTED, IF IT COULD BE
# PUT IN PLACE BEFORE. THE TEMPORARY JOBFILE IS REMOVED AFTERWARDS
# NOTE: $ssh_key, $PORTOPT, $SSH_PORTOPT AND $submit_command ARE LEFT UNQUOTED AS BEFORE, SO THAT
#       WORD SPLITTING STILL APPLIES TO THEM
if [[ $create_jobfile_only = false ]]
then
    if [[ $create_remote_batch_job = true ]]
    then
        echo " "
        echo " *** transfer of job to remote host via scp"
        if [[ $do_trace = true ]]
        then
            echo " scp $ssh_key $PORTOPT $jobfile ${remote_username}@${remote_ip}:${remote_jobcatalog}/${host_configuration}_${job_id}"
        fi
        # DO NOT TRY TO SUBMIT (AND REMOVE) A JOB ON THE REMOTE HOST WHICH HAS NEVER ARRIVED THERE
        if scp $ssh_key $PORTOPT "$jobfile" "${remote_username}@${remote_ip}:${remote_jobcatalog}/${host_configuration}_${job_id}"
        then
            echo " "
            echo " *** submit the job (output of submit command, e.g. the job-id, may follow)"
            if [[ $do_trace = true ]]
            then
                echo " cd $remote_jobcatalog; $submit_command ${host_configuration}_${job_id}; rm ${host_configuration}_${job_id} | ssh -q $ssh_key $SSH_PORTOPT ${remote_username}@${remote_ip} 2>&1"
            fi
            echo "cd $remote_jobcatalog; $submit_command ${host_configuration}_${job_id}; rm ${host_configuration}_${job_id}" | ssh -q $ssh_key $SSH_PORTOPT ${remote_username}@${remote_ip} 2>&1
        else
            echo " "
            echo " +++ transfer of job to remote host failed, job has not been submitted"
        fi
    elif [[ $create_batch_job = true ]]
    then
        # eval EXPANDS THE CATALOG NAME ONCE MORE BEFORE IT IS USED
        eval local_jobcatalog=$local_jobcatalog
        # CONTINUE ONLY IF THE JOB HAS BEEN COPIED AND THE CATALOG COULD BE ENTERED. OTHERWISE
        # THE SUBMIT COMMAND AND rm WOULD RUN IN THE WRONG DIRECTORY, AND "cd -" WOULD SWITCH TO
        # A DIRECTORY LEFT OVER FROM AN EARLIER cd
        if cp "$jobfile" "${local_jobcatalog}/${host_configuration}_${job_id}" && cd "$local_jobcatalog"
        then
            echo " "
            echo " *** submit the job"
            if [[ $do_trace = true ]]
            then
                echo "$submit_command ${host_configuration}_${job_id}"
            fi
            $submit_command "${host_configuration}_${job_id}"
            rm "${host_configuration}_${job_id}"
            cd - > /dev/null
        else
            echo " "
            echo " +++ job could not be placed in job catalog \"$local_jobcatalog\", job has not been submitted"
        fi
    fi
    rm -rf -- "$jobfile"
else
    # THE FILE NAME IS PASSED AS AN ARGUMENT INSTEAD OF BEING PART OF THE FORMAT STRING, SO THAT
    # % OR BACKSLASH CHARACTERS IN THE NAME ARE PRINTED LITERALLY
    printf "\n *** jobfile created under name \"%s\" " "$jobfile"
    printf "\n no batch-job has been sent!"
fi
fi # END OF REMOTE-PART