#!/bin/sh # set -xv # # Submits job to Sun Grid Engine (SGE). # Input: path to grami file (same as Globus). # # A temporary job script is created for the submission and then removed # at the end of this script. # echo "----- starting submit_sge_job -----" 1>&2 joboption_lrms="sge" lrms_options="sge_root sge_cell sge_qmaster_port sge_execd_port sge_bin_path sge_jobopts" queue_options="sge_jobopts" # ARC1 passes first the config file. if [ "$1" = "--config" ]; then shift; ARC_CONFIG=$1; shift; fi GRAMI_FILE=$1 # define paths and config parser basedir=`dirname $0` basedir=`cd $basedir > /dev/null && pwd` || exit $? . "${basedir}/lrms_common.sh" # include common submit functions . "${pkgdatadir}/submit_common.sh" || exit $? # run common init # * parse grami # * parse config # * load LRMS-specific env # * set common variables common_init #Log performance perflogfilesub="${perflogdir}/submission.perflog" if [ ! -z "$perflogdir" ]; then #start time stamp start_ts=`date +%s.%N` fi ############################################################## # Zero stage of runtime environments ############################################################## RTE_stage0 # Force shell /bin/sh, other qsub options have been moved to the job script SGE_QSUB='qsub -S /bin/sh' SGE_QCONF=qconf if [ "$SGE_BIN_PATH" ] ; then SGE_QSUB=${SGE_BIN_PATH}/${SGE_QSUB} SGE_QCONF=${SGE_BIN_PATH}/${SGE_QCONF} fi mktempscript ############################################################## # Start job script ############################################################## echo '#!/bin/sh' > $LRMS_JOB_SCRIPT echo "# SGE batch job script built by arex" >> $LRMS_JOB_SCRIPT # Job not rerunable: echo "#$ -r n" >> $LRMS_JOB_SCRIPT # Don't send mail when job finishes: echo "#$ -m n" >> $LRMS_JOB_SCRIPT # Mix standard output and standard error: echo "#$ -j y" >> $LRMS_JOB_SCRIPT # Write output to comment file: echo "#$ -o ${joboption_directory}/.comment" >> $LRMS_JOB_SCRIPT ############################################################## # priority ############################################################## if [ ! -z "$joboption_priority" ]; then #first we must scale priority. SGE: -1023 -> 1024 ARC: 0-100 #user can only decrease priority: i.e. -1023 -> 0 (info from gsciacca@lhep.unibe.ch) #Same problem as SLURM. We can only prioritize grid jobs. Locally submitted jobs will get highest priority. priority=$((joboption_priority * 1023 / 100)) priority=$((priority-1023)) echo "#$ -p ${priority}" >> $LRMS_JOB_SCRIPT fi # Choose queue. echo "#$ -q $joboption_queue" >> $LRMS_JOB_SCRIPT # job name for convenience if [ ! -z "${joboption_jobname}" ] ; then jobname=`echo "$joboption_jobname" | \ sed 's/^\([^[:alpha:]]\)/N\1/' | \ sed 's/[^[:alnum:]]/_/g' | \ sed 's/\(...............\).*/\1/'` echo "#$ -N \"$jobname\"" >> $LRMS_JOB_SCRIPT fi echo "SGE jobname: $jobname" 1>&2 ############################################################## # (non-)parallel jobs ############################################################## set_count ############################################################## # parallel jobs ############################################################## # In addition to the number of parallel tasks, also a valid # parallel environment (PE) must be set for SGE. # # The selection of PE is done by Runtime Environment setup script in the zero # stage. The user has to request a proper RE in addition to setting the # "count" -property in the xrsl. The RE script must set the name of the desired # PE to joboption_nodeproperty_# -variable (# is a number starting from zero, # RE should use the lowest previously undefined number). This script then # searches through the joboption_nodeproperty_# variables and compares them to # the PE list obtained from SGE. The first matching PE name is used. # if [ -n "$joboption_nodeproperty_0" ]; then i=0 sge_parallel_environment_list=`$SGE_QCONF -spl` while eval jope=\${joboption_nodeproperty_$i} && test "$jope" ; do for ipe in $sge_parallel_environment_list ; do if [ "$jope" = "$ipe" ] ; then break 2 # now $jope contains the requested parallel env fi done i=$(($i + 1)) done if [ -n "$jope" ] ; then echo "#\$ -pe $jope $joboption_count" >> $LRMS_JOB_SCRIPT else echo 'ERROR: Setting parallel environment failed.' 1>&2 fi fi if [ "$joboption_exclusivenode" = "true" ]; then sge_excl_complex=`$SGE_QCONF -sc | grep EXCL | head -n 1` if [ -n "$sge_excl_complex" ]; then sge_excl_complex_name=`echo $sge_excl_complex | awk '{print $1}'` echo "#\$ -l ${sge_excl_complex_name}=true" >> $LRMS_JOB_SCRIPT else echo "WARNING: Exclusive execution support is not configured by this Grid Engine" 1>&2 echo "WARNING: Example configuration: https://wiki.nordugrid.org/index.php/LRMS_Backends/Testbeds" 1>&2 fi fi ############################################################## # Execution times (obtained in seconds) ############################################################## # SGE has soft and hard limits (soft = SIGUSR1, hard = SIGKILL sent to the job), # let's allow time_hardlimit_ratio extra before the hard limit. # cputime/walltime is obtained in seconds via $joboption_cputime and $joboption_walltime # parallel jobs, add initialization time, soft/hard limit configurable... if ( [ -n "$joboption_cputime" ] && [ $joboption_cputime -gt 0 ] ) ; then # SGE enforces job-total cpu time limit, but it expects in h_cpu and s_cpu # per-slot limits. It then scales these with the number of requested slots # before enforcing them. cputime_perslot=$(( $joboption_cputime / $joboption_count )) cputime_hard_perslot=$(( $cputime_perslot * $time_hardlimit_ratio )) s_cpu_requestable=$($SGE_QCONF -sc|awk '($1=="s_cpu" && ( $5=="YES" || $5=="FORCED" )){print $5}') h_cpu_requestable=$($SGE_QCONF -sc|awk '($1=="h_cpu" && ( $5=="YES" || $5=="FORCED" )){print $5}') opt="#$" if [ $s_cpu_requestable ]; then opt="$opt -l s_cpu=::${cputime_perslot}"; fi if [ $h_cpu_requestable ]; then opt="$opt -l h_cpu=::${cputime_hard_perslot}"; fi echo $opt >> $LRMS_JOB_SCRIPT fi if [ -n "$joboption_walltime" ] ; then if [ $joboption_walltime -lt 0 ] ; then echo 'WARNING: Less than 0 wall time requested: $joboption_walltime' 1>&2 joboption_walltime=0 echo 'WARNING: wall time set to 0' 1>&2 fi joboption_walltime_hard=$(( $joboption_walltime * $time_hardlimit_ratio )) s_rt_requestable=$($SGE_QCONF -sc|awk '($1=="s_rt" && ( $5=="YES" || $5=="FORCED" )){print $5}') h_rt_requestable=$($SGE_QCONF -sc|awk '($1=="h_rt" && ( $5=="YES" || $5=="FORCED" )){print $5}') opt="#$" if [ $s_rt_requestable ]; then opt="$opt -l s_rt=::${joboption_walltime}"; fi if [ $h_rt_requestable ]; then opt="$opt -l h_rt=::${joboption_walltime_hard}"; fi echo $opt >> $LRMS_JOB_SCRIPT fi ############################################################## # Requested memory (mb) ############################################################## set_req_mem # There are soft and hard limits for virtual memory consumption in SGE if [ -n "$joboption_memory" ] ; then joboption_memory_hard=$(( $joboption_memory * $memory_hardlimit_ratio )) h_vmem_requestable=$($SGE_QCONF -sc|awk '($1=="h_vmem" && ( $5=="YES" || $5=="FORCED" )){print $5}') s_vmem_requestable=$($SGE_QCONF -sc|awk '($1=="s_vmem" && ( $5=="YES" || $5=="FORCED" )){print $5}') opt="#$" if [ $s_vmem_requestable ]; then opt="$opt -l s_vmem=${joboption_memory}M"; fi if [ $h_vmem_requestable ]; then opt="$opt -l h_vmem=${joboption_memory_hard}M"; fi echo $opt >> $LRMS_JOB_SCRIPT fi ############################################################## # Extra job options. This is the last, so that # it can overwrite previously set options. ############################################################## if [ ! -z "$CONFIG_sge_jobopts" ]; then echo "#$ $CONFIG_sge_jobopts" >> $LRMS_JOB_SCRIPT fi ############################################################## # Override umask ############################################################## echo "" >> $LRMS_JOB_SCRIPT echo "# Overide umask of execution node (sometime values are really strange)" >> $LRMS_JOB_SCRIPT echo "umask 077" >> $LRMS_JOB_SCRIPT ############################################################## # By default, use $TMPDIR from SGE to alleviate its cleanup facilities. # It can be overridden with scratchdir though. # Don't do this if "shared_scratch" is defined in arc.conf. ############################################################## if [ "$RUNTIME_LOCAL_SCRATCH_DIR" ] && [ ! "$RUNTIME_FRONTEND_SEES_NODE" ]; then echo "if [ -d \"${CONFIG_scratchdir:-\$TMPDIR}\" ]; then RUNTIME_LOCAL_SCRATCH_DIR=${CONFIG_scratchdir:-\$TMPDIR}; fi" >> $LRMS_JOB_SCRIPT fi sourcewithargs_jobscript ############################################################## # Init accounting ############################################################## accounting_init ############################################################## # Add environment variables ############################################################## add_user_env ############################################################## # Check for existance of executable, ############################################################## if [ -z "${joboption_arg_0}" ] ; then echo 'Executable is not specified' 1>&2 exit 1 fi program_start=`echo ${joboption_arg_0} | cut -c 1 2>&1` if [ "$program_start" != '$' ] && [ "$program_start" != '/' ] ; then if [ ! -f $joboption_directory/${joboption_arg_0} ] ; then echo 'Executable does not exist, or permission denied.' 1>&2 echo " Executable $joboption_directory/${joboption_arg_0}" 1>&2 echo " whoami: "`whoami` 1>&2 echo " ls -l $joboption_directory/${joboption_arg_0}: "`ls -l $joboption_directory/${joboption_arg_0}` exit 1 fi if [ ! -x $joboption_directory/${joboption_arg_0} ] ; then echo 'Executable is not executable' 1>&2 exit 1 fi fi setup_runtime_env # Override location of .diag file: put it under the working directory echo 'RUNTIME_JOB_DIAG=$RUNTIME_JOB_DIR/.diag' >> $LRMS_JOB_SCRIPT ############################################################## # Add std... to job arguments ############################################################## include_std_streams ############################################################## # Move files to local working directory (job is done on node only) # RUNTIME_JOB_DIR -> RUNTIME_LOCAL_SCRATCH_DIR/job_id ############################################################## move_files_to_node echo "" >> $LRMS_JOB_SCRIPT echo "RESULT=0" >> $LRMS_JOB_SCRIPT echo "" >> $LRMS_JOB_SCRIPT ############################################################## # Skip execution if something already failed ############################################################## echo "" >> $LRMS_JOB_SCRIPT echo "if [ \"\$RESULT\" = '0' ] ; then" >> $LRMS_JOB_SCRIPT ############################################################## # Runtime configuration ############################################################## RTE_stage1 echo "echo \"runtimeenvironments=\$runtimeenvironments\" >> \"\$RUNTIME_JOB_DIAG\"" >> $LRMS_JOB_SCRIPT if [ -z "$RUNTIME_NODE_SEES_FRONTEND" ] ; then echo "Nodes detached from gridarea are not supported when SGE is used. Aborting job submit" 1>&2 rm -f "$LRMS_JOB_SCRIPT" "$LRMS_JOB_OUT" "$LRMS_JOB_ERR" exit 1 fi ############################################################## # Accounting (WN OS Detection) ############################################################## detect_wn_systemsoftware ############################################################## # Execution ############################################################## cd_and_run echo "fi" >> $LRMS_JOB_SCRIPT ############################################################## # Runtime (post)configuration at computing node ############################################################## RTE_stage2 ############################################################## # Move files back to session directory (job is done on node only) # RUNTIME_JOB_DIR -> RUNTIME_LOCAL_SCRATCH_DIR/job_id ############################################################## move_files_to_frontend ############################################################## # Finish accounting and exit job ############################################################## accounting_end if [ ! -z "$perflogdir" ]; then stop_ts=`date +%s.%N` t=`awk "BEGIN { printf \"%.3f\", ${stop_ts}-${start_ts} }"` echo "[`date +%Y-%m-%d\ %T`] submit-sge-job, JobScriptCreation: $t" >> $perflogfilesub fi if [ ! -z "$perflogdir" ]; then #start time stamp start_ts=`date +%s.%N` fi ####################################### # Submit the job ####################################### ( echo "SGE job script built" cd "$joboption_directory" echo "SGE script follows:" cat "$LRMS_JOB_SCRIPT" echo # Execute qsub command ${SGE_QSUB} < $LRMS_JOB_SCRIPT 1>$LRMS_JOB_OUT 2>$LRMS_JOB_ERR # expected SGE output is like: 'Your job 77 ("perftest") has been # submitted', the line below uses only the job number as job id. job_id=$(cat $LRMS_JOB_OUT $LRMS_JOB_ERR \ | awk '/^.our job .* has been submitted/ {split($0,field," ");print field[3]}') # anything else is a sign of problems, which should be logged warnings=$(cat $LRMS_JOB_OUT $LRMS_JOB_ERR \ | grep -v '^.our job .* has been submitted' | grep -v '^Exit') if [ ! -z "$warnings" ]; then echo "WARNING: $warnings"; echo; fi exitcode=0 if [ -z $job_id ] ; then echo "job *NOT* submitted successfully!" exitcode=1 else echo "joboption_jobid=$job_id" >> $GRAMI_FILE echo "local job id: $job_id" echo "job submitted successfully!" exitcode=0 fi # Remove temporary job script file rm -f $LRMS_JOB_SCRIPT $LRMS_JOB_OUT $LRMS_JOB_ERR echo "----- exiting submit_sge_job -----"; ) 1>&2 if [ ! -z "$perflogdir" ]; then stop_ts=`date +%s.%N` t=`awk "BEGIN { printf \"%.3f\", ${stop_ts}-${start_ts} }"` echo "[`date +%Y-%m-%d\ %T`] submit-sge-job, JobSubmission: $t" >> $perflogfilesub fi exit $exitcode