#!/bin/sh
# set -xv
#
# Submits job to LSF
# Input: path to grami file (same as Globus), optionally preceded by
#        "--config <file>".
#
# A temporary job script is created for the submission and then removed
# at the end of this script (on every exit path).
#
# Outputs:
#   * diagnostics go to stderr;
#   * on success "joboption_jobid=<id>" is appended to the grami file.
# Exit status: 0 if the job was submitted and its id was parsed, 1 otherwise.
#
# Most of the job-script generation is delegated to functions that come from
# the sourced lrms_common.sh / submit_common.sh (common_init, mktempscript,
# set_count, RTE_stage*, ...); they are expected to set LRMS_JOB_SCRIPT,
# LRMS_JOB_OUT, LRMS_JOB_ERR and the joboption_* variables used below.

# Removes the temporary job script and the captured bsub stdout/stderr.
submit_lsf_remove_tmpfiles() {
  rm -f "$LRMS_JOB_SCRIPT" "$LRMS_JOB_OUT" "$LRMS_JOB_ERR"
}

# Records the start timestamp of a timed phase (no-op without perflogdir).
submit_lsf_perflog_start() {
  if [ -n "$perflogdir" ]; then
    start_ts=$(date +%s.%N)
  fi
}

# Appends "<phase>: <elapsed seconds>" to the submission perflog
# (no-op without perflogdir).
#   $1 - phase label
submit_lsf_perflog_stop() {
  if [ -n "$perflogdir" ]; then
    stop_ts=$(date +%s.%N)
    # Timestamps are passed with -v rather than pasted into the awk program.
    t=$(awk -v a="$start_ts" -v b="$stop_ts" 'BEGIN { printf "%.3f", b - a }')
    echo "[$(date +%Y-%m-%d\ %T)] submit-lsf-job, $1: $t" >> "$perflogfilesub"
  fi
}

echo "----- starting submit_lsf_job -----" 1>&2
joboption_lrms="lsf"
lrms_options="lsf_architecture lsf_bin_path"

# ARC1 passes first the config file.
if [ "$1" = "--config" ]; then shift; ARC_CONFIG=$1; shift; fi
GRAMI_FILE=$1

# define paths and config parser
basedir=$(dirname "$0")
basedir=$(cd "$basedir" > /dev/null && pwd) || exit $?
. "${basedir}/lrms_common.sh"

# include common submit functions
. "${pkgdatadir}/submit_common.sh" || exit $?

# run common init
#  * parse grami
#  * parse config
#  * load LRMS-specific env
#  * set common variables
common_init

# perflog
perflogfilesub="${perflogdir}/submission.perflog"
submit_lsf_perflog_start

##############################################################
# Zero stage of runtime environments
##############################################################
RTE_stage0

##############################################################
# create temp job script
##############################################################
mktempscript

##############################################################
# Start job script
##############################################################
LSF_BSUB='bsub'
LSF_BPARAMS='bparams'
if [ -n "$LSF_BIN_PATH" ] ; then
  LSF_BSUB=${LSF_BIN_PATH}/${LSF_BSUB}
  LSF_BPARAMS=${LSF_BIN_PATH}/${LSF_BPARAMS}
fi

echo "#! /bin/bash" > "$LRMS_JOB_SCRIPT"
echo "#LSF batch job script built by arex" >> "$LRMS_JOB_SCRIPT"
echo "#" >> "$LRMS_JOB_SCRIPT"

# Specify the bash shell as default
#echo "#BSUB -L /bin/bash" >> "$LRMS_JOB_SCRIPT"

# Write output to comment file:
echo "#BSUB -oo ${joboption_directory}.comment" >> "$LRMS_JOB_SCRIPT"
echo "" >> "$LRMS_JOB_SCRIPT"

# Choose queue(s).
if [ -n "${joboption_queue}" ] ; then
  echo "#BSUB -q $joboption_queue" >> "$LRMS_JOB_SCRIPT"
fi

# Architecture: the job's own request wins over the configured default.
if [ -n "${joboption_rsl_architecture}" ] ; then
  queuearch=$(echo "${joboption_rsl_architecture}" | sed 's/"//g')
  echo "#BSUB -R type=${queuearch}" >> "$LRMS_JOB_SCRIPT"
elif [ -n "$CONFIG_lsf_architecture" ] ; then
  echo "#BSUB -R type=$CONFIG_lsf_architecture" >> "$LRMS_JOB_SCRIPT"
fi

# Project name for accounting
if [ -n "${joboption_rsl_project}" ] ; then
  echo "#BSUB -P $joboption_rsl_project" >> "$LRMS_JOB_SCRIPT"
fi

# job name for convenience
if [ -n "${joboption_jobname}" ] ; then
  # Prefix "N" if the name does not start with a letter, replace every
  # non-alphanumeric character with "_", and keep at most 15 characters.
  jobname=$(echo "$joboption_jobname" \
    | sed -e 's/^\([^[:alpha:]]\)/N\1/' \
          -e 's/[^[:alnum:]]/_/g' \
          -e 's/\(...............\).*/\1/')
  echo "#BSUB -J $jobname" >> "$LRMS_JOB_SCRIPT"
fi
echo "LSF jobname: $jobname" 1>&2

##############################################################
# (non-)parallel jobs
##############################################################
set_count

##############################################################
# parallel jobs
##############################################################
echo "#BSUB -n $joboption_count" >> "$LRMS_JOB_SCRIPT"

# parallel structure
if [ -n "$joboption_countpernode" ] && [ "$joboption_countpernode" != '-1' ] ; then
  echo "#BSUB -R span[ptile=$joboption_countpernode]" >> "$LRMS_JOB_SCRIPT"
fi

# exclusive execution
if [ "$joboption_exclusivenode" = "true" ]; then
  echo "#BSUB -x" >> "$LRMS_JOB_SCRIPT"
fi

##############################################################
# Execution times (obtained in seconds)
##############################################################
#OBS: Assuming here that LSB_JOB_CPULIMIT=y or is unset.
# Limits are written in minutes. The conversion rounds UP so that the limit
# handed to LSF is never shorter than what the job asked for (plain integer
# division would turn e.g. 30 s into a limit of 0 minutes).
if [ -n "$joboption_cputime" ] && [ "$joboption_cputime" -gt 0 ]; then
  cputime=$(( (joboption_cputime + 59) / 60 ))
  echo "#BSUB -c ${cputime}" >> "$LRMS_JOB_SCRIPT"
fi

if [ -n "$joboption_walltime" ] && [ "$joboption_walltime" -gt 0 ] ; then
  walltime=$(( (joboption_walltime + 59) / 60 ))
  echo "#BSUB -W ${walltime}" >> "$LRMS_JOB_SCRIPT"
fi

##############################################################
# Requested memory (mb)
##############################################################
set_req_mem

#-M is memory limit per process in LSF, so no need to modify memory limit based on count.
if [ -n "$joboption_memory" ]; then
  memory=$(( joboption_memory * 1024 ))
  echo "#BSUB -M ${memory}" >> "$LRMS_JOB_SCRIPT"
fi

##############################################################
# Start Time
##############################################################
if [ -n "$joboption_starttime" ] ; then
  echo "#BSUB -b ${joboption_starttime}" >> "$LRMS_JOB_SCRIPT"
fi

##############################################################
# priority
##############################################################
if [ -n "$joboption_priority" ]; then
  #first we must parse the max priority
  maxprio=$("${LSF_BPARAMS}" -a | grep MAX_USER_PRIORITY | cut -f 2 -d '=' | cut -f 2 -d ' ')
  #scale priority LSF: 1 -> MAX_USER_PRIORITY ARC: 0-100
  # A missing or non-numeric maximum simply means no priority line is written.
  if [ -n "$maxprio" ] && [ "$maxprio" -gt 0 ] 2>/dev/null; then
    priority=$(( joboption_priority * (maxprio - 1) / 100 + 1 ))
    echo "#BSUB -sp ${priority}" >> "$LRMS_JOB_SCRIPT"
  fi
fi

##############################################################
# Override umask
##############################################################
echo "# Overide umask of execution node (sometime values are really strange)" >> "$LRMS_JOB_SCRIPT"
echo "umask 077" >> "$LRMS_JOB_SCRIPT"
echo " " >> "$LRMS_JOB_SCRIPT"

sourcewithargs_jobscript

##############################################################
# Init accounting
##############################################################
accounting_init

##############################################################
# Add environment variables
##############################################################
add_user_env

##############################################################
# Check for existence of executable,
# there is no sense to check for executable if files are
# downloaded directly to computing node
##############################################################
if [ -z "${joboption_arg_0}" ] ; then
  echo 'Executable is not specified' 1>&2
  submit_lsf_remove_tmpfiles
  echo "Submission: Job description error."
  exit 1
fi

# Only executables given relative to the session directory can be checked
# here; names starting with "$" or "/" are resolved on the execution node.
case "${joboption_arg_0}" in
  '$'* | /*)
    ;;
  *)
    if [ ! -f "$joboption_directory/${joboption_arg_0}" ] ; then
      echo 'Executable does not exist, or permission denied.' 1>&2
      echo " Executable $joboption_directory/${joboption_arg_0}" 1>&2
      echo " whoami: $(whoami)" 1>&2
      echo " ls -l $joboption_directory/${joboption_arg_0}: $(ls -l "$joboption_directory/${joboption_arg_0}")" 1>&2
      submit_lsf_remove_tmpfiles
      exit 1
    fi
    if [ ! -x "$joboption_directory/${joboption_arg_0}" ] ; then
      echo 'Executable is not executable' 1>&2
      submit_lsf_remove_tmpfiles
      exit 1
    fi
    ;;
esac

######################################################################
# Adjust working directory for tweaky nodes
# RUNTIME_GRIDAREA_DIR should be defined by external means on nodes
######################################################################
if [ -n "${RUNTIME_NODE_SEES_FRONTEND}" ] ; then
  setup_runtime_env
else
  echo "RUNTIME_JOB_DIR=$RUNTIME_LOCAL_SCRATCH_DIR/$joboption_gridid" >> "$LRMS_JOB_SCRIPT"
  echo "RUNTIME_JOB_DIAG=$RUNTIME_LOCAL_SCRATCH_DIR/${joboption_gridid}.diag" >> "$LRMS_JOB_SCRIPT"
  # Strip the session-directory prefix; a path that comes back unchanged
  # lies outside the session directory and is used as is.
  RUNTIME_STDIN_REL=$(echo "${joboption_stdin}" | sed "s#^${joboption_directory}/*##")
  RUNTIME_STDOUT_REL=$(echo "${joboption_stdout}" | sed "s#^${joboption_directory}/*##")
  RUNTIME_STDERR_REL=$(echo "${joboption_stderr}" | sed "s#^${joboption_directory}/*##")
  if [ "$RUNTIME_STDIN_REL" = "${joboption_stdin}" ] ; then
    echo "RUNTIME_JOB_STDIN=\"${joboption_stdin}\"" >> "$LRMS_JOB_SCRIPT"
  else
    echo "RUNTIME_JOB_STDIN=\"$RUNTIME_LOCAL_SCRATCH_DIR/$joboption_gridid/$RUNTIME_STDIN_REL\"" >> "$LRMS_JOB_SCRIPT"
  fi
  if [ "$RUNTIME_STDOUT_REL" = "${joboption_stdout}" ] ; then
    echo "RUNTIME_JOB_STDOUT=\"${joboption_stdout}\"" >> "$LRMS_JOB_SCRIPT"
  else
    echo "RUNTIME_JOB_STDOUT=\"$RUNTIME_LOCAL_SCRATCH_DIR/$joboption_gridid/$RUNTIME_STDOUT_REL\"" >> "$LRMS_JOB_SCRIPT"
  fi
  if [ "$RUNTIME_STDERR_REL" = "${joboption_stderr}" ] ; then
    echo "RUNTIME_JOB_STDERR=\"${joboption_stderr}\"" >> "$LRMS_JOB_SCRIPT"
  else
    echo "RUNTIME_JOB_STDERR=\"$RUNTIME_LOCAL_SCRATCH_DIR/$joboption_gridid/$RUNTIME_STDERR_REL\"" >> "$LRMS_JOB_SCRIPT"
  fi
fi

##############################################################
# Add std... to job arguments
##############################################################
include_std_streams

##############################################################
# Move files to local working directory (job is done on node only)
# RUNTIME_JOB_DIR -> RUNTIME_LOCAL_SCRATCH_DIR/job_id
##############################################################
move_files_to_node

echo "" >> "$LRMS_JOB_SCRIPT"
echo "RESULT=0" >> "$LRMS_JOB_SCRIPT"
echo "" >> "$LRMS_JOB_SCRIPT"

#####################################################
# Go to working dir and start job
####################################################
echo "# Changing to session directory" >> "$LRMS_JOB_SCRIPT"
echo "cd \$RUNTIME_JOB_DIR" >> "$LRMS_JOB_SCRIPT"
echo "export HOME=\$RUNTIME_JOB_DIR" >> "$LRMS_JOB_SCRIPT"

##############################################################
# Skip execution if something already failed
##############################################################
echo "" >> "$LRMS_JOB_SCRIPT"
echo "if [ \"\$RESULT\" = '0' ] ; then" >> "$LRMS_JOB_SCRIPT"
echo "echo \"runtimeenvironments=\$runtimeenvironments\" >> \"\$RUNTIME_JOB_DIAG\"" >> "$LRMS_JOB_SCRIPT"

##############################################################
# Runtime configuration at computing node
##############################################################
RTE_stage1

#extra checks
if [ -z "$RUNTIME_NODE_SEES_FRONTEND" ] ; then
  echo "Nodes detached from gridarea are not supported when LSF is used. Aborting job submit" 1>&2
  submit_lsf_remove_tmpfiles
  exit 1
fi

gate_host=$(uname -n)
if [ -z "$gate_host" ] ; then
  echo "Can't get own hostname" 1>&2
  submit_lsf_remove_tmpfiles
  exit 1
fi

##############################################################
# Execution
##############################################################
cd_and_run

##############################################################
# End of RESULT checks
##############################################################
echo "fi" >> "$LRMS_JOB_SCRIPT"

##############################################################
# Runtime (post)configuration at computing node
##############################################################
RTE_stage2

##############################################################
# Move files back to session directory (job is done on node only)
# RUNTIME_JOB_DIR -> RUNTIME_LOCAL_SCRATCH_DIR/job_id
##############################################################
move_files_to_frontend

##############################################################
# Finish accounting and exit job
##############################################################
accounting_end

#######################################
# Submit the job
#######################################
# Execute bsub command
# We make the assumption that $joboption_directory is locally available
# according to the requirements of any arc installation
if ! cd "$joboption_directory" ; then
  echo "Can't change to session directory $joboption_directory" 1>&2
  submit_lsf_remove_tmpfiles
  exit 1
fi
#chmod 0755 $LRMS_JOB_SCRIPT

echo "----------------- BEGIN job script -----" 1>&2
cat "$LRMS_JOB_SCRIPT" 1>&2
echo "----------------- END job script -----" 1>&2

submit_lsf_perflog_stop "JobScriptCreation"
submit_lsf_perflog_start

"${LSF_BSUB}" < "$LRMS_JOB_SCRIPT" 1>"$LRMS_JOB_OUT" 2>"$LRMS_JOB_ERR"
LSF_RESULT="$?"

submit_lsf_perflog_stop "JobSubmission"

if [ "$LSF_RESULT" -eq 0 ] ; then
  # The job id is the second whitespace-separated field of bsub's stdout,
  # with the surrounding angle brackets removed.
  job_id=$(awk '{ print $2 }' "$LRMS_JOB_OUT" | sed 's/[<>]//g')
  if [ -z "${job_id}" ] ; then
    echo "job *NOT* submitted successfully!" 1>&2
    echo "failed getting the jobid for the job!" 1>&2
  else
    echo "joboption_jobid=$job_id" >> "$GRAMI_FILE"
    echo "job submitted successfully!" 1>&2
    echo "local job id: $job_id" 1>&2
    # Remove temporary job script file
    submit_lsf_remove_tmpfiles
    echo "----- exiting submit_lsf_job -----" 1>&2
    echo "" 1>&2
    exit 0
  fi
else
  echo "job *NOT* submitted successfully!" 1>&2
  echo "got error code from bsub!" 1>&2
fi

# Failure path: show what bsub printed, clean up and report the error.
echo "Output is:" 1>&2
cat "$LRMS_JOB_OUT" 1>&2
echo "Error output is:" 1>&2
cat "$LRMS_JOB_ERR" 1>&2
submit_lsf_remove_tmpfiles
echo "----- exiting submit_lsf_job -----" 1>&2
echo "" 1>&2
exit 1