#!/bin/sh
# set -xv
#
# Submits a job to LoadLeveler.
#
# Usage: <this script> [--config <ARC config file>] <grami file>
#
# A temporary job script is created for the submission and then removed
# at the end of this script (on success and on every failure path that
# is reached after the script file has been created).
#
# Configuration values read here (populated by the sourced helpers):
#   LL_BIN_PATH              directory holding llsubmit / llclass (optional)
#   LL_CONSUMABLE_RESOURCES  "yes" -> request ConsumableCpus/ConsumableMemory
#   LL_PARALLEL_SINGLE_JOBS  "yes" -> submit single-task jobs as parallel
#
# On success "joboption_jobid=<id>" is appended to the grami file and the
# script exits 0. On failure diagnostics go to stderr and the script
# exits non-zero.
#

echo "----- starting submit_ll_job -----" 1>&2
joboption_lrms="ll"
lrms_options="ll_bin_path ll_consumable_resources ll_parallel_single_jobs"

# ARC1 passes first the config file.
if [ "$1" = "--config" ]; then
  shift
  ARC_CONFIG=$1
  shift
fi
GRAMI_FILE=$1

# define paths and config parser
basedir=$(dirname "$0")
basedir=$(cd "$basedir" > /dev/null && pwd) || exit $?
. "${basedir}/lrms_common.sh" || exit $?

# include common submit functions
# (pkgdatadir is not set in this file; presumably lrms_common.sh provides it)
. "${pkgdatadir}/submit_common.sh" || exit $?

# Remove the temporary job script and the captured llsubmit output files.
ll_remove_temp_files() {
  rm -f "$LRMS_JOB_SCRIPT" "$LRMS_JOB_OUT" "$LRMS_JOB_ERR"
}

# run common init
#  * parse grami
#  * parse config
#  * load LRMS-specific env
#  * set common variables
common_init

# perflog
perflogfilesub="${perflogdir}/submission.perflog"

if [ -n "$perflogdir" ]; then
  start_ts=$(date +%s.%N)
fi

# GD enforce this for the moment
RUNTIME_FRONTEND_SEES_NODE=''
RUNTIME_NODE_SEES_FRONTEND='yes'

##############################################################
# Zero stage of runtime environments
##############################################################
RTE_stage0

LL_SUB='llsubmit'
if [ -n "$LL_BIN_PATH" ]; then
  LL_SUB=${LL_BIN_PATH}/${LL_SUB}
fi

# Sourced helper; the code below relies on it having set LRMS_JOB_SCRIPT,
# LRMS_JOB_OUT and LRMS_JOB_ERR.
mktempscript

##############################################################
# Start job script
##############################################################
echo "# LL batch job script built by arex" > "$LRMS_JOB_SCRIPT"

# job name for convenience
if [ -n "${joboption_jobname}" ]; then
  # Prefix "N" if the name does not start with a letter, replace every
  # non-alphanumeric character with "_" and cut the name to 15 characters.
  jobname=$(echo "$joboption_jobname" \
    | sed -e 's/^\([^[:alpha:]]\)/N\1/' \
          -e 's/[^[:alnum:]]/_/g' \
          -e 's/\(...............\).*/\1/')
  echo "# @ job_name = $jobname" >> "$LRMS_JOB_SCRIPT"
fi
echo "LL jobname: $jobname" 1>&2

# Both LL output and error streams are written to the same .comment file.
echo "# @ output = ${joboption_directory}.comment" >> "$LRMS_JOB_SCRIPT"
echo "# @ error = ${joboption_directory}.comment" >> "$LRMS_JOB_SCRIPT"

# Project account number for accounting
if [ -n "${joboption_rsl_project}" ]; then
  echo "# @ account_no = $joboption_rsl_project" >> "$LRMS_JOB_SCRIPT"
fi

##############################################################
# (non-)parallel jobs
##############################################################
set_count

if [ "$joboption_count" -gt 1 ] || [ "$LL_PARALLEL_SINGLE_JOBS" = "yes" ]; then
  echo "# @ job_type = parallel" >> "$LRMS_JOB_SCRIPT"
  echo "# @ total_tasks = $joboption_count" >> "$LRMS_JOB_SCRIPT"
  echo "# @ node = $joboption_numnodes" >> "$LRMS_JOB_SCRIPT"
fi

# set node to exclusive
if [ "$joboption_exclusivenode" = "true" ]; then
  echo "# @ node_usage = not_shared " >> "$LRMS_JOB_SCRIPT"
fi

##############################################################
# Execution times (obtained in seconds)
##############################################################
# cputime/walltime is obtained in seconds via $joboption_cputime and
# $joboption_walltime.
#
# The hard-limit ratios are expanded textually ("$var") inside $(( )) on
# purpose, exactly as before.
# NOTE(review): their format is defined outside this file - confirm.
if [ -n "$joboption_cputime" ] && [ "$joboption_cputime" -gt 0 ]; then
  # CPU time must be given per-task for LL.
  # Guard the divisor: an empty, non-numeric or zero task count would make
  # the arithmetic expansion fail and abort the whole script.
  cpu_tasks=$joboption_count
  case "$cpu_tasks" in
    '' | *[!0-9]* | 0) cpu_tasks=1 ;;
  esac
  cputime_pertask=$(( $joboption_cputime / $cpu_tasks ))
  cputime_hard_pertask=$(( ( $cputime_pertask * $time_hardlimit_ratio ) + 30 ))
  echo "# @ cpu_limit = ${cputime_hard_pertask} , ${cputime_pertask}" >> "$LRMS_JOB_SCRIPT"
fi

if [ -n "$joboption_walltime" ]; then
  if [ "$joboption_walltime" -lt 0 ]; then
    # Double quotes so that the offending value is actually printed.
    echo "WARNING: Less than 0 wall time requested: $joboption_walltime" 1>&2
    joboption_walltime=0
    echo 'WARNING: wall time set to 0' 1>&2
  fi
  joboption_walltime_hard=$(( ( $joboption_walltime * $time_hardlimit_ratio ) + 30 ))
  echo "# @ wall_clock_limit = ${joboption_walltime_hard} , ${joboption_walltime}" >> "$LRMS_JOB_SCRIPT"
fi

##############################################################
# Requested memory (mb)
##############################################################
set_req_mem

# There are soft and hard limits for virtual memory consumption in LL
# The limits are interpreted by LoadLeveler as per process in a
# parallel job. There is no need to recalculate the mem limit.
if [ -n "$joboption_memory" ]; then
  joboption_memory_hard=$(( $joboption_memory * $memory_hardlimit_ratio ))
  requirements="(Memory > ${joboption_memory_hard})"
  preferences="(Memory > ${joboption_memory})"
  # With consumable resources the memory request is expressed through the
  # "resources" keyword below instead.
  if [ "$LL_CONSUMABLE_RESOURCES" != "yes" ]; then
    echo "# @ requirements = ${requirements}" >> "$LRMS_JOB_SCRIPT"
    echo "# @ preferences = ${preferences}" >> "$LRMS_JOB_SCRIPT"
  fi
fi

##############################################################
# Consumable resources
# One cpu should be requested per task created. I.e. per count.
##############################################################
if [ "$LL_CONSUMABLE_RESOURCES" = "yes" ]; then
  echo "# @ resources = ConsumableCpus(1) ConsumableMemory(${joboption_memory})" >> "$LRMS_JOB_SCRIPT"
fi

##############################################################
# Override umask
##############################################################
#echo "umask 077" >> $LRMS_JOB_SCRIPT
#echo 'exec > /var/tmp/grid-job-output.$$ 2>&1' >> $LRMS_JOB_SCRIPT

##############################################################
# Init accounting
##############################################################
accounting_init

##############################################################
# Add environment variables
##############################################################
add_user_env

##############################################################
# Check for existence of executable,
##############################################################
if [ -z "${joboption_arg_0}" ]; then
  echo 'Executable is not specified' 1>&2
  ll_remove_temp_files
  exit 1
fi

# Only executables given relative to the session directory are checked;
# names starting with '$' or '/' are left alone.
case "${joboption_arg_0}" in
  '$'* | /*)
    ;;
  *)
    executable_path="$joboption_directory/${joboption_arg_0}"
    if [ ! -f "$executable_path" ]; then
      echo 'Executable does not exist, or permission denied.' 1>&2
      echo " Executable $executable_path" 1>&2
      echo " whoami: $(whoami)" 1>&2
      echo " ls -l $executable_path: $(ls -l "$executable_path")" 1>&2
      ll_remove_temp_files
      exit 1
    fi
    if [ ! -x "$executable_path" ]; then
      echo 'Executable is not executable' 1>&2
      ll_remove_temp_files
      exit 1
    fi
    ;;
esac

##################################################################
# Read queue from config or figure out which queue to use
##################################################################
if [ -n "${joboption_queue}" ]; then
  class=$joboption_queue
else
  # If queue is not set we must choose one.
  # LL_CLASS holds the command together with its "-l" flag, so it is
  # expanded unquoted on purpose below.
  LL_CLASS='llclass -l'
  if [ -n "$LL_BIN_PATH" ]; then
    LL_CLASS=${LL_BIN_PATH}/${LL_CLASS}
  fi
  queue_names=$(${LL_CLASS} | awk '/Name/ { print $2 }')

  # default will be shortest queue
  if [ -z "$joboption_walltime" ]; then
    joboption_walltime_hard=1
  fi

  # Pick the class with the smallest wall clock limit that is still
  # larger than the requested hard wall time.
  queue_time_sel=0
  for queue in $queue_names; do
    # First word inside the parentheses on the Wall_clock_limit line.
    queue_time=$(${LL_CLASS} "${queue}" \
      | awk '/Wall_clock_limit/ { split($0, part, "("); split(part[2], word, " "); print word[1] }')
    # Skip classes whose limit could not be parsed as a plain number
    # instead of feeding garbage to the numeric tests.
    case "$queue_time" in
      '' | *[!0-9]*) continue ;;
    esac
    if [ "${joboption_walltime_hard}" -lt "${queue_time}" ]; then
      if [ "${queue_time_sel}" -eq 0 ] || [ "${queue_time_sel}" -gt "${queue_time}" ]; then
        class=${queue}
        queue_time_sel=${queue_time}
      fi
    fi
  done

  if [ -z "$class" ]; then
    echo "WARNING: could not select a LL class for the job" 1>&2
  fi
fi

echo "# @ class=${class}" >> "$LRMS_JOB_SCRIPT"

###################################################################
# Priority of jobs
###################################################################
if [ -n "$joboption_priority" ]; then
  # LL: priority from 0-100. 50 is default
  # We can just use ARC priority directly
  echo "# @ user_priority = ${joboption_priority}" >> "$LRMS_JOB_SCRIPT"
fi

###################################################################
# Queue job
# No mail notification
###################################################################
echo "# @ notification = never" >> "$LRMS_JOB_SCRIPT"
echo "# @ queue" >> "$LRMS_JOB_SCRIPT"
echo " " >> "$LRMS_JOB_SCRIPT"

sourcewithargs_jobscript
setup_runtime_env

###################################################################
# setup soft limit trap
###################################################################
echo "trap \"echo 'exitcode=24'>>\$RUNTIME_JOB_DIAG;exit 24\" SIGXCPU" >> "$LRMS_JOB_SCRIPT"

##############################################################
# Add std... to job arguments
##############################################################
include_std_streams

##############################################################
#  Move files to local working directory (job is done on node only)
#  RUNTIME_JOB_DIR -> RUNTIME_LOCAL_SCRATCH_DIR/job_id
##############################################################
move_files_to_node

echo "" >> "$LRMS_JOB_SCRIPT"
echo "RESULT=0" >> "$LRMS_JOB_SCRIPT"
echo "" >> "$LRMS_JOB_SCRIPT"

##############################################################
#  Skip execution if something already failed
##############################################################
# Opens an "if" in the generated job script; it is closed by the "fi"
# written after cd_and_run below.
echo "" >> "$LRMS_JOB_SCRIPT"
echo "if [ \"\$RESULT\" = '0' ] ; then" >> "$LRMS_JOB_SCRIPT"
echo "echo \"runtimeenvironments=\$runtimeenvironments\" >> \"\$RUNTIME_JOB_DIAG\"" >> "$LRMS_JOB_SCRIPT"

##############################################################
#  Runtime configuration
##############################################################
RTE_stage1

if [ -z "$RUNTIME_NODE_SEES_FRONTEND" ]; then
  echo "Nodes detached from gridarea are not supported when LL is used. Aborting job submit" 1>&2
  ll_remove_temp_files
  exit 1
fi

##############################################################
#  Accounting (WN OS Detection)
##############################################################
detect_wn_systemsoftware

##############################################################
#  Execution
##############################################################
cd_and_run

echo "fi" >> "$LRMS_JOB_SCRIPT"

##############################################################
#  Runtime (post)configuration at computing node
##############################################################
RTE_stage2

##############################################################
#  Move files back to session directory (job is done on node only)
#  RUNTIME_JOB_DIR -> RUNTIME_LOCAL_SCRATCH_DIR/job_id
##############################################################
move_files_to_frontend

##############################################################
#  Finish accounting and exit job
##############################################################
accounting_end

#######################################
#  Submit the job
#######################################
echo "ll job script built" 1>&2

# job creation finished
if [ -n "$perflogdir" ]; then
  stop_ts=$(date +%s.%N)
  t=$(awk -v begin="$start_ts" -v end="$stop_ts" 'BEGIN { printf "%.3f", end-begin }')
  echo "[$(date '+%Y-%m-%d %T')] submit-ll-job, JobScriptCreation: $t" >> "$perflogfilesub"
fi

# Execute sub command from inside the session directory. Abort if the
# directory cannot be entered rather than submitting from the wrong place.
if ! cd "$joboption_directory"; then
  echo "job *NOT* submitted successfully!" 1>&2
  echo "cannot change to directory $joboption_directory" 1>&2
  ll_remove_temp_files
  exit 1
fi

echo "LL script follows:" 1>&2
cat "$LRMS_JOB_SCRIPT" 1>&2
echo "" 1>&2

if [ -n "$perflogdir" ]; then
  start_ts=$(date +%s.%N)
fi

"${LL_SUB}" "$LRMS_JOB_SCRIPT" 1>"$LRMS_JOB_OUT" 2>"$LRMS_JOB_ERR"
LLSUB_RESULT="$?"

if [ -n "$perflogdir" ]; then
  stop_ts=$(date +%s.%N)
  t=$(awk -v begin="$start_ts" -v end="$stop_ts" 'BEGIN { printf "%.3f", end-begin }')
  echo "[$(date '+%Y-%m-%d %T')] submit-ll-job, JobSubmission: $t" >> "$perflogfilesub"
fi

if [ "$LLSUB_RESULT" -eq 0 ]; then
  echo "LRMS_JOB_OUT is $LRMS_JOB_OUT"
  # The job name is the text between the first pair of double quotes in
  # the llsubmit output.
  ll_job_name=$(awk '{split($0,field,"\"");print field[2]}' "$LRMS_JOB_OUT")
  # Test for an empty name BEFORE appending the ".0" suffix; otherwise the
  # check can never trigger and a bogus id ".0" would be recorded.
  if [ -z "${ll_job_name}" ]; then
    echo "job *NOT* submitted successfully!" 1>&2
    echo "failed getting the LL jobid for the job!" 1>&2
  else
    job_id="${ll_job_name}.0"
    echo "joboption_jobid=$job_id" >> "$GRAMI_FILE"
    echo "job submitted successfully!" 1>&2
    echo "local job id: $job_id" 1>&2
    # Remove temporary job script file
    ll_remove_temp_files
    echo "----- exiting submit_ll_job -----" 1>&2
    echo "" 1>&2
    exit 0
  fi
else
  echo "job *NOT* submitted successfully!" 1>&2
  echo "got error code from llsubmit!" 1>&2
fi

# Failure path: show what llsubmit printed, clean up and report failure.
echo "Output is:" 1>&2
cat "$LRMS_JOB_OUT" 1>&2
echo "Error output is:" 1>&2
cat "$LRMS_JOB_ERR" 1>&2
ll_remove_temp_files
echo "----- exiting submit_ll_job -----" 1>&2
echo "" 1>&2
exit 1