#! /bin/sh
# Copyright 2004-2017 The MathWorks, Inc.

#========================= realpath.sh (start) ============================
#-----------------------------------------------------------------------------
# Usage: realpath filename
# Returns the actual path in the file system of a file.  It follows links.
# It echoes nothing if an error occurs.
# Return status: 0 if successful.
#    If return status is 0, the function echoes out the real path to the file.
# Return status 1  Exceeded the maximum number of links to follow.
# Return status 2  Some other error occurred.
realpath() {
    filename_rpath=$1

    SUCCESS_STATUS_rpath=0
    MAX_LINKS_EXCEEDED_rpath=1
    OTHER_ERROR_rpath=2

    # Remember where we started: the loop below changes directory, and every
    # exit path goes back to this directory before returning.
    cpath_rpath=$(pwd)

    #
    # Follow up to 8 links before giving up.  Same as BSD 4.3
    # We cd into the directory where the file is located, and do a /bin/pwd
    # to get the name of the CWD.  If the file is a symbolic link, we update
    # the name of the file to the link target and repeat the process.
    # Once we arrive in a directory where we do not have a soft-link, we are
    # done.
    #
    n_rpath=1
    maxlinks_rpath=8
    while [ "$n_rpath" -le "$maxlinks_rpath" ]; do
        #
        # Get directory part of $filename_rpath correctly!
        #
        newdir_rpath=$(dirname "$filename_rpath")
        # dirname shouldn't return empty instead of ".", but let's be paranoid.
        if [ -z "$newdir_rpath" ]; then
            newdir_rpath="."
        fi

        # Output is discarded so that nothing cd prints can end up in the
        # path echoed by this function.
        if ! cd "$newdir_rpath" > /dev/null 2>&1; then
            # This should not happen.  The file is in a non-existing directory.
            cd "$cpath_rpath"
            return $OTHER_ERROR_rpath
        fi

        #
        # Need the command /bin/pwd - not the shell built-in one.  The command
        # /bin/pwd resolves all symbolic links, but the shell built-in one
        # does not.
        #
        newdir_rpath=$(/bin/pwd)

        # Strip the directories off the filename_rpath.
        newbase_rpath=$(basename "$filename_rpath")

        if [ -h "$newbase_rpath" ]; then
            #
            # Symbolic link.  The link target is everything after ' -> ' in
            # the output of ls.  -d keeps ls from listing the contents when
            # the link points at a directory.
            #
            lscmd=$(ls -ld "$newbase_rpath" 2>/dev/null)
            filename_rpath=$(printf '%s\n' "$lscmd" | sed 's/.*-> //')
        elif [ -e "$newbase_rpath" ]; then
            #
            # We are done.  We found a file and not a symbolic link.
            # newdir_rpath contains the directory name, newbase_rpath contains
            # the file name.
            #
            cd "$cpath_rpath"
            printf '%s\n' "$newdir_rpath/$newbase_rpath"
            return $SUCCESS_STATUS_rpath
        else
            # This should not happen, the file does not exist.
            cd "$cpath_rpath"
            return $OTHER_ERROR_rpath
        fi

        n_rpath=$((n_rpath + 1))
    done

    # We exceeded the maximum number of links to follow.
    cd "$cpath_rpath"
    return $MAX_LINKS_EXCEEDED_rpath
}
#========================= realpath.sh (end) ==============================

#========================= pathsetup.sh (start) ============================
#-----------------------------------------------------------------------------
# Usage: warnIfNotInBin directory script_name
# Prints a warning when directory does not end in toolbox/distcomp/bin.
# script_name is only used in the text of the warning.
warnIfNotInBin() {
    case "$1" in
        *toolbox/distcomp/bin)
            # Running from the expected location: nothing to report.
            ;;
        *)
            echo "Warning: $2 should be run only from toolbox/distcomp/bin,"
            echo "or using a symbolic link to toolbox/distcomp/bin/$2."
            echo ""
            ;;
    esac
}

# THIS MUST BE RUN FIRST BEFORE SETTING UP THE APPLICATION VARIABLES
# Get the fully qualified path to the script
SCRIPT="$0"
REALPATH=$(realpath "$SCRIPT")

# Get the path to distcomp bin BASE from this by removing the name of the shell
# script, i.e. the last '/' and everything after it.
BASE=${REALPATH%/*}
warnIfNotInBin "$BASE" startjobmanager

# Make sure we are in the correct directory to run setbase.sh.  setbase.sh is
# sourced through a relative path, so do not carry on from some other
# directory if the cd fails.
if ! cd "$BASE"; then
    echo "Error: unable to change directory to $BASE"
    exit 1
fi

# Set base directory variables
. util/setbase.sh

#-----------------------------------------------------------------------------
#========================= pathsetup.sh (end) ==============================

#-----------------------------------------------------------------------------
# Usage: usage
# Prints the help text for startjobmanager on standard output.
usage() {
    echo
    echo "startjobmanager: Start a job manager process and the associated job manager"
    echo " lookup process under the mdce service, which maintains them"
    echo " after that. The job manager handles the storage of jobs and"
    echo " the distribution of tasks contained in jobs to MATLAB workers"
    echo " that are registered with it."
    echo " The mdce service must already be running on the specified"
    echo " computer."
    echo
    echo "Usage: startjobmanager [ -name job_manager_name ]"
    echo " [ -remotehost hostname ]"
    echo " [ -clean | -cleanPreserveJobs ]"
    echo " [ -baseport port_number ]"
    echo " [ -v ]"
    echo " [ -help ]"
    echo
    echo "-name Specify the name of the job manager. This identifies the"
    echo " job manager to MATLAB worker sessions and MATLAB clients."
    echo " The default is the value of DEFAULT_JOB_MANAGER_NAME in the"
    echo " mdce_def file."
    echo
    echo "-remotehost Specify the name of the host where you want to start the"
    echo " job manager and the job manager lookup process. If omitted,"
    echo " they are started on the local host."
    echo
    echo "-clean Delete all checkpoint information stored on disk from previous"
    echo " instances of this job manager before starting. This will"
    echo " clean the job manager so that it will initialize with no jobs"
    echo " or tasks."
    echo
    echo "-cleanPreserveJobs Delete checkpoint information stored on disk from previous"
    echo " instances of this job manager before starting. Any existing "
    echo " jobs and tasks will be preserved. "
    echo
    echo "-baseport Specify the base port that the mdce service on the remote host"
    echo " is using. You only need to specify this if the value of"
    echo " BASE_PORT in the local mdce_def file does not match the base"
    echo " port being used by the mdce service on the remote host."
    echo
    echo "-waitForWorkers Specify the required minimum number of workers to register"
    echo " with the jobmanager before starting to process jobs. As long"
    echo " as the number of workers registered is smaller than this number,"
    echo " the job manager remains in the paused state."
    echo
    echo "-v Be verbose. Display the progress of the command execution."
    echo
    echo "-help Print this help information."
    echo
    echo "Examples: 1) Start the job manager MyJobManager on the local host."
    echo
    echo " startjobmanager -name MyJobManager"
    echo
    echo " 2) Start the job manager MyJobManager on the host JMHost."
    echo
    echo " startjobmanager -name MyJobManager -remotehost JMHost"
    echo
    echo " 3) Start the job manager MyJobManager on the local host and"
    echo " wait for at least 4 workers to register before processing"
    echo " the queue."
    echo
    echo " startjobmanager -name MyJobManager -waitForWorkers 4"
    echo
    echo "See also: mdce, nodestatus, stopjobmanager, startworker and stopworker."
    echo
}

#-----------------------------------------------------------------------------
# Defaults for the settings that the command line options below can override.
SERVICE_CLEAN="false"
PRESERVE_JOBS="false"
REMOTE_COMMAND_VERBOSITY="false"
CERTIFICATE_FILE=""
READ_DATABASE_DIRECTORY=""
WAIT_ON_NUM_WORKERS="0"
JM_SERVICE_CONFIG_FILE="start-jobmanager.config"

# Parse the command line.  Options that take a value consume it with an extra
# shift.  Parsing stops at the first empty argument.
# NOTE(review): when a value-taking option is the last word on the command
# line, the extra shift runs with nothing left to shift; POSIX allows a
# non-interactive shell to exit in that case, so the outcome depends on the
# shell.  Left unchanged to stay compatible with existing callers.
while [ -n "$1" ]; do
    case "$1" in
        -name)
            SERVICE_NAME="$2"
            shift
            ;;
        -clean)
            SERVICE_CLEAN="true"
            PRESERVE_JOBS="false"
            ;;
        -cleanPreserveJobs)
            SERVICE_CLEAN="true"
            PRESERVE_JOBS="true"
            ;;
        -remotehost)
            REMOTE_HOSTNAME="$2"
            shift
            ;;
        -baseport)
            READ_BASE_PORT="$2"
            shift
            ;;
        -waitForWorkers)
            WAIT_ON_NUM_WORKERS="$2"
            shift
            ;;
        -multicast)
            READ_JOB_MANAGER_HOST="USE_MULTICAST"
            ;;
        -allowResizing)
            MDCS_ALLOW_RESIZING="true"
            ;;
        -v)
            REMOTE_COMMAND_VERBOSITY="true"
            ;;
        -certificate)
            CERTIFICATE_FILE="$2"
            shift
            ;;
        -help|-h)
            usage
            exit 1
            ;;
        # The rest of the flags are undocumented.
        -GDSendpoint)
            GDS_END_POINT="$2"
            shift
            ;;
        -GDSlogintoken)
            GDS_LOGIN_TOKEN="$2"
            shift
            ;;
        -GDSqueueid)
            GDS_QUEUE_ID="$2"
            shift
            ;;
        -useGDSstorage)
            USE_GDS_STORAGE="true"
            ;;
        -useMSMPI)
            USE_MSMPI="true"
            ;;
        -databaseDirectory)
            READ_DATABASE_DIRECTORY="$2"
            shift
            ;;
        -scriptlogfile)
            READ_LOG_FILE="$2"
            shift
            ;;
        -startResizeService)
            START_RESIZE_SERVICE="true"
            ;;
        -cloudCenterToken)
            CLOUD_CENTER_TOKEN="$2"
            shift
            ;;
        -clusterID)
            CLOUD_CENTER_CLUSTER_ID="$2"
            shift
            ;;
        -jmServiceConfigFile)
            JM_SERVICE_CONFIG_FILE="$2"
            shift
            ;;
        *)
            echo "Error: unrecognized option: $1"
            usage
            exit 1
            ;;
    esac
    shift
done

# Check the job manager name for invalid characters.
# The name is handed over as a single word so that whitespace or glob
# characters in it reach the check unmodified; when no name was given, no
# argument is passed at all.
. "$UTILBASE/checkInputs"
validateJobManagerName ${SERVICE_NAME:+"$SERVICE_NAME"}

# Set the general MDCE environment
. "$UTILBASE/setmdceenv"
sourceMdceDef
defineJRECMD

# Values given on the command line win over the ones already present in the
# environment at this point; the remaining switches default to "false".
REMOTE_HOSTNAME=${REMOTE_HOSTNAME:-$HOSTNAME}
BASE_PORT=${READ_BASE_PORT:-$BASE_PORT}
JOB_MANAGER_HOST=${READ_JOB_MANAGER_HOST:-JOB_MANAGER_LOOKUP_NOT_SPECIFIED}
DATABASE_DIRECTORY=${READ_DATABASE_DIRECTORY:-$DATABASE_DIRECTORY}
USE_GDS_STORAGE=${USE_GDS_STORAGE:-"false"}
USE_MSMPI=${USE_MSMPI:-"false"}
MDCS_ALLOW_RESIZING=${MDCS_ALLOW_RESIZING:-"false"}
SCRIPT_LOG_FILE=${READ_LOG_FILE:-""}
START_RESIZE_SERVICE=${START_RESIZE_SERVICE:-"false"}

# -useGDSstorage and the three -GDS* values must be given together or not at
# all.
if [ "$USE_GDS_STORAGE" = "true" ]; then
    if [ -z "$GDS_END_POINT" ] || [ -z "$GDS_LOGIN_TOKEN" ] || [ -z "$GDS_QUEUE_ID" ]; then
        echo "Error: -useGDSstorage requires -GDSendpoint, -GDSlogintoken and -GDSqueueid to be specified"
        exit 1
    fi
else
    if [ -n "$GDS_END_POINT" ] || [ -n "$GDS_LOGIN_TOKEN" ] || [ -n "$GDS_QUEUE_ID" ]; then
        echo "Error: -useGDSstorage must be specified in order to use -GDSendpoint, -GDSlogintoken and -GDSqueueid"
        exit 1
    fi
fi

# Likewise -startResizeService goes together with -cloudCenterToken and
# -clusterID.
if [ "$START_RESIZE_SERVICE" = "true" ]; then
    if [ -z "$CLOUD_CENTER_TOKEN" ] || [ -z "$CLOUD_CENTER_CLUSTER_ID" ]; then
        echo "Error: -startResizeService requires -cloudCenterToken and -clusterID to be specified"
        exit 1
    fi
else
    if [ -n "$CLOUD_CENTER_TOKEN" ] || [ -n "$CLOUD_CENTER_CLUSTER_ID" ]; then
        echo "Error: -startResizeService must be specified in order to use -cloudCenterToken and -clusterID"
        exit 1
    fi
fi

# Start the lookup service if not already running.
# JRECMD and the COMMAND_LINE_JRE_* variables are deliberately left unquoted
# so that they can expand to several words or to nothing (NOTE(review):
# confirm against setmdceenv).  Every other value is quoted so that it
# reaches the JVM as exactly one argument.
$JRECMD \
    ${COMMAND_LINE_JRE_MEMORY} \
    ${COMMAND_LINE_JRE_GC} \
    -classpath "$REMOTE_COMMAND_CLASSPATH" \
    -Djava.library.path="$NATIVE_LIBRARY_PATH" \
    -Djava.security.policy="$CONFIGBASE/jsk-all.policy" \
    -Dcom.mathworks.toolbox.distcomp.matlabroot="$MATBASE" \
    -Dcom.mathworks.toolbox.distcomp.remote_command_type="lookup" \
    -Dcom.mathworks.toolbox.distcomp.remote_hostname="$REMOTE_HOSTNAME" \
    -Dcom.mathworks.toolbox.distcomp.base_port="$BASE_PORT" \
    -Dcom.mathworks.toolbox.distcomp.remote_command_verbosity="$REMOTE_COMMAND_VERBOSITY" \
    -Dcom.mathworks.toolbox.distcomp.remote_command_action="start" \
    -Dcom.mathworks.toolbox.distcomp.servicename="$SERVICE_NAME" \
    -Dcom.mathworks.toolbox.distcomp.service_config_file="start-jini.config" \
    -Dcom.mathworks.toolbox.distcomp.clean_checkpoint_info="$SERVICE_CLEAN" \
    -Dcom.mathworks.toolbox.distcomp.script_log_file="$SCRIPT_LOG_FILE" \
    com.mathworks.toolbox.distcomp.control.RunCommandSender \
    "$CONFIGBASE/control-startstop.config"
# Save the status right away: any later command, including the [ test
# below, overwrites $?.
LOOKUP_EXIT_STATUS=$?

# Exit immediately in case of failure, passing on the status of the command.
if [ "$LOOKUP_EXIT_STATUS" -ne 0 ]; then
    exit "$LOOKUP_EXIT_STATUS"
fi

# Start the job manager itself.
# Run RunCommandSender with the "start" action and the job manager
# configuration, then record its exit status in JM_EXIT_STATUS.
# JRECMD and the COMMAND_LINE_JRE_* variables are deliberately left unquoted
# so that they can expand to several words or to nothing (NOTE(review):
# confirm against the script that defines them).  Every other value is
# quoted so that whitespace or glob characters in a host name, path or token
# cannot split or rewrite the argument.
# NOTE(review): the GDS login token and the cloud center token are passed as
# command line arguments and may therefore show up in process listings.
$JRECMD \
    ${COMMAND_LINE_JRE_MEMORY} \
    ${COMMAND_LINE_JRE_GC} \
    -classpath "$REMOTE_COMMAND_CLASSPATH" \
    -Djava.library.path="$NATIVE_LIBRARY_PATH" \
    -Djava.security.policy="$CONFIGBASE/jsk-all.policy" \
    -Dcom.mathworks.toolbox.distcomp.matlabroot="$MATBASE" \
    -Dcom.mathworks.toolbox.distcomp.servicename="$SERVICE_NAME" \
    -Dcom.mathworks.toolbox.distcomp.remote_hostname="$REMOTE_HOSTNAME" \
    -Dcom.mathworks.toolbox.distcomp.base_port="$BASE_PORT" \
    -Dcom.mathworks.toolbox.distcomp.remote_command_verbosity="$REMOTE_COMMAND_VERBOSITY" \
    -Dcom.mathworks.toolbox.distcomp.remote_command_action="start" \
    -Dcom.mathworks.toolbox.distcomp.service_config_file="$JM_SERVICE_CONFIG_FILE" \
    -Dcom.mathworks.toolbox.distcomp.clean_checkpoint_info="$SERVICE_CLEAN" \
    -Dcom.mathworks.toolbox.distcomp.preserve_job_database="$PRESERVE_JOBS" \
    -Dcom.mathworks.toolbox.distcomp.use_GDS_storage="$USE_GDS_STORAGE" \
    -Dcom.mathworks.toolbox.distcomp.GDS_login_token="$GDS_LOGIN_TOKEN" \
    -Dcom.mathworks.toolbox.distcomp.GDS_end_point="$GDS_END_POINT" \
    -Dcom.mathworks.toolbox.distcomp.GDS_queue_id="$GDS_QUEUE_ID" \
    -Dcom.mathworks.toolbox.distcomp.use_MSMPI="$USE_MSMPI" \
    -Dcom.mathworks.toolbox.distcomp.allowResizing="$MDCS_ALLOW_RESIZING" \
    -Dcom.mathworks.toolbox.distcomp.database_directory_override="$DATABASE_DIRECTORY" \
    -Dcom.mathworks.toolbox.distcomp.lookup_hosts="$JOB_MANAGER_HOST" \
    -Dcom.mathworks.toolbox.distcomp.control.certificate_file="$CERTIFICATE_FILE" \
    -Dcom.mathworks.toolbox.distcomp.script_log_file="$SCRIPT_LOG_FILE" \
    -Dcom.mathworks.toolbox.distcomp.wait_on_num_workers="$WAIT_ON_NUM_WORKERS" \
    -Dcom.mathworks.toolbox.distcomp.mjs.start_resize_service="$START_RESIZE_SERVICE" \
    -Dcom.mathworks.toolbox.distcomp.mjs.cloud_center_token="$CLOUD_CENTER_TOKEN" \
    -Dcom.mathworks.toolbox.distcomp.mjs.cluster_id="$CLOUD_CENTER_CLUSTER_ID" \
    -Ddistcomp.rmi.server.hostname="$HOSTNAME" \
    com.mathworks.toolbox.distcomp.control.RunCommandSender \
    "$CONFIGBASE/control-startstop-jobmanager.config"
JM_EXIT_STATUS=$?
# Handle job manager startup failure by stopping the lookup service if there are
# no job managers running.
# The exit status of this stop request is not examined: the script always
# finishes with the status recorded in JM_EXIT_STATUS.
# JRECMD and the COMMAND_LINE_JRE_* variables are deliberately left unquoted
# so that they can expand to several words or to nothing (NOTE(review):
# confirm against the script that defines them).  Every other value is
# quoted so that it reaches the JVM as exactly one argument.
if [ "$JM_EXIT_STATUS" -ne 0 ]; then
    $JRECMD \
        ${COMMAND_LINE_JRE_MEMORY} \
        ${COMMAND_LINE_JRE_GC} \
        -classpath "$REMOTE_COMMAND_CLASSPATH" \
        -Djava.library.path="$NATIVE_LIBRARY_PATH" \
        -Djava.security.policy="$CONFIGBASE/jsk-all.policy" \
        -Dcom.mathworks.toolbox.distcomp.matlabroot="$MATBASE" \
        -Dcom.mathworks.toolbox.distcomp.remote_command_type="lookup" \
        -Dcom.mathworks.toolbox.distcomp.remote_hostname="$REMOTE_HOSTNAME" \
        -Dcom.mathworks.toolbox.distcomp.base_port="$BASE_PORT" \
        -Dcom.mathworks.toolbox.distcomp.remote_command_verbosity="$REMOTE_COMMAND_VERBOSITY" \
        -Dcom.mathworks.toolbox.distcomp.remote_command_action="stop" \
        -Dcom.mathworks.toolbox.distcomp.servicename="$SERVICE_NAME" \
        -Dcom.mathworks.toolbox.distcomp.service_config_file="start-jini.config" \
        -Dcom.mathworks.toolbox.distcomp.clean_checkpoint_info="$SERVICE_CLEAN" \
        -Dcom.mathworks.toolbox.distcomp.script_log_file="$SCRIPT_LOG_FILE" \
        com.mathworks.toolbox.distcomp.control.RunCommandSender \
        "$CONFIGBASE/control-startstop.config"
fi

exit "$JM_EXIT_STATUS"