#!/bin/sh

##
## This script tunes Linux kernel parameters useful for scaling HTCondor.
## Some values may be inappropriate for machines not dedicated to HTCondor.
## This script must be run as root, and we assume that (and support only)
## HTCondor is running as root for large installations.
##
## The next '##' section marks the beginning of the implementation section
## of this script, and you should not need to change anything past that mark.
##

#
# This script logs its actions to syslog.  If you'd rather it didn't,
# change the line below to something like '/bin/true'.
#
LOGGER='/usr/bin/logger -t htcondor'
if [ ! -x /usr/bin/logger ]; then
    # The expected logger binary is missing.  Fall back to whatever logger
    # is on the PATH, or to a no-op, so that every log message does not
    # produce a "not found" error from the shell.
    if command -v logger > /dev/null 2>&1; then
        LOGGER='logger -t htcondor'
    else
        LOGGER='true'
    fi
fi

#
# Increase the global number of file descriptors.  For example, each
# dynamically-linked shadow uses approximately 13 just to open its
# libraries.
#
# We don't set the per-process maximum, because HTCondor will do that
# appropriately for each different subsystem (daemon) when run as root.
# If necessary, you can change those values using the configuration variables
# MAX_FILE_DESCRIPTORS and <SUBSYS>_MAX_FILE_DESCRIPTORS.
#
# This should match LimitNOFILE in the condor.service file for systemd.
#
GLOBAL_MAX_FDS=32768

#
# Increase the maximum process ID.  By default, Linux process IDs wrap at
# 32768, which isn't a lot of processes when you have one per running job.
#
# This should match the TasksMax in the condor.service file for systemd.
#
GLOBAL_MAX_PROCESSES=4194303

#
# Unless otherwise specified, an outbound connection uses a port within
# this range.  On some systems, this defaults to 1024-4999, which may not
# be enough.  You can also force HTCondor to specify a particular
# outbound port by setting the configuration variables OUT_LOWPORT and
# OUT_HIGHPORT appropriately, but this will be slower under load, as
# HTCondor has to search for an open port.
#
LOCAL_PORT_RANGE="1024 65535"

#
# Increase the length of the TCP listen queue.  This allows more connections
# to pile up while HTCondor is otherwise busy.
#
TCP_LISTEN_QUEUE=1024

#
# Likewise, the central manager (collector) needs to have large UDP buffers.
# Increase the maximum allowed size of network receive buffers.
#
MAX_RECEIVE_BUFFER=10485760

# Maximum amount of dirty filesystem bytes to buffer in the kernel
# before processes writing to the filesystem are blocked, and made
# to do their own i/o synchronously.  Set to 0 to undo.
# If we are running with cgroups on, there's a Linux kernel bug
# that causes spurious OOM events sent to a cgroup with a hard memory limit
# if it writes a lot of data to the filesystem quickly.  Limiting
# the buffering to 100M works around the problem.
DIRTY_BYTES=100000000

# Set root user quota for max number of kernel session keys and
# bytes.  Kernel session keys are used by Kerberos, AFS, ecryptfs, and
# other services.  The condor_master creates a new session keyring by
# default on startup (via config knob DISCARD_SESSION_KEYRING_ON_STARTUP)
# so that Kerberos tokens and the like are not leaked to user jobs, and
# also to support encrypted execute directories.  The default quota
# starting with RHEL 6.4 is 1M keys and 25M bytes, which is plenty
# big.  But older kernels (like what shipped in RHEL 6.0) have a crazy
# small quota of 200 keys.  So here we just set these params to what
# RHEL 6.4+ use by default just in case people try to use HTCondor on
# an earlier kernel.
ROOT_MAXKEYS=1000000
ROOT_MAXKEYS_BYTES=25000000

##
## Implementation.  You shouldn't need to change anything below here.
##

# Every decision is also recorded, as comment lines, in a sysctl.d file when
# that directory exists; otherwise the record is discarded.
LOG=/dev/null
if [ -d /etc/sysctl.d ]; then
    LOG=/etc/sysctl.d/99-htcondor.conf
    # If the header cannot be written (e.g. not running as root), fall back
    # to /dev/null so later appends do not each fail with a redirect error.
    (
        echo "#"
        echo "# This file was written by $0"
        echo "# when the condor_master started up."
        echo "#"
        echo "# This script tunes kernel parameters to support HTCondor at"
        echo "# larger scales. A list of the changes follows. You can set"
        echo "# ENABLE_KERNEL_TUNING = FALSE"
        echo "# in your HTCondor configuration to disable this entirely, or"
        echo "# set LINUX_KERNEL_TUNING_SCRIPT to some other file to change"
        echo "# which script is run when the condor_master daemon starts."
        echo "#"
    ) > "${LOG}" || LOG=/dev/null
fi

#
# logTuningMessage MESSAGE
#
# Report MESSAGE three ways: on stdout, as a '#' comment line appended to
# ${LOG}, and through the ${LOGGER} command.
#
logTuningMessage() {
    printf '%s\n' "$1"
    printf '# %s\n' "$1" >> "${LOG}"
    # ${LOGGER} is deliberately unquoted: it holds a command plus arguments.
    printf '%s\n' "$1" | ${LOGGER}
}

#
# isInteger VALUE
#
# Succeed only if VALUE is a non-empty decimal integer (optional leading '-').
#
isInteger() {
    case "${1#-}" in
        '' | *[!0-9]*) return 1 ;;
    esac
    return 0
}

#
# writeKernelParameter PARAMETER FILE VALUE
#
# Write VALUE to FILE.  Logs a message and returns 1 if the write fails.
#
writeKernelParameter() {
    if ! printf '%s\n' "$3" > "$2"; then
        logTuningMessage "Failed to change $1 ($2) to $3."
        return 1
    fi
    return 0
}

#
# increaseKernelParameter PARAMETER FILE NEW
#
# Raise the integer stored in FILE to NEW, but only if NEW is larger than
# the current value; the parameter is never lowered.  PARAMETER is only a
# label for the log messages.  An empty NEW means "leave it alone".
# A missing FILE or a non-integer value is logged and skipped.
#
increaseKernelParameter() {
    PARAMETER=$1
    FILE=$2
    NEW=$3

    [ -n "${NEW}" ] || return 0

    # Not every kernel exposes every tunable.
    if [ ! -e "${FILE}" ]; then
        logTuningMessage "Not changing ${PARAMETER} (${FILE}): file does not exist."
        return 0
    fi

    OLD=$(cat "${FILE}")

    # Guard the numeric test below, which errors out on non-integers.
    if ! isInteger "${NEW}" || ! isInteger "${OLD}"; then
        logTuningMessage "Not changing ${PARAMETER} (${FILE}): cannot compare new value (${NEW}) with old value (${OLD})."
        return 0
    fi

    if [ "${NEW}" -gt "${OLD}" ]; then
        logTuningMessage "Changing ${PARAMETER} (${FILE}) from ${OLD} to ${NEW}"
        writeKernelParameter "${PARAMETER}" "${FILE}" "${NEW}"
        return $?
    fi

    logTuningMessage "Not changing ${PARAMETER} (${FILE}): new value (${NEW}) <= old value (${OLD})."
    return 0
}

#
# setKernelParameter PARAMETER FILE NEW
#
# Unconditionally write NEW to FILE.  Used for values that are not a single
# integer, or that may need to be lowered.  PARAMETER is only a label for
# the log messages.  An empty NEW means "leave it alone"; a missing FILE is
# logged and skipped rather than created.
#
setKernelParameter() {
    PARAMETER=$1
    FILE=$2
    NEW=$3

    [ -n "${NEW}" ] || return 0

    if [ ! -e "${FILE}" ]; then
        logTuningMessage "Not changing ${PARAMETER} (${FILE}): file does not exist."
        return 0
    fi

    OLD=$(cat "${FILE}")
    logTuningMessage "Changing ${PARAMETER} (${FILE}) from ${OLD} to ${NEW}"
    writeKernelParameter "${PARAMETER}" "${FILE}" "${NEW}"
}

increaseKernelParameter "GLOBAL_MAX_FDS" "/proc/sys/fs/file-max" "${GLOBAL_MAX_FDS}"

# Set GLOBAL_MAX_PROCESSES and LOCAL_PORT_RANGE only for schedd machines.
# Find out whether this machine runs a schedd: look up the binary behind each
# DAEMON_LIST entry and compare its base name.  DAEMON_LIST may be comma-
# and/or space-separated, so commas are turned into spaces and the result is
# deliberately left unquoted to let the shell split it into words.
daemonList=$(condor_config_val DAEMON_LIST | sed -e 's/,/ /g')
for daemon in ${daemonList}; do
    daemonFile=$(condor_config_val "${daemon}")
    if [ -n "${daemonFile}" ]; then
        baseName=$(basename "${daemonFile}")
        if [ "${baseName}" = "condor_schedd" ]; then
            increaseKernelParameter "GLOBAL_MAX_PROCESSES" "/proc/sys/kernel/pid_max" "${GLOBAL_MAX_PROCESSES}"
            setKernelParameter "LOCAL_PORT_RANGE" "/proc/sys/net/ipv4/ip_local_port_range" "${LOCAL_PORT_RANGE}"
            # One schedd is enough; don't repeat the tuning (and the log
            # lines) for every additional daemon using the schedd binary.
            break
        fi
    fi
done

increaseKernelParameter "TCP_LISTEN_QUEUE" "/proc/sys/net/core/somaxconn" "${TCP_LISTEN_QUEUE}"
increaseKernelParameter "ROOT_MAXKEYS" "/proc/sys/kernel/keys/root_maxkeys" "${ROOT_MAXKEYS}"
increaseKernelParameter "ROOT_MAXKEYS_BYTES" "/proc/sys/kernel/keys/root_maxbytes" "${ROOT_MAXKEYS_BYTES}"

# Has the admin enabled cgroups in condor (or left the default on)?
# Queried once here and reused below.
cgroupsEnabled=false
if condor_config_val BASE_CGROUP > /dev/null 2>&1; then
    cgroupsEnabled=true
fi

if [ "${cgroupsEnabled}" = "true" ]; then
    setKernelParameter "FS_CACHE_DIRTY_BYTES" "/proc/sys/vm/dirty_bytes" "${DIRTY_BYTES}"
fi

# FIXME: Only on the collector.
increaseKernelParameter "MAX_RECEIVE_BUFFER" "/proc/sys/net/core/rmem_max" "${MAX_RECEIVE_BUFFER}"

# Mount cgroups if not already mounted.
if [ "${cgroupsEnabled}" = "true" ]; then
    if [ -e /sys/fs/cgroup/cgroup.controllers ]; then
        # A cgroup v2 unified hierarchy is mounted.  Its /proc/self/cgroup
        # entry ("0::/path") never names a controller, so the check below
        # would always try to mount v1 hierarchies here; skip that instead.
        :
    # Look for "memory" in the controller field (hierarchy:controllers:path)
    # only, so a cgroup path that merely contains the word does not count.
    # Assume that if memory is mounted, all the controllers are.
    elif ! grep -Eq '^[0-9]+:([^:]*,)?memory(,[^:]*)?:' /proc/self/cgroup; then
        for controller in memory cpu cpuacct blkio freezer; do
            mkdir -p "/cgroup/${controller}"
            mount -t cgroup -o "${controller}" "${controller}" "/cgroup/${controller}"
        done
    fi
fi

exit 0