# Options for the Services, Agents and Executors sections, plus the JobWrapper options.
# One statement per line: a "#" starts a comment that runs to the end of the line.
Services
{
  JobManager
  {
    Port = 9132
    MaxParametricJobs = 100
    Authorization
    {
      Default = authenticated
    }
  }
  TornadoJobManager
  {
    Protocol = https
    Authorization
    {
      Default = authenticated
    }
  }
  JobMonitoring
  {
    Port = 9130
    Authorization
    {
      Default = authenticated
    }
  }
  TornadoJobMonitoring
  {
    Protocol = https
    Authorization
    {
      Default = authenticated
    }
  }
  JobStateUpdate
  {
    Port = 9136
    Authorization
    {
      Default = authenticated
    }
    MaxThreads = 100
  }
  TornadoJobStateUpdate
  {
    Protocol = https
    Authorization
    {
      Default = authenticated
    }
  }
  # Parameters of the WMS Matcher service
  Matcher
  {
    Port = 9170
    MaxThreads = 20
    Authorization
    {
      Default = authenticated
      getActiveTaskQueues = JobAdministrator
    }
  }
  # Parameters of the WMS Administrator service
  WMSAdministrator
  {
    Port = 9145
    Authorization
    {
      Default = Operator
      getJobPilotOutput = authenticated
      getSiteMask = authenticated
      getSiteMaskStatus = authenticated
      ping = authenticated
      # "+=" appends to the value set on the previous line (SiteManager and Operator)
      allowSite = SiteManager
      allowSite += Operator
      banSite = SiteManager
      banSite += Operator
    }
  }
  # Parameters of the Pilots service
  PilotManager
  {
    Port = 9171
    Authorization
    {
      Default = authenticated
    }
  }
  # Parameters of the PilotsLogging service
  PilotsLogging
  {
    Port = 9146
    Authorization
    {
      Default = Operator
      getPilotsLogging = authenticated
      addPilotsLogging = Operator
      deletePilotsLogging = Operator
    }
    Enable = No
    PilotsLoggingQueue = serviceURL::QueueType::QueueName
  }
  SandboxStore
  {
    Port = 9196
    LocalSE = ProductionSandboxSE
    MaxThreads = 200
    toClientMaxThreads = 100
    Backend = local
    MaxSandboxSizeMiB = 10
    SandboxPrefix = Sandbox
    BasePath = /opt/dirac/storage/sandboxes
    DelayedExternalDeletion = True
    Authorization
    {
      Default = authenticated
      FileTransfer
      {
        Default = authenticated
      }
    }
  }
  OptimizationMind
  {
    Port = 9175
  }
  VirtualMachineManager
  {
    Port = 9163
    Dependencies
    {
      Databases = WorkloadManagement/VirtualMachineDB
    }
    Authorization
    {
      Default = authenticated
      declareInstanceRunning = VmRpcOperation
      instanceIDHeartBeat = VmRpcOperation
      declareInstanceHalting = VmRpcOperation
      declareInstancesStopping = Operator
    }
  }
}
Agents
{
  ##BEGIN PilotSyncAgent
  PilotSyncAgent
  {
    PollingTime = 600
    # Directory where the files can be moved. If running on the WebApp, use /opt/dirac/webRoot/www/pilot
    SaveDirectory =
    # List of locations where to upload the pilot files. Can be https://some.where, or DIRAC SE names.
    UploadLocations =
    # Set to False (or No, or N) to exclude the master CS from the list of CS servers
    IncludeMasterCS = True
  }
  ##END
  ##BEGIN PilotStatusAgent
  PilotStatusAgent
  {
    PollingTime = 300
    # Flag enabling sending of the Pilot accounting info to the Accounting Service
    PilotAccountingEnabled = yes
  }
  ##END
  JobAgent
  {
    PollingTime = 20
    FillingModeFlag = true
    StopOnApplicationFailure = true
    StopAfterFailedMatches = 10
    SubmissionDelay = 10
    JobWrapperTemplate = DIRAC/WorkloadManagementSystem/JobWrapper/JobWrapperTemplate.py
  }
  ##BEGIN StalledJobAgent
  StalledJobAgent
  {
    StalledTimeHours = 2
    FailedTimeHours = 6
    PollingTime = 3600
    MaxNumberOfThreads = 15
    # List of sites for which we want to be more tolerant before declaring the job stalled
    StalledJobsTolerantSites =
    StalledJobsToleranceTime = 0
    # List of sites for which we want to Reschedule (instead of declaring Failed) the Stalled jobs
    StalledJobsToRescheduleSites =
    SubmittingTime = 300
    MatchedTime = 7200
    RescheduledTime = 600
    Enable = True
  }
  ##END
  ##BEGIN JobCleaningAgent
  JobCleaningAgent
  {
    PollingTime = 3600
    # Maximum number of jobs to be processed in one cycle
    MaxJobsAtOnce = 500
    # Maximum number of jobs to be processed in one cycle for HeartBeatLoggingInfo removal
    MaxHBJobsAtOnce = 0
    RemoveStatusDelay
    {
      # Number of days after which Done jobs are removed
      Done = 7
      # Number of days after which Killed jobs are removed
      Killed = 7
      # Number of days after which Failed jobs are removed
      Failed = 7
      # Number of days after which any job, irrespective of status, is removed (-1 for disabling this feature)
      Any = -1
    }
    RemoveStatusDelayHB
    {
      # Number of days after which HeartBeatLoggingInfo for Done jobs is removed, positive to enable
      Done = -1
      # Number of days after which HeartBeatLoggingInfo for Killed jobs is removed
      Killed = -1
      # Number of days after which HeartBeatLoggingInfo for Failed jobs is removed
      Failed = -1
    }
    # Which production type jobs _not_ to remove, takes default from Operations/Transformations/DataProcessing
    ProductionTypes =
  }
  ##END
  ##BEGIN SiteDirector
  SiteDirector
  {
    # VO treated (leave empty for auto-discovery)
    VO =
    # VO treated (leave empty for auto-discovery)
    Community =
    # Group treated (leave empty for auto-discovery)
    Group =
    # Grid Environment (leave empty for auto-discovery)
    GridEnv =
    # The DN of the certificate proxy used to submit pilots. If not found here, what is in Operations/Pilot section of the CS will be used
    PilotDN =
    # The group of the certificate proxy used to submit pilots. If not found here, what is in Operations/Pilot section of the CS will be used
    PilotGroup =
    # List of sites that will be treated by this SiteDirector ("any" can refer to any Site defined in the CS)
    Site = any
    # List of CE types that will be treated by this SiteDirector ("any" can refer to any CE defined in the CS)
    CETypes = any
    # List of CEs that will be treated by this SiteDirector ("any" can refer to any type of CE defined in the CS)
    CEs = any
    # The maximum length of a queue (in seconds). Default: 3 days
    MaxQueueLength = 259200
    # Log level of the pilots
    PilotLogLevel = INFO
    # Max number of pilots to submit per cycle
    MaxPilotsToSubmit = 100
    # Check, or not, for the waiting pilots already submitted
    PilotWaitingFlag = True
    # How many cycles to skip if queue is not working
    FailedQueueCycleFactor = 10
    # Every N cycles we update the pilots status
    PilotStatusUpdateCycleFactor = 10
    # Every N cycles we update the number of available slots in the queues
    AvailableSlotsUpdateCycleFactor = 10
    # Maximum number of times the Site Director is going to try to get a pilot output before stopping
    MaxRetryGetPilotOutput = 3
    # If True, pilots will be submitted with option --pythonVersion=3
    Python3Pilots = True
    # To submit pilots to empty sites in any case
    AddPilotsToEmptySites = False
    # Should the SiteDirector consider platforms when deciding to submit pilots?
    CheckPlatform = False
    # Attribute used to define if the status of the pilots will be updated
    UpdatePilotStatus = True
    # Boolean value used to indicate if the pilot output will be retrieved or not
    GetPilotOutput = False
    # Boolean value that indicates if the pilot job will send information for accounting
    SendPilotAccounting = True
    # Boolean value that indicates if the pilot submission statistics will be sent for accounting
    SendPilotSubmissionAccounting = True
  }
  ##END
  ##BEGIN PushJobAgent
  PushJobAgent
  {
    # VO treated (leave empty for auto-discovery)
    VO =
    # The DN of the certificate proxy used to submit pilots/jobs. If not found here, what is in Operations/Pilot section of the CS will be used
    PilotDN =
    # The group of the certificate proxy used to submit pilots/jobs. If not found here, what is in Operations/Pilot section of the CS will be used
    PilotGroup =
    # List of sites that will be treated by this PushJobAgent ("any" can refer to any Site defined in the CS)
    Site =
    # List of CE types that will be treated by this PushJobAgent ("any" can refer to any CE defined in the CS)
    CETypes =
    # List of CEs that will be treated by this PushJobAgent ("any" can refer to any CE type defined in the CS)
    CEs =
    # Max number of jobs to handle simultaneously
    MaxJobsToSubmit = 100
    # How many cycles to skip if queue is not working
    FailedQueueCycleFactor = 10
  }
  ##END
  ##BEGIN StatesAccountingAgent
  StatesAccountingAgent
  {
    # The backend used. Either "Accounting" or "Monitoring", or both
    Backends = Accounting
    # The name of the message queue used for the failover
    MessageQueue = dirac.wmshistory
    # Polling time. For this agent it should always be 15 minutes.
    PollingTime = 900
  }
  ##END
  CloudDirector
  {
    PollingTime = 60
    RunningPod = Default
  }
  VirtualMachineMonitorAgent
  {
    MinWorkingLoad = 0.01
    LoadAverageTimespan = 60
    HaltPeriod = 600
    HaltBeforeMargin = 300
    HeartBeatPeriod = 300
  }
  ##BEGIN TaskQueuesAgent
  TaskQueuesAgent
  {
    PollingTime = 120
  }
  ##END
}
Executors
{
  Optimizers
  {
    Load = JobPath, JobSanity, InputData, JobScheduling
  }
  JobPath
  {
  }
  JobSanity
  {
  }
  InputData
  {
  }
  JobScheduling
  {
  }
}
##BEGIN JobWrapper
JobWrapper
{
  # Values in this section follow the same plain-text convention as the rest of the file:
  # unquoted strings, comma-separated lists, literal numbers.
  # NOTE(review): previously written as Python literals (quotes, brackets, arithmetic);
  # confirm no consumer relied on the literal quote/bracket characters.
  BufferLimit = 10485760
  CleanUpFlag = True
  # Empty list
  DefaultCatalog =
  DefaultCPUTime = 600
  DefaultErrorFile = std.err
  DefaultOutputFile = std.out
  DiskSE = -disk, -DST, -USER
  MasterCatalogOnlyFlag = True
  MaxJobPeekLines = 20
  # 1024 * 1024 * 10, written out as a literal number (same form as BufferLimit)
  OutputSandboxLimit = 10485760
  # Retry the upload of the output file if only one output SE is defined
  RetryUpload = False
  TapeSE = -tape, -RDST, -RAW
}
##END