#!/usr/bin/env python
# Requires a DIRAC UI to be set up (source bashrc)
# and a valid proxy: dirac-proxy-init -g [your vo here]_user -M
# TODO: Error handling

# DIRAC does not work otherwise
from DIRAC.Core.Base import Script
Script.initialize()
# end of DIRAC setup

from DIRAC.Interfaces.API.Job import Job
from DIRAC.Interfaces.API.Dirac import Dirac
import pprint
import linecache


def select_LFN(n_run, rt_list):
    # linecache lines are 1-indexed and include the trailing newline,
    # so strip it before building the LFN
    return linecache.getline(rt_list, n_run).strip()


def count_files(rt_list):
    with open(rt_list) as f:
        lines = sum(1 for _ in f)
    return lines


def configure_and_submit(dirac, job, logfile, n_run, rt_list):
    # n_run defines the current number in the loop of jobs
    # job.setCPUTime(5000)
    us_rootracker = select_LFN(n_run, rt_list)
    us_mac = us_rootracker.replace('.rootracker', '.mac')

    # Any additional files to be run that you want to pass locally
    my_sandbox = ['produce_dstrm.sh', rt_list,
                  'LFN:' + us_rootracker, 'LFN:' + us_mac]
    job.setInputSandbox(my_sandbox)

    submissionscript = "submit_dstrm.sh"
    job.setExecutable(submissionscript, arguments=str(n_run))
    job.setName('ICEDUST_dstrm_run_' + str(n_run))

    # Any site you don't want your jobs to go to:
    # job.setBannedSites(['LCG.UKI-SOUTHGRID-BRIS-HEP.uk', 'LCG.pic.es'])

    # This is GridPP DIRAC specific
    job.setPlatform("AnyPlatform")

    # Save all root and rootracker files, discard logs.
    # Save all to MC6/dstrm for the downstream run.
    out_pre = 'data_dstrm_tmp/'
    job.setOutputData([out_pre + '*.root*', out_pre + '*.mac'],
                      outputSE='UKI-LT2-IC-HEP-disk',
                      outputPath='MC6/dstrm_tmp/')

    result = dirac.submitJob(job)
    logfile.write('Submission Result: ')
    pprint.pprint(result, logfile)
    jobid = result['JobID']

    # Print job id to file for future reference
    joblog = open("api_jobid.log", "a")
    joblog.write(str(jobid) + '\n')
    joblog.close()

    return jobid


def check_job(dirac, jobid, logfile):
    # To interactively check on job status do:
    # dirac-wms-job-status -f api_jobid.log
    logfile.write("\nThe current status of this job is:")
    pprint.pprint(dirac.getJobStatus(jobid), logfile)


def check_all_jobs(dirac, logfile):
    joblog = open("api_jobid.log", "r")
    # list comprehension :-D
    all_jobids = [jobid.strip() for jobid in joblog.readlines()]
    joblog.close()
    logfile.write("\nThe current status of all jobs is:")
    all_status = dirac.getJobStatus(all_jobids)
    pprint.pprint(all_status, logfile)


def main(n_run, rt_list):
    logfile = open("api.log", "w")

    dirac = Dirac()
    job = Job()

    jobid = configure_and_submit(dirac, job, logfile, n_run, rt_list)
    check_job(dirac, jobid, logfile)
    check_all_jobs(dirac, logfile)

    logfile.close()
    # print("API logs can be found in api.log and api_jobid.log.")
    # print("submitting job " + str(n_run) + " of " + str(end_job))


if __name__ == "__main__":
    rt_list = 'list_of_rootrackers.txt'
    jobs = count_files(rt_list)
    end_job = jobs + 1
    start_job = 1  # Default starting point
    for n_run in range(start_job, end_job):
        main(n_run, rt_list)
    # If you want to run just one production, call main(number_of_run, rt_list)
    # manually instead of the for loop.
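
# A minimal single-run sketch (kept commented out): the run number 3 and the
# rt_list filename below are only illustrative placeholders, not values from
# a real production.
#
#     if __name__ == "__main__":
#         main(3, 'list_of_rootrackers.txt')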