""" CCP4databaseDef.py: CCP4 GUI Project Copyright (C) 2010 University of York This library is free software: you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License version 3, modified in accordance with the provisions of the license to address the requirements of UK law. You should have received a copy of the modified GNU Lesser General Public License along with this library. If not, copies may be downloaded from http://www.ccp4.ac.uk/ccp4license.php This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. """ """ Liz Potterton May 2010: created """ ##@package CCP4DatabaseDef Python representation of old ccp4i database.def files """ Example use: t = CDatabaseDef('..../database.def') for iJob in range(1,t.nJobs): if t.jobs.has_key(iJob): print 'taskname',t.jobs[iJob]['TASKNAME'] """ class CDatabaseDef: def __init__(self,filename): self.headerArray = {} self.dataArray = {} self.project_name ='' self.project_dir = '' self.jobs = {} self.headerArray,typeArray,self.dataArray = self.read(filename) #print 'dataArray',self.dataArray # Get project name and dir from header if self.headerArray.has_key('PROJECT'): self.project_name = self.headerArray['PROJECT'][0] if len(self.headerArray['PROJECT'])>1: self.project_dir = self.headerArray['PROJECT'][1] print 'Read project',self.project_name,self.project_dir self.nJobs = int(self.dataArray.get('NJOBS',0)) self.jobs = self.extractJobs(self.dataArray) def read (self,filename='',headerOnly=0): ''' Read the def file into dicts for header, types and data A database.def should not have any type information This is left in the code in case want to reuse elsewhere ''' headerArray = {} typeArray = {} dataArray = {} try: f = open(filename) except: print 'ERROR opening',filename return headerArray,typeArray,dataArray if not f: print 'ERROR opening',filename return headerArray,typeArray,dataArray try: content = f.readlines() f.close() except: print 'ERROR reading ',filename return headerArray,typeArray,dataArray for line in content: words = self.splitDefLine(line) #print 'CCP4Data.parseDefFile',words if len(words)>=2: if words[0] == '#CCP4I' and len(words)>=3: #print 'header',words headerArray[words[1]] = words[2:] elif words[0][0] == '#': pass elif not headerOnly: if len(words)>2: if words[1][0]=='_': typeArray[words[0]] = words[1][1:] else: typeArray[words[0]] = words[1] idata = 2 else: idata = 1 if words[idata].strip('"')=='': dataArray[words[0]] = '' else: dataArray[words[0]]=words[idata] return headerArray,typeArray,dataArray def splitDefLine(self,line=''): import re m = re.search(r'(.*)\"(.*)\"(.*)',line) if not m: return line.split() else: a,b,c = m.groups() a = a.strip() c = c.strip() rv = [] if a:rv.extend(a.split()) rv.append(b) if c: rv.append(c.split()) return rv def splitDefList(self,line=''): ''' Parse the INPUT_FILE and OUTPUT_FILE from database.def This is a space-separated list If the file name includes spaces then the name is enclosed in curly brace ''' import re rv = [] while len(line)>0: m = re.search(r'(.*?)\{(.*?)\}(.*)',line) if not m: rv.extend(line.split()) line = '' else: a,b,c = m.groups() #print 'splitDefList',a,'*',b,'*',c a = a.strip() c = c.strip() if a:rv.extend(a.split()) rv.append(b) line = c.strip() #print 'new line',line return rv def extractJobs(self,dataArray): jobs = {} for key,value in dataArray.items(): try: if key.count(',') == 1: dataType,jobId = key.split(',') try: jobId = int(jobId) except: jobId = -1 if jobId>0: # Initialise the data for one job if not jobs.has_key(jobId): jobs[jobId] = { 'STATUS' : '', 'DATE' : 0, 'LOGFILE' : '', 'TASKNAME' : '', 'TITLE' : '', 'INPUT_FILES' : [], 'INPUT_FILES_DIR' : [], 'OUTPUT_FILES' : [], 'OUTPUT_FILES_DIR' : [] } # Check that the dataType is one of the recognised properties for a job if jobs[jobId].has_key(dataType): if ['INPUT_FILES','OUTPUT_FILES'].count(dataType): jobs[jobId][dataType] = self.splitDefList(value) elif ['INPUT_FILES_DIR','OUTPUT_FILES_DIR'].count(dataType): jobs[jobId][dataType] = value.split(' ') else: jobs[jobId][dataType] = value except: print 'ERROR interpreting ',key return jobs