# get-ebi.py # Copyright 2005, 2006 by Bernhard Lohkamp # Copyright 2005, 2006 by Paul Emsley, The University of York # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program. If not, see . pdbe_server = "http://www.ebi.ac.uk" pdbe_pdb_file_dir = "pdbe-srv/view/files" pdbe_file_name_tail = "ent" # sf example http://www.ebi.ac.uk/pdbe-srv/view/files/r4hrhsf.ent # 20151126-PE No, we can't have coot-download created on coot-startup, it must be # made only when we need it. # global coot_tmp_dir # coot_tmp_dir = get_directory("coot-download") # e.g. (ebi-get-pdb "1crn") # # no useful return value # # Note that that for sf data, we need to construct something like the # string: http://oca.ebi.ac.uk/oca-bin/send-sf?r2acesf.ent.Z and we # don't need to strip any html (thank goodness). Also not that the # accession code now is lower case. # # data-type(here string) can be 'pdb' or 'sfs' (structure factors). # We might like to use # 'coordinates rather than 'pdb in the future. # # The optional argument imol-coords-arg-list is necessary for # ouptutting sfs, because we need coordinates from which we can # calculate phases. 
#

# helper function to avoid downloading empty files
# returns download filename upon success or False when fail
#
def coot_urlretrieve(url, file_name):
    """Download url into file_name.

    Returns the local file name on success, or False when the
    retrieval failed (a warning is printed in that case).
    """
    import urllib

    class CootURLopener(urllib.FancyURLopener):
        def http_error_default(self, url, fp, errcode, errmsg, headers):
            # Swallow the HTTP error instead of handing back the error
            # page.  NOTE(review): presumably the resulting None makes
            # retrieve() below raise, which is then reported as a failed
            # download - confirm against the urllib implementation.
            pass

    local_filename = False
    opener = CootURLopener()
    try:
        local_filename, _header = opener.retrieve(url, file_name)
    except Exception:
        # Best effort: every failure is reported and mapped to False.
        # (Exception rather than a bare except, so that
        # KeyboardInterrupt/SystemExit are no longer trapped here.)
        print("BL WARNING:: retrieve of url %s failed" % url)
    return local_filename

# we dont need something like net-get-url in python
# since we have build in functions like urlretrieve (in module urllib)

# check the directory and get url url_string.
#
def check_dir_and_get_url(dir, file_name, url_string):
    """Make sure directory dir exists, then download url_string to file_name.

    dir is created when it does not exist yet.  Nothing is downloaded
    when dir is a regular file or cannot be created; an explanation is
    printed instead.

    Returns what coot_urlretrieve() returns (the local file name, or
    False on a failed download), or False when dir is unusable.
    """
    import os

    if os.path.isfile(dir):
        print("%s  is atually a file and not a dir, so we can't write to it"
              % dir)
        return False

    if not os.path.isdir(dir):
        try:
            os.mkdir(dir)
        except OSError:
            # os.mkdir raises when it fails; the isdir() check below
            # reports the failure (and tolerates the directory having
            # been created by someone else in the meantime).
            pass
        if not os.path.isdir(dir):
            print("ERROR:: Oops - create-directory  %s  failed!" % dir)
            return False

    return coot_urlretrieve(url_string, file_name)

# get url_string for data type (string actually) 'pdb' or 'sfs'
#
def get_url_str(id, url_string, data_type, imol_coords_arg_list):
    """Download url_string into the coot-download directory and read it.

    data_type "pdb": the file is saved as <id>.pdb.<pdbe_file_name_tail>,
    read with handle_read_draw_molecule() and that call's result is
    returned.

    data_type "sfs": when imol_coords_arg_list is a number >= -1 the
    file is saved as <id>.cif and passed to read_cif_data() together
    with imol_coords_arg_list.  Nothing is returned (None).

    Any other data_type does nothing and returns None.
    """
    import numbers

    coot_tmp_dir = get_directory("coot-download")

    if data_type == "pdb":
        pdb_file_name = (coot_tmp_dir + "/" + id + ".pdb." +
                         pdbe_file_name_tail)
        check_dir_and_get_url(coot_tmp_dir, pdb_file_name, url_string)
        return handle_read_draw_molecule(pdb_file_name)

    if data_type == "sfs":
        sfs_file_name = coot_tmp_dir + "/" + id + ".cif"
        imol_coords = imol_coords_arg_list
        # NOTE(review): -1 passes this test although get_ebi_pdb_and_sfs
        # treats negative numbers as a failed read - confirm the bound.
        if isinstance(imol_coords, numbers.Number) and imol_coords >= -1:
            check_dir_and_get_url(coot_tmp_dir, sfs_file_name, url_string)
            read_cif_data(sfs_file_name, imol_coords_arg_list)
        # do we need to return something here too?!

# Get the pdb and sfs. @var{id} is the accession code
#
def get_ebi_pdb_and_sfs(id):
    """Fetch the coordinates for accession code id, then its structure factors.

    The structure factors are only requested when get_ebi_pdb()
    returned a non-negative molecule number.  No useful return value.
    """
    import numbers

    imol_coords = get_ebi_pdb(id)

    # get_ebi_pdb documents "not a number (False)" as an error value;
    # bool is a numbers.Number, so it has to be excluded explicitly.
    if (isinstance(imol_coords, bool) or
            not isinstance(imol_coords, numbers.Number)):
        print("Failed at reading coordinates. imol-coords was  %s"
              % (imol_coords,))
        return

    if imol_coords < 0:  # -1 is coot code for failed read.
        print("failed to read coordinates.")
        return

    down_id = id.lower()
    url_str = (pdbe_server + "/" + pdbe_pdb_file_dir + "/" +
               "r" + down_id + "sf." + pdbe_file_name_tail)
    get_url_str(id, url_str, "sfs", imol_coords)

# Return a molecule number on success
# or not a number (False) or -1 on error.
#
def get_ebi_pdb(id):
    """Download and read the PDBe coordinates for accession code id.

    The lower-cased accession code is used in the URL.  When the read
    gives a valid model molecule, pdb_validate() is called for it.

    Returns whatever get_url_str() returned for the "pdb" request.
    """
    down_id = id.lower()
    url_str = (pdbe_server + "/" + pdbe_pdb_file_dir + "/" + down_id +
               "." + pdbe_file_name_tail)
    imol_coords = get_url_str(id, url_str, "pdb", None)
    # e.g. http://ftp.ebi.ac.uk/pub/databases/pdb +
    #      /validation_reports/cb/1cbs/1cbs_validation.xml.gz
    if valid_model_molecule_qm(imol_coords):
        pdb_validate(down_id, imol_coords)
    return imol_coords

# Return a list of molecules (i.e. the model molecule and the 2 maps).
# or, if it didn't work then return False
#
# @var{id} is the accession code.
#
# returns imol of read pdb or False on error.
#
# 20050725 EDS code
#
# return a list of 3 molecule numbers [imol, map, diff_map] or False
#
#
def get_eds_pdb_and_mtz(id):
    """Load the EDS model and its two sigmaa maps for accession code id.

    When both the model and the mtz file are already in the
    "coot-download" directory they are read from there; otherwise they
    are downloaded from the EDS server first.

    Returns [imol, imol_map, imol_diff_map] on success.  Returns False
    when the download directory cannot be made, the EDS info page
    cannot be read or reports no structure factors, or the model
    molecule is not valid after reading.
    """
    # get_cached_eds_files() needs os.path; import it here so that the
    # function does not depend on os being present in the globals.
    import os

    # Gerard DVD Kleywegt says we can find the coords/mtz thusly:
    #
    # - model = http://eds.bmc.uu.se/eds/sfd/1cbs/pdb1cbs.ent
    # - mtz   = http://eds.bmc.uu.se/eds/sfd/1cbs/1cbs_sigmaa.mtz
    #
    # 20091222
    # - newprefix: http://eds.bmc.uu.se/eds/dfs/cb/1cbs/
    #
    # URL:: "http://eds.bmc.uu.se/eds/sfd/sa/2sar/pdb2sar.ent"
    # URL:: "http://eds.bmc.uu.se/eds/sfd/sa/2sar/2sar_sigmaa.mtz"

    def get_cached_eds_files(accession_code):
        # Read model + maps from previously downloaded files.  Returns
        # the list of 3 molecule numbers, or False when a file is
        # missing or any of the three molecules is not valid (in which
        # case all three are closed again).
        down_code = accession_code.lower()
        dir_name = get_directory("coot-download")
        pdb_file_name = os.path.join(dir_name, "pdb" + down_code + ".ent")
        mtz_file_name = os.path.join(dir_name, down_code + "_sigmaa.mtz")

        if not (os.path.isfile(pdb_file_name) and
                os.path.isfile(mtz_file_name)):
            return False

        imol = read_pdb(pdb_file_name)
        imol_map = make_and_draw_map(mtz_file_name, "2FOFCWT", "PH2FOFCWT",
                                     "", 0, 0)
        imol_map_d = make_and_draw_map(mtz_file_name, "FOFCWT", "PHFOFCWT",
                                       "", 0, 1)
        if (valid_model_molecule_qm(imol) and
                valid_map_molecule_qm(imol_map) and
                valid_map_molecule_qm(imol_map_d)):
            return [imol, imol_map, imol_map_d]

        close_molecule(imol)
        close_molecule(imol_map)
        close_molecule(imol_map_d)
        return False

    eds_site = "http://eds.bmc.uu.se/eds"
    eds_core = "http://eds.bmc.uu.se"

    # "1cbs" -> "cb/"
    #
    def mid_chars(id_code):
        if not id_code:  # check for string?
            return "//fail//"
        if len(id_code) != 4:
            return "/FAIL/"
        return id_code[1:3] + "/"

    # main line
    #
    cached_status = get_cached_eds_files(id)
    if isinstance(cached_status, list):
        return cached_status

    coot_tmp_dir = get_directory("coot-download")
    r = coot_mkdir(coot_tmp_dir)
    if not r:
        print("Can't make directory  %s" % coot_tmp_dir)
        # The contract is "list or False"; this path used to fall off
        # the end of the function and return None.
        return False

    down_id = id.lower()
    eds_url = eds_site + "/dfs/"
    mc = mid_chars(down_id)

    target_pdb_file = "pdb" + down_id + ".ent"
    dir_target_pdb_file = coot_tmp_dir + "/" + target_pdb_file
    model_url = eds_url + mc + down_id + "/" + target_pdb_file

    target_mtz_file = down_id + "_sigmaa.mtz"
    dir_target_mtz_file = coot_tmp_dir + "/" + target_mtz_file
    mtz_url = eds_url + mc + down_id + "/" + target_mtz_file

    eds_info_page = eds_core + "/cgi-bin/eds/uusfs?pdbCode=" + down_id

    try:
        pre_download_info = coot_get_url_as_string(eds_info_page)
        # print "INFO:: --------------- pre-download-info:", pre_download_info
        # The substring tests stay inside the try so that a non-string
        # result is reported as a failed info request too.
        bad_map_status = "No reliable map available" in pre_download_info
        if "There is no structure factor entry" in pre_download_info:
            print("BL WARNING:: no sfs available for entry %s, so wont download." % id)
            # no pdb and no mtz
            return False
    except Exception:
        print("BL ERROR:: could not get pre_download_info from %s" % eds_core)
        # we probably wont get anything else, so bail out.
        return False

    s1 = coot_urlretrieve(model_url, dir_target_pdb_file)
    print("INFO:: read model status:  %s" % (s1,))
    s2 = coot_urlretrieve(mtz_url, dir_target_mtz_file)
    print("INFO:: read mtz status:  %s" % (s2,))

    if bad_map_status:
        s = "This map (" + down_id + \
            ") is marked by the EDS as \"not a reliable map\""
        info_dialog(s)

    # maybe should then not load the map!?
    r_imol = handle_read_draw_molecule(dir_target_pdb_file)
    map_1 = make_and_draw_map(dir_target_mtz_file, "2FOFCWT", "PH2FOFCWT",
                              "", 0, 0)
    map_2 = make_and_draw_map(dir_target_mtz_file, "FOFCWT", "PHFOFCWT",
                              "", 0, 1)
    set_scrollable_map(map_1)
    if valid_model_molecule_qm(r_imol):
        return [r_imol, map_1, map_2]
    return False

# not sure if coot function better or python script function coot_urlretrieve
def net_get_url(my_url, file_name):
    """Download my_url to file_name using coot_get_url()."""
    coot_get_url(my_url, file_name)

def get_pdb_redo(text):
    """Fetch and load the PDB_REDO model, maps and script for a 4-char code.

    text is the accession code; it is lower-cased before use.  The
    <code>_final.pdb, <code>_final.mtz and <code>.py files are
    downloaded under those names (relative paths).  When the model
    reads successfully the two maps are made from the mtz file and the
    downloaded script is run.

    Prints a warning and does nothing when text is not a str or is not
    4 characters long.  No useful return value.
    """
    if not isinstance(text, str):
        print("BL WARNING:: No string. No accession code.")
        return
    if len(text) != 4:
        print("BL WARNING:: Accession code not 4 chars.")
        return

    # str.lower() instead of string.lower(): the string module was
    # never imported in this function.
    text = text.lower()
    stub = "http://www.cmbi.ru.nl/pdb_redo/" + \
           text[1:3] + \
           "/" + text + "/" + text + "_final"
    pdb_file_name = text + "_final.pdb"
    mtz_file_name = text + "_final.mtz"
    py_file_name = text + ".py"
    url_pdb = stub + ".pdb"
    url_mtz = stub + ".mtz"
    url_py = stub + ".py"

    print("getting %s" % url_pdb)
    net_get_url(url_pdb, pdb_file_name)
    print("getting %s" % url_mtz)
    net_get_url(url_mtz, mtz_file_name)
    print("getting %s" % url_py)
    net_get_url(url_py, py_file_name)

    status_imol = read_pdb(pdb_file_name)
    if status_imol < 0:
        # One plain literal: the old backslash continuation put the
        # source indentation into the printed message.
        print("BL INFO:: problem opening pdb file. Most likely "
              "something went wrong in the download")
    else:
        print("make-and-draw-map with %s" % mtz_file_name)
        make_and_draw_map(mtz_file_name, "FWT", "PHWT", "", 0, 0)
        make_and_draw_map(mtz_file_name, "DELFWT", "PHDELWT", "", 0, 1)
        # SECURITY NOTE(review): this runs a script that was just
        # downloaded over plain http, with the rights of the Coot
        # user.  Left in place because it is the point of the
        # function; consider https and/or asking the user first.
        # NOTE(review): the original indentation is lost in the source
        # under review; the script is run on the success path only -
        # confirm that this matches the intended behaviour.
        execfile(py_file_name)

# BL says: to test, some examples
#id = "2BSX"
#get_ebi_pdb_and_sfs(id)
#get_eds_pdb_and_mtz(id)