"""Classes to optionally build comparative models, and then refine the loops""" import sys from modeller.automodel.automodel import automodel from modeller.util.modobject import modobject from modeller import * from modeller.optimizers import conjugate_gradients from modeller.automodel import refine, autosched __docformat__ = "epytext en" class loopmodel(automodel): """Optionally build comparative models, and then refine the loops""" loop = None inimodel = None loop_potential = None _defined_indices = None def __init__(self, env, sequence, alnfile=None, knowns=[], inimodel=None, deviation=None, library_schedule=None, csrfile=None, inifile=None, assess_methods=None, loop_assess_methods=None): automodel.__init__(self, env, alnfile, knowns, sequence, deviation, library_schedule, csrfile, inifile, assess_methods) self.inimodel = inimodel self.loop = LoopData(env) self.loop.assess_methods = loop_assess_methods def make(self, exit_stage=0): """Build all models""" if self.loop.write_selection_only and self.final_malign3d: raise ValueError("Cannot turn on both loop.write_selection_only " "and final_malign3d; please turn off one or both") if self.inimodel: self.env = self.loop.env self.build_seq(self.inimodel, 1) else: automodel.make(self, exit_stage) self.write_summary(self.loop.outputs, 'loop models') def get_loop_model_filename(self, sequence, id1, id2, file_ext): """Returns the loop model PDB name - usually of the form foo.BL000X000Y.pdb""" return modfile.default(file_id='.BL', file_ext=file_ext, root_name=sequence, id1=id1, id2=id2) def multiple_models(self, atmsel): """Build all models, given the restraints and atom selection""" automodel.multiple_models(self, atmsel) envcopy = self.env self.env = self.loop.env try: for models in [a for a in self.outputs if a['failure'] is None]: num = models['num'] filename = models['name'] self.build_seq(filename, num) finally: self.env = envcopy def fit_models_on_template(self): """Superpose each of the generated models on the templates""" automodel.fit_models_on_template(self) for models in [a for a in self.outputs if a['failure'] is None]: num = models['num'] filename = models['name'] mdl = model(self.env, file=filename) aln = alignment(self.env) aln.append_model(mdl, align_codes=self.sequence) aln.append_model(mdl, align_codes=self.sequence) for loopmodels in [a for a in self.loop.outputs \ if a['failure'] is None and a['num'] == num]: filename = loopmodels['name'] id1 = loopmodels['loopnum'] mdl2 = model(self.env, file=filename) atmsel = selection(mdl).only_atom_types('CA') atmsel.superpose(mdl2, aln, fit=True) filename = self.get_loop_model_filename(self.sequence, id1, num, '_fit' + self.pdb_ext) mdl2.write(file=filename, model_format=self.output_model_format) def read_potential(self): """Reads in the group_restraints statistical potential used for loop modeling. Redefine if you want to use a different potential""" return group_restraints(self.env, classes='$(LIB)/atmcls-melo.lib', parameters='$(LIB)/melo1-dist.lib') def read_loop_alignment(self, filename): """Read in the alignment used to build loop models""" aln = alignment(self.env) for i in range(2): aln.append_model(self, align_codes=self.sequence, atom_files=filename) return aln def _check_for_added_atoms(self, loop_selection, defined_selection): # Atoms in the loop region always count as defined s = defined_selection | loop_selection # Note that we store atom indices rather than a selection object, # since selection objects cannot be sent across the network correctly # to slaves when building models in parallel. self._defined_indices, m = s.get_atom_indices() lenall = len(self.atoms) lendef = len(self._defined_indices) if lenall > lendef: s = selection(self) - s print(""" The following %d atoms were not found in the input model's non-loop region, and were added automatically by Modeller in order to determine correct interactions between the loop and the rest of the protein: %s The coordinates for these atoms will be constructed automatically using internal coordinates, and not relaxed, so clashes between these atoms and the rest of the protein may exist (note, however, that the score of the loop does not include protein-protein internal interactions, so will not be adversely affected by any clashes).""" \ % (lenall - lendef, ", ".join([repr(a) for a in s]))) if self.loop.write_defined_only: print(""" Output models will contain only the atoms present in the input model. If you want them to also contain the added atoms, set loopmodel.loop.write_defined_only=False. """) else: print(""" Output models will contain both the atoms present in the input model and the newly-added atoms. If you want them to only contain the atoms from the input model, set loopmodel.loop.write_defined_only=True. """) def create_loop_topology(self, aln, filename): """Build the initial topology of the loop""" self.clear_topology() self.generate_topology(aln[-1]) self.res_num_from(model(self.env, file=filename), aln) self.special_patches(aln) # Save self.seq_id, since otherwise transfer_xyz will set it to 100% seq_id = self.seq_id self.transfer_xyz(aln) self.seq_id = seq_id if not self.loop.write_selection_only: defined_selection = selection(self).only_defined() else: defined_selection = None # Fill in any missing atoms using internal coordinates self.build(build_method='INTERNAL_COORDINATES', initialize_xyz=False) return defined_selection def build_seq(self, filename, num): """Build all loop models for a given starting model""" self.csrfile = self.sequence + '.lrsr' self.read_top_par() if self.loop_potential is None: self.loop_potential = self.read_potential() oldgprsr = self.group_restraints self.group_restraints = None self.read(file=filename, model_format=self.output_model_format) self.group_restraints = self.loop_potential aln = self.read_loop_alignment(filename) defined_selection = self.create_loop_topology(aln, filename) atmsel = self._check_select_loop_atoms() if not self.loop.write_selection_only: self._check_for_added_atoms(atmsel, defined_selection) self.loop_restraints(atmsel, aln) # Select corresponding restraints only: # only necessary to eliminate inefficiencies in 'special_restraints' # because MAKE_RSRS works with selected atoms now: self.restraints.unpick_all() self.restraints.pick(atmsel) self.restraints.condense() self.restraints.write(file=self.csrfile) # Calculate energy for the original (raw) loop: self.env.edat.nonbonded_sel_atoms = 1 atmsel.energy() # Prepare the starting structure (comment it out if # the input PDB file is a better initial structure): self.build_ini_loop(atmsel) ini_model = "%s.IL%04d%04d%s" % (self.sequence, 0, num, self.pdb_ext) self.write(file=ini_model, model_format=self.output_model_format) sched = self.get_loop_schedule() if self.parallel_job is not None: self.parallel_loop_models(atmsel, ini_model, num, sched) else: self.multiple_loop_models(atmsel, ini_model, num, sched) self.group_restraints = oldgprsr def multiple_loop_models(self, atmsel, ini_model, num, sched): """Build all loop models for a given initial model""" for id1 in range(self.loop.starting_model, self.loop.ending_model + 1): r = self.single_loop_model(atmsel, ini_model, num, id1, sched) self.loop.outputs.append(r) def parallel_loop_models(self, atmsel, ini_model, num, sched): """Build all loop models for a given model using a parallel job""" from modeller.automodel.parallel import looptask job = self.parallel_job for id1 in range(self.loop.starting_model, self.loop.ending_model + 1): job.queue_task(looptask(self, atmsel, ini_model, num, id1, sched)) self.loop.outputs.extend(job.run_all_tasks()) def get_loop_schedule(self): """Get loop optimization schedule""" libsched = self.loop.library_schedule return libsched.make_for_model(self) * self.env.schedule_scale def new_loop_trace_file(self, id1, id2): """Open a new loop optimization trace file""" if self.trace_output > 0: filename = modfile.default(file_ext='', file_id='.DL', root_name=self.sequence, id1=id1, id2=id2) return open(filename, 'w') else: return None def get_loop_actions(self): """Get actions to carry out during loop optimization""" return self.get_optimize_actions() def read_ini_loop_model(self, ini_model): """Read in the initial loop model""" # Make sure we read every atom, since when we write out the model, it # writes out every atom, not just non-HET/non-hydrogen/non-water io = io_data(copy=self.env.io) io.hetatm = io.water = io.hydrogen = True self.read(file=ini_model, io=io, model_format=self.output_model_format) def single_loop_model(self, atmsel, ini_model, num, id1, sched, parallel=False): """Build a single loop model""" self.tracefile = self.new_loop_trace_file(id1, num) if parallel: self.read_top_par() self.read_ini_loop_model(ini_model) aln = self.read_loop_alignment(ini_model) self.create_loop_topology(aln, ini_model) else: self.read_ini_loop_model(ini_model) atmsel.randomize_xyz(deviation=5.0) if parallel: self.group_restraints = self.read_potential() self.rd_restraints() filename = self.get_loop_model_filename(self.sequence, id1, num, self.pdb_ext) out = {'name':filename, 'loopnum':id1, 'num':num, 'failure':None} actions = self.get_loop_actions() try: # Refine without the rest of the protein: self.env.edat.nonbonded_sel_atoms = 2 self.optimize_loop(atmsel, sched, actions) # Refine in the context of the rest of the protein: self.env.edat.nonbonded_sel_atoms = 1 self.optimize_loop(atmsel, sched, actions) (out['molpdf'], out['pdfterms']) = atmsel.energy() self.to_iupac() except (ModellerError, OverflowError): detail = sys.exc_info()[1] if len(str(detail)) > 0: out['failure'] = detail else: out['failure'] = 'Optimization failed' del detail else: self.loop_model_analysis(atmsel, ini_model, filename, out, id1, num) del self.tracefile return out def _get_defined_selection(self): s = selection() for i in self._defined_indices: s.add(self.atoms[i-1]) return s def loop_model_analysis(self, atmsel, ini_model, filename, out, id1, num): """Energy evaluation and assessment, and write out the loop model""" # Preserve molpdf and biso (assessment might overwrite them) pdf = self.last_energy biso = [a.biso for a in self.atoms] self.user_after_single_loop_model() # Do model assessment if requested assess_keys = self.assess(atmsel, self.loop.assess_methods, out) # Restore molpdf and biso self.last_energy = pdf for a, b in zip(self.atoms, biso): a.biso = b extra_data = self.get_extra_loop_model_data(assess_keys, out) if self.loop.write_selection_only: self.select_loop_atoms().write(file=filename, model_format=self.output_model_format, extra_data=extra_data) elif self.loop.write_defined_only: self._get_defined_selection().write(file=filename, model_format=self.output_model_format, extra_data=extra_data) else: self.write(file=filename, model_format=self.output_model_format, extra_data=extra_data) if self.accelrys: # Accelrys wants their analysis *after* the model is written, so # that the written-out model keeps the original template-derived # Biso, rather than getting the energy profile Biso: self.loop_model_analysis_accelrys(ini_model, id1, num) def get_extra_loop_model_data(self, assess_keys, out): """Get extra data to insert into mmCIF or PDB output files""" if self.output_model_format == 'MMCIF': return self.get_scores_model_data(assess_keys, out, "_modeller.%s %s", "_", spacer="") else: return self.get_scores_model_data(assess_keys, out, "REMARK 6 %s: %s", " ", spacer="") def loop_model_analysis_accelrys(self, ini_model, id1, num): """Additional model analysis for Accelrys build""" # select all atoms allat = selection(self) # write total violations for (id, norm) in (('.EL', False), ('.NEL', True)): allat.energy(output='LONG ENERGY_PROFILE', normalize_profile=norm, file=modfile.default(file_id=id, file_ext='', root_name=self.sequence, id1=id1, id2=num)) def user_after_single_loop_model(self): """Used for any user analysis after building each loop model. Redefine as you see fit.""" pass def build_ini_loop(self, atmsel): """Create the initial conformation of the loop. By default we place all atoms on a line between the loop termini, but you may want to use a different conformation, in which case you should redefine this routine. For example, if you want to leave the initial PDB file untouched, use a one-line 'pass' routine.""" atmsel.unbuild() self.build(build_method='3D_INTERPOLATION', initialize_xyz=False) def build_charmm_loop_restraints(self, atmsel, rsr, aln): """Build loop restraints from CHARMM libraries""" dih_lib_only = True mnch_lib = 1 res_at = 1 for typ in ('bond', 'angle', 'improper', 'dihedral', 'phi-psi_binormal'): rsr.make(atmsel, aln=aln, restraint_type=typ, spline_on_site=self.spline_on_site, dih_lib_only=dih_lib_only, mnch_lib=mnch_lib, restraint_sel_atoms=res_at) for typ in ('omega', 'chi1', 'chi2', 'chi3', 'chi4'): rsr.make(atmsel, aln=aln, restraint_type=typ+'_dihedral', spline_on_site=self.spline_on_site, dih_lib_only=dih_lib_only, mnch_lib=mnch_lib, restraint_sel_atoms=res_at, spline_range=4.0, spline_dx=0.3, spline_min_points=5) def loop_restraints(self, atmsel, aln): """Construct restraints for loop modeling""" rsr = self.restraints rsr.clear() if self.loop.library_restraints is not None: self.build_library_restraints(atmsel, rsr, self.loop.library_restraints) else: self.build_charmm_loop_restraints(atmsel, rsr, aln) self.special_restraints(aln) def _check_select_loop_atoms(self): """Get loop atoms to be refined, and check for sanity""" atmsel = self.select_loop_atoms() if not hasattr(atmsel, "get_atom_indices"): raise ModellerError("you must return a selection object " + "from select_loop_atoms") elif len(atmsel) == 0: raise ModellerError("no atoms selected for loop refinement") elif atmsel.get_model() is not self: raise ModellerError("selection is defined on the wrong model") elif len(atmsel.only_no_topology()) > 0: raise ModellerError("some selected residues have no topology") else: print("%d atoms selected for loop refinement" % len(atmsel)) return atmsel def optimize_loop(self, atmsel, sched, actions): """Optimize the geometry of a single loop model""" for step in sched: step.optimize(atmsel, max_iterations=self.loop.max_var_iterations, output=self.optimize_output, min_atom_shift=0.001, actions=actions) if self.loop.md_level: self.loop.md_level(atmsel, actions) cg = conjugate_gradients() cg.optimize(atmsel, max_iterations=1000, output=self.optimize_output, min_atom_shift=0.00001, actions=actions) def select_loop_atoms(self): """The default loop atom selection routine. This selects all atoms near gaps in the alignment. You can redefine this routine to select a different region, and in fact this is necessary if you are refining a PDB file, as no alignment is available in this case.""" if len(self.knowns) == 0: raise ModellerError("No alignment: you must redefine " + "select_loop_atoms") aln = self.read_alignment() loops = self.loops(aln, minlength=5, maxlength=15, insertion_ext=2, deletion_ext=1) sel = selection(loops).only_std_residues() if len(sel) == 0: raise ModellerError("No loops detected for refinement: " + "you must redefine select_loop_atoms") return sel class LoopData(modobject): """Methods and data unique to loop modeling""" starting_model = 1 ending_model = 1 write_selection_only = False write_defined_only = False md_level = None env = None outputs = None assess_methods = None library_restraints = None max_var_iterations = 200 library_schedule = autosched.loop def __init__(self, env): self.md_level = refine.slow self.env = env.copy() self.env.edat.contact_shell = 7.0 self.env.edat.dynamic_modeller = True self.env.edat.dynamic_sphere = True self.outputs = [] def use_library_restraints(self, librestraints): """Use a restraints library""" self.library_restraints = librestraints