{+ file: generate_easy.inp +} {+ directory: general +} {+ description: Generate coordinate and structure file for simple models +} {+ comment: This is designed to be a means of generating a coordinate and structure file for commonly encountered models: protein and/or DNA/RNA plus waters and maybe ligands. The coordinates are provided by the user in a single input PDB file. Disulphide bonds will be automatically determined by distance. If required generate hydrogens. Any atoms with unknown coordinates can be automatically generated +} {+ authors: Paul Adams and Axel Brunger +} {+ copyright: Yale University +} {- Guidelines for using this file: - all strings must be quoted by double-quotes - logical variables (true/false) are not quoted - do not remove any evaluate statements from the file -} {- Special patches will have to be entered manually at the relevant points in the file - see comments throughout the file -} {- begin block parameter definition -} define( {============================== important =================================} {* Different chains in the structure must have either unique segid or chainid records. If this is no the case, the end of a chain must be delimited by a TER card. *} {* A break in a chain can be detected automatically or should be delimited by a BREAK card. In this case no patch (head, tail or link) will be applied between the residues that bound the chain break. *} {* NB. The input PDB file must finish with an END statement *} {=========================== coordinate files =============================} {* coordinate file *} {===>} coordinate_infile="fab2hfl.pdb"; {* convert chainid to segid if chainid is non-blank *} {+ choice: true false +} {===>} convert=true; {* separate chains by segid - a new segid starts a new chain *} {+ choice: true false +} {===>} separate=true; {============================ renaming atoms ===============================} {* some atoms may need to be renamed in the topology database to conform to what is present in the coordinate file *} {* delta carbon in isoleucine is named CD in CNS what is it currently called in the coordinate file? *} {* this will not be changed if left blank *} {===>} ile_CD_becomes="CD1"; {* terminal oxygens are named OT1 and OT2 in CNS what are they currently called in the coordinate file? *} {* these will not be changed if left blank *} {===>} OT1_becomes="O"; {===>} OT2_becomes="OXT"; {======================= automatic mainchain breaks ========================} {* automatically detect mainchain breaks in proteins based on distance *} {* the peptide link at break points will be removed *} {+ choice: true false +} {===>} auto_break=true; {* cutoff distance in Angstroms for identification of breaks *} {* the default of 2.5A should be reasonable for most cases. If the input structure has bad geometry it may be necessary to increase this distance *} {===>} break_cutoff=2.5; {* file containing patches to delete peptide links *} {===>} prot_break_infile="CNS_TOPPAR:protein_break.top"; {======================= automatic disulphide bonds ========================} {* cutoff distance in Angstroms for identification of disulphides *} {* the default of 3.0A should be reasonable for most cases. If the input structure has bad geometry it may be necessary to increase this distance *} {===>} disulphide_dist=3.0; {========================= RNA to DNA conversion ==========================} {* All nucleic acid residues initially have ribose sugars (rather than deoxyribose). A patch must be applied to convert the ribose to deoxyribose for DNA residues. Select those residues which need to have the patch applied to make them DNA. *} {* Make sure that the atom selection is specific for the nucleic acid residues *} {===>} dna_sele=(none); {========================= generate parameters =============================} {* hydrogen flag - determines whether hydrogens will be output *} {* must be true for NMR, atomic resolution X-ray crystallography or modelling. Set to false for most X-ray crystallographic applications at resolution > 1A *} {+ choice: true false +} {===>} hydrogen_flag=false; {* which hydrogens to build *} {+ choice: "all" "unknown" +} {===>} hydrogen_build="all"; {* selection of atoms other than hydrogens for which coordinates will be generated *} {* to generate coordinates for all unknown atoms use: (not(known)) *} {===>} atom_build=(not(known)); {* selection of atoms to be deleted *} {* to delete no atoms use: (none) *} {===>} atom_delete=(none); {* set bfactor flag *} {+ choice: true false +} {===>} set_bfactor=false; {* set bfactor value *} {===>} bfactor=15.0; {* set occupancy flag *} {+ choice: true false +} {===>} set_occupancy=false; {* set occupancy value *} {===>} occupancy=1.0; {============================= output files ================================} {* output structure file *} {===>} structure_outfile="generate_easy.mtf"; {* output coordinate file *} {===>} coordinate_outfile="generate_easy.pdb"; {* format output coordinates for use in o *} {* if false then the default CNS output coordinate format will be used *} {+ choice: true false +} {===>} pdb_o_format=true; {================== protein topology and parameter files ===================} {* protein topology file *} {===>} prot_topology_infile="CNS_TOPPAR:protein.top"; {* protein linkage file *} {===>} prot_link_infile="CNS_TOPPAR:protein.link"; {* protein parameter file *} {===>} prot_parameter_infile="CNS_TOPPAR:protein_rep.param"; {================ nucleic acid topology and parameter files =================} {* nucleic acid topology file *} {===>} nucl_topology_infile="CNS_TOPPAR:dna-rna.top"; {* nucleic acid linkage file *} {* use CNS_TOPPAR:dna-rna-pho.link for 5'-phosphate *} {===>} nucl_link_infile="CNS_TOPPAR:dna-rna.link"; {* nucleic acid parameter file *} {===>} nucl_parameter_infile="CNS_TOPPAR:dna-rna_rep.param"; {=================== water topology and parameter files ====================} {* water topology file *} {===>} water_topology_infile="CNS_TOPPAR:water.top"; {* water parameter file *} {===>} water_parameter_infile="CNS_TOPPAR:water_rep.param"; {================= carbohydrate topology and parameter files ===============} {* carbohydrate topology file *} {===>} carbo_topology_infile="CNS_TOPPAR:carbohydrate.top"; {* carbohydrate parameter file *} {===>} carbo_parameter_infile="CNS_TOPPAR:carbohydrate.param"; {============= prosthetic group topology and parameter files ===============} {* prosthetic group topology file *} {===>} prost_topology_infile=""; {* prosthetic group parameter file *} {===>} prost_parameter_infile=""; {=================== ligand topology and parameter files ===================} {* ligand topology file *} {===>} ligand_topology_infile=""; {* ligand parameter file *} {===>} ligand_parameter_infile=""; {===================== ion topology and parameter files ====================} {* ion topology file *} {===>} ion_topology_infile="CNS_TOPPAR:ion.top"; {* ion parameter file *} {===>} ion_parameter_infile="CNS_TOPPAR:ion.param"; {===========================================================================} { things below this line do not need to be changed } {===========================================================================} ) {- end block parameter definition -} checkversion 1.2 evaluate ($log_level=quiet) topology if ( &BLANK%prot_topology_infile = false ) then @@&prot_topology_infile end if if ( &BLANK%nucl_topology_infile = false ) then @@&nucl_topology_infile end if if ( &BLANK%water_topology_infile = false ) then @@&water_topology_infile end if if ( &BLANK%carbo_topology_infile = false ) then @@&carbo_topology_infile end if if ( &BLANK%prost_topology_infile = false ) then @@&prost_topology_infile end if if ( &BLANK%ligand_topology_infile = false ) then @@&ligand_topology_infile end if if ( &BLANK%ion_topology_infile = false ) then @@&ion_topology_infile end if end topology if ( &BLANK%prot_break_infile = false ) then @@&prot_break_infile end if end parameter if ( &BLANK%prot_parameter_infile = false ) then @@&prot_parameter_infile end if if ( &BLANK%nucl_parameter_infile = false ) then @@&nucl_parameter_infile end if if ( &BLANK%water_parameter_infile = false ) then @@&water_parameter_infile end if if ( &BLANK%carbo_parameter_infile = false ) then @@&carbo_parameter_infile end if if ( &BLANK%prost_parameter_infile = false ) then @@&prost_parameter_infile end if if ( &BLANK%ligand_parameter_infile = false ) then @@&ligand_parameter_infile end if if ( &BLANK%ion_parameter_infile = false ) then @@&ion_parameter_infile end if end segment chain if ( &convert = true ) then convert=true end if if ( &separate = true ) then separate=true end if @@&prot_link_infile @@&nucl_link_infile coordinates @@&coordinate_infile end end if ( &BLANK%ile_CD_becomes = false ) then do (name=&ile_CD_becomes) (resname ILE and name CD) end if if ( &BLANK%OT1_becomes = false ) then do (name=&OT1_becomes) (name OT1) end if if ( &BLANK%OT2_becomes = false ) then do (name=&OT2_becomes) (name OT2) end if coordinates if ( &convert = true ) then convert=true end if @@&coordinate_infile set echo=off end show sum(1) ( not(hydrogen) and not(known) ) if ( $select = 0 ) then display %INFO: There are no coordinates missing for non-hydrogen atoms end if set echo=on end if ( $log_level = verbose ) then set message=normal echo=on end else set message=off echo=off end end if if ( &auto_break = true ) then evaluate ($break=0) for $id1 in id ( name C and bondedto(name CA) and bondedto(name O) ) loop break show (segid) (id $id1) evaluate ($segid1=$result) show (resid) (id $id1) evaluate ($resid1=$result) show (resname) (id $id1) evaluate ($resname1=$result) show sum(1) (id $id1 and known) if ( $result = 0 ) then display unknown coordinates for segid $segid1 resname $resname1 resid $resid1 name C display this coordinate must be known for automatic chain break detection abort end if identity (store1) ( name N and bondedto( segid $segid1 and resid $resid1 and name c ) ) if ( $select = 1 ) then show element (store1) (attribute store1 > 0) evaluate ($id2=$result) show (segid) (id $id2) evaluate ($segid2=$result) show (resid) (id $id2) evaluate ($resid2=$result) show (resname) (id $id2) evaluate ($resname2=$result) show sum(1) (id $id2 and known) if ( $result = 0 ) then display unknown coordinates for segid $segid2 resname $resname2 resid $resid2 name N display this coordinate must be known for automatic chain break detection abort end if pick bond (name c and segid $segid1 and resid $resid1) (name n and segid $segid2 and resid $resid2) geometry if ( $result > &break_cutoff ) then evaluate ($break=$break+1) evaluate ($seg1.$break=$segid1) evaluate ($res1.$break=$resid1) evaluate ($seg2.$break=$segid2) evaluate ($res2.$break=$resid2) if ( $resname2 = PRO ) then evaluate ($patch.$break=DPPP) elseif ( $resname2 = CPR ) then evaluate ($patch.$break=DPPP) else evaluate ($patch.$break=DPEP) end if end if end if end loop break evaluate ($counter=1) while ($counter <= $break) loop delete patch $patch.$counter reference=-=(segid $seg1.$counter and resid $res1.$counter) reference=+=(segid $seg2.$counter and resid $res2.$counter) end buffer message display peptide link removed (applied $patch.$counter): from \ $seg1.$counter[a4] $res1.$counter[a4] to $seg2.$counter[a4] $res2.$counter[a4] end evaluate ($counter=$counter+1) end loop delete end if evaluate ($disu=0) for $id1 in id ( resname CYS and name SG ) loop dis1 show (segid) (id $id1) evaluate ($segid1=$result) show (resid) (id $id1) evaluate ($resid1=$result) identity (store1) (all) for $id2 in id ( resname CYS and name SG and ( attr store1 > $id1 ) ) loop dis2 show (segid) (id $id2) evaluate ($segid2=$result) show (resid) (id $id2) evaluate ($resid2=$result) pick bond (id $id1) (id $id2) geometry if ( $result <= &disulphide_dist ) then evaluate ($disu=$disu+1) evaluate ($seg1.$disu=$segid1) evaluate ($seg2.$disu=$segid2) evaluate ($res1.$disu=$resid1) evaluate ($res2.$disu=$resid2) end if end loop dis2 end loop dis1 evaluate ($counter=1) while ( $counter <= $disu ) loop disu patch disu reference=1=(segid $seg1.$counter and resid $res1.$counter) reference=2=(segid $seg2.$counter and resid $res2.$counter) end buffer message display disulphide added: from \ $seg1.$counter[a4] $res1.$counter[a4] to $seg2.$counter[a4] $res2.$counter[a4] end evaluate ($counter=$counter+1) end loop disu {- patching of RNA to DNA -} evaluate ($counter=0) for $id in id ( tag and (&dna_sele) ) loop dna evaluate ($counter=$counter+1) show (segid) (id $id) evaluate ($dna.segid.$counter=$result) show (resid) (id $id) evaluate ($dna.resid.$counter=$result) end loop dna evaluate ($dna.num=$counter) evaluate ($counter=0) while ($counter < $dna.num) loop dnap evaluate ($counter=$counter+1) patch deox reference=nil=(segid $dna.segid.$counter and resid $dna.resid.$counter) end end loop dnap set messages=normal end set echo=on end if (&hydrogen_flag=false) then delete selection=( hydrogen ) end end if delete selection=( &atom_delete ) end identity (store1) (none) identity (store1) (&atom_build) if ( &hydrogen_build = "all" ) then identity (store1) (store1 or hydrogen) elseif ( &hydrogen_build = "unknown" ) then identity (store1) (store1 or (not(known) and hydrogen)) end if show sum(1) (store1) evaluate ($tobuild=$result) if ( $tobuild > 0 ) then fix selection=(not(store1)) end show sum(1) (store1) evaluate ($moving=$result) if ( $moving > 0 ) then for $id in id (tag and byres(store1)) loop avco show ave(x) (byres(id $id) and known) evaluate ($ave_x=$result) show ave(y) (byres(id $id) and known) evaluate ($ave_y=$result) show ave(z) (byres(id $id) and known) evaluate ($ave_z=$result) do (x=$ave_x) (byres(id $id) and store1) do (y=$ave_y) (byres(id $id) and store1) do (z=$ave_z) (byres(id $id) and store1) end loop avco do (x=x+random(2.0)) (store1) do (y=y+random(2.0)) (store1) do (z=z+random(2.0)) (store1) {- start parameter for the side chain building -} parameter nbonds rcon=20. nbxmod=-2 repel=0.9 wmin=0.1 tolerance=1. rexp=2 irexp=2 inhibit=0.25 end end {- Friction coefficient, in 1/ps. -} do (fbeta=100) (store1) evaluate ($bath=300.0) evaluate ($nstep=500) evaluate ($timestep=0.0005) do (refy=mass) (store1) do (mass=20) (store1) igroup interaction (store1) (store1 or known) end {- turn on initial energy terms -} flags exclude * include bond angle vdw end minimize powell nstep=50 nprint=10 end do (vx=maxwell($bath)) (store1) do (vy=maxwell($bath)) (store1) do (vz=maxwell($bath)) (store1) flags exclude vdw include impr end dynamics cartesian nstep=50 timestep=$timestep tcoupling=true temperature=$bath nprint=$nstep cmremove=false end flags include vdw end minimize powell nstep=50 nprint=10 end do (vx=maxwell($bath)) (store1) do (vy=maxwell($bath)) (store1) do (vz=maxwell($bath)) (store1) dynamics cartesian nstep=50 timestep=$timestep tcoupling=true temperature=$bath nprint=$nstep cmremove=false end parameter nbonds rcon=2. nbxmod=-3 repel=0.75 end end minimize powell nstep=100 nprint=25 end do (vx=maxwell($bath)) (store1) do (vy=maxwell($bath)) (store1) do (vz=maxwell($bath)) (store1) dynamics cartesian nstep=$nstep timestep=$timestep tcoupling=true temperature=$bath nprint=$nstep cmremove=false end {- turn on all energy terms -} flags include dihe ? end {- set repel to ~vdw radii -} parameter nbonds repel=0.89 end end minimize powell nstep=500 nprint=50 end flags exclude * include bond angl impr dihe vdw end {- return masses to something sensible -} do (mass=refy) (store1) do (vx=maxwell($bath)) (store1) do (vy=maxwell($bath)) (store1) do (vz=maxwell($bath)) (store1) dynamics cartesian nstep=$nstep timestep=$timestep tcoupling=true temperature=$bath nprint=$nstep cmremove=false end {- some final minimisation -} minimize powell nstep=500 drop=40.0 nprint=50 end print thres=0.02 bonds print thres=5. angles end if fix selection=( none ) end end if set echo=false end show sum(1) (not(known)) if ( $result < 100 ) then for $id in id (not(known)) loop print show (segid) (id $id) evaluate ($segid=$result) show (resname) (id $id) evaluate ($resname=$result) show (resid) (id $id) evaluate ($resid=$result) show (name) (id $id) evaluate ($name=$result) buffer message display unknown coordinates for atom: $segid[a4] $resname[a4] $resid[a4] $name[a4] end end loop print else buffer message display unknown coordinates for more than 100 atoms end end if set echo=true end if (&set_bfactor=true) then do (b=&bfactor) ( all ) else show ave(b) (known and not(store1)) do (b=$result) (store1 and (attr b < 0.01)) end if if (&set_occupancy=true) then do (q=&occupancy) ( all ) end if set echo=false end show sum(1) (store1) if ( $result < 100 ) then for $id in id (store1) loop print show (segid) (id $id) evaluate ($segid=$result) show (resname) (id $id) evaluate ($resname=$result) show (resid) (id $id) evaluate ($resid=$result) show (name) (id $id) evaluate ($name=$result) buffer message display coordinates built for atom: $segid[a4] $resname[a4] $resid[a4] $name[a4] end end loop print else buffer message display coordinates built for more than 100 hundred atoms end end if set echo=true end set remarks=reset end buffer message to=remarks dump end write structure output=&structure_outfile end if ( &pdb_o_format = true ) then write coordinates format=PDBO output=&coordinate_outfile end else write coordinates output=&coordinate_outfile end end if stop