{+ file: generate_easy.inp +}
{+ directory: general +}
{+ description: Generate a molecular topology file and/or generate coordinates for unknown atoms +}
{+ comment:
           The purpose of this file is two-fold. (1) to generate positions of a few unknown atoms, and
           (2) to generate a molecular topology file. The molecular topology file may be used in
           subsequent CNS input files, but this is usually not necessary if the automatic
           topology file generation option is available.
           Notes:
           1. Each distinct protein, DNA, or RNA chain must have a separate segid
              (or chainid if the chainid is non-blank and the "convert"
              option is set to true).
           2. Each contiguous protein, DNA, or RNA chain must not be disrupted by
              other types of residues or ligands. Rather, these other residues
              should be listed after protein, RNA/DNA chains.
           3. disulphides are automatically detected based on distances between
              the sulfur atoms (must be less than 3 A apart).
           4. broken polypeptide or nucleotide chains without termini must be more
              than 2.5 A apart to be recognized as such.
           5. N-linked glycan links are automatically recognized if the bonded atoms
              are less than 2.5 A apart.
           7. PDB atom naming: O for NDG/A2G
           8. Automatic generation cannot be used with alternate conformations.
           9. For ligands, the user must make suitable topology and parameter files.
           10. For non-standard covalent linkages, the custom patch file should be used.
           If any of these conditions is not met, change the default parameters below
           or use the generate.inp task file.
           If specified this script generates hydrogens and atoms with unknown coordinates +}
{+ authors: Paul Adams and Axel Brunger +}
{+ copyright: Yale University +}

{- Guidelines for using this file:
   - all strings must be quoted by double-quotes
   - logical variables (true/false) are not quoted
   - do not remove any evaluate statements from the file -}

{- Special patches will have to be entered manually at the relevant points
   in the file - see comments throughout the file -}

{- begin block parameter definition -} define(

{============================== important =================================}

{* Different chains in the structure must have either unique segid or
   chainid records. *}

{* NB. The input PDB file must finish with an END statement!!! *}

{=========================== coordinate file ==============================}

{* coordinate file *}
{===>} coordinate_infile="fab2hfl_break.pdb";

{* convert chainid to segid if chainid is non-blank *}
{+ choice: true false +}
{===>} convert=true;

{* separate chains by segid - a new segid starts a new chain *}
{+ choice: true false +}
{===>} separate=true;

{============ protein topology, linkage, and parameter files =============}

{* topology files *}
{===>} topology_infile_1="CNS_TOPPAR:protein.top";
{===>} topology_infile_2="CNS_TOPPAR:dna-rna.top";
{===>} topology_infile_3="CNS_TOPPAR:water.top";
{===>} topology_infile_4="CNS_TOPPAR:ion.top";
{===>} topology_infile_5="CNS_TOPPAR:carbohydrate.top";
{===>} topology_infile_6="";
{===>} topology_infile_7="";
{===>} topology_infile_8="";

{* linkage files for linear, continuous polymers (protein, DNA, RNA) *}
{===>} link_infile_1="CNS_TOPPAR:protein.link";
{===>} link_infile_2="CNS_TOPPAR:dna-rna-pho.link";
{===>} link_infile_3="";

{* parameter files *}
{===>} parameter_infile_1="CNS_TOPPAR:protein_rep.param";
{===>} parameter_infile_2="CNS_TOPPAR:dna-rna_rep.param";
{===>} parameter_infile_3="CNS_TOPPAR:water_rep.param";
{===>} parameter_infile_4="CNS_TOPPAR:ion.param";
{===>} parameter_infile_5="CNS_TOPPAR:carbohydrate.param";
{===>} parameter_infile_6="";
{===>} parameter_infile_7="";
{===>} parameter_infile_8="";

{====================== other linkages and modifications ==================}

{* extra linkages and modifications by custom patches *}
{===>} patch_infile="";

{============================ renaming atoms ===============================}

{* some atoms may need to be renamed in the topology database to conform to
   what is present in the coordinate file *}

{* CNS uses O5 for NDG and A2G carbohydrates. PDB uses O.
   What is it currently called in the coordinate file? *}
{* this will not be changed if left blank *}
{===>} O5_becomes="O";

{======================= automatic mainchain breaks ========================}

{* automatically detect mainchain breaks in proteins/DNA/RNA based on distance. *}
{* Specify the cutoff distance in Angstroms for identification of breaks.
   The default of 2.5 A should be reasonable for most cases. If the input
   structure has bad geometry it may be necessary to increase this distance.
   To turn off, set to zero. *}
{===>} break_cutoff=2.5;

{======================= automatic disulphide bonds ========================}

{* cutoff distance in Angstroms for identification of disulphides *}
{* The default of 3.0 A should be reasonable for most cases. If the input
   structure has bad geometry it may be necessary to increase this distance.
   To turn off set to zero. *}
{* Note: the atom positions for the sulfurs have to be known. *}
{===>} disulphide_dist=3.0;

{======================= automatic N-linked glycans ========================}

{* cutoff distance in Angstroms for identification of linkages in N-linked glycans *}
{* The default of 2.5 A should be reasonable for most cases.
   To turn off set to zero. *}
{* Note: the atom positions for the bonded atoms have to be known. *}
{===>} carbo_dist=2.5;

{========================= generate parameters =============================}

{* hydrogen flag - determines whether hydrogens will be output *}
{* must be true for NMR, atomic resolution X-ray crystallography
   or modelling. Set to false for X-ray crystallographic
   applications in the initial stages of refinement. *}
{+ choice: true false +}
{===>} hydrogen_flag=false;

{* selection of atoms to be deleted *}
{* to delete no atoms use: (none) *}
{===>} atom_delete=(not known);

{* selection of atoms for which coordinates will be (re-)generated *}
{* to generate coordinates for all unknown atoms use: (not known)
   and set the "selection of atoms to be deleted" to (none). *}
{===>} atom_build=(none);

{* set bfactor flag *}
{+ choice: true false +}
{===>} set_bfactor=false;

{* set bfactor value *}
{===>} bfactor=15.0;

{* set occupancy flag *}
{+ choice: true false +}
{===>} set_occupancy=false;

{* set occupancy value *}
{===>} occupancy=1.0;

{============================= output files ================================}

{* output structure file *}
{===>} structure_outfile="generate_easy.mtf";

{* output coordinate file *}
{===>} coordinate_outfile="generate_easy.pdb";

{===========================================================================}
{         things below this line do not need to be changed                  }
{===========================================================================}

 ) {- end block parameter definition -}

 checkversion 1.3

 evaluate ($log_level=quiet)

 set message=off echo=off end

 {- read topology files -}
 {- every defined, non-blank topology_infile_N is included in turn; the
    loop stops at the first index N for which no such parameter exists -}
 topology
   evaluate ($counter=1)
   evaluate ($done=false)
   while ( $done = false ) loop read
     if ( &exist_topology_infile_$counter = true ) then
       if ( &BLANK%topology_infile_$counter = false ) then
         @@&topology_infile_$counter
       end if
     else
       evaluate ($done=true)
     end if
     evaluate ($counter=$counter+1)
   end loop read
 end

 {- hand the user settings from the define block to the mtfautogenerate module -}
 @CNS_XTALMODULE:mtfautogenerate (
                                  coordinate_infile=&coordinate_infile;
                                  convert=&convert;
                                  separate=&separate;
                                  atom_delete=&atom_delete;
                                  hydrogen_flag=&hydrogen_flag;
                                  break_cutoff=&break_cutoff;
                                  disulphide_dist=&disulphide_dist;
                                  carbo_dist=&carbo_dist;
                                  patch_infile=&patch_infile;
                                  O5_becomes=&O5_becomes;
                                 )

 {- read parameter files -}
 {- same scheme as for the topology files: include each defined, non-blank
    parameter_infile_N and stop at the first undefined index -}
 parameter
   evaluate ($counter=1)
   evaluate ($done=false)
   while ( $done = false ) loop read
     if ( &exist_parameter_infile_$counter = true ) then
       if ( &BLANK%parameter_infile_$counter = false ) then
         @@&parameter_infile_$counter
       end if
     else
       evaluate ($done=true)
     end if
     evaluate ($counter=$counter+1)
   end loop read
 end

 {- build atoms if requested -}
 {- store1 flags the atoms selected by atom_build; everything below that
    refers to store1 acts on exactly those atoms -}
 identity (store1) (&atom_build)

 show sum(1) (store1)
 evaluate ($tobuild=$result)

 if ( $tobuild > 0 ) then

   {- keep all atoms that are not being built fixed -}
   fix selection=(not(store1)) end

   {- start each atom to be built at the average position of the known
      atoms of its own residue -}
   for $id in id (tag and byres(store1)) loop avco

     show ave(x) (byres(id $id) and known)
     evaluate ($ave_x=$result)
     show ave(y) (byres(id $id) and known)
     evaluate ($ave_y=$result)
     show ave(z) (byres(id $id) and known)
     evaluate ($ave_z=$result)

     do (x=$ave_x) (byres(id $id) and store1)
     do (y=$ave_y) (byres(id $id) and store1)
     do (z=$ave_z) (byres(id $id) and store1)

   end loop avco

   {- add a random displacement so the built atoms do not coincide -}
   do (x=x+random(2.0)) (store1)
   do (y=y+random(2.0)) (store1)
   do (z=z+random(2.0)) (store1)

   {- start parameter for the side chain building -}
   parameter
     nbonds
       rcon=20. nbxmod=-2 repel=0.9 wmin=0.1 tolerance=1.
       rexp=2 irexp=2 inhibit=0.25
     end
   end

   {- Friction coefficient, in 1/ps. -}
   do (fbeta=100) (store1)

   evaluate ($bath=300.0)
   evaluate ($nstep=500)
   evaluate ($timestep=0.0005)

   {- save the current masses in refy and use a uniform mass of 20 for the
      built atoms; the saved masses are restored further below -}
   do (refy=mass) (store1)

   do (mass=20) (store1)

   igroup interaction
     (store1) (store1 or known)
   end

   {- turn on initial energy terms -}
   flags exclude * include bond angle vdw end

   minimize lbfgs nstep=50 nprint=10 end

   do (vx=maxwell($bath)) (store1)
   do (vy=maxwell($bath)) (store1)
   do (vz=maxwell($bath)) (store1)

   flags exclude vdw include impr end

   dynamics cartesian
     nstep=50
     timestep=$timestep
     tcoupling=true temperature=$bath
     nprint=$nstep
     cmremove=false
   end

   flags include vdw end

   minimize lbfgs nstep=50 nprint=10 end

   do (vx=maxwell($bath)) (store1)
   do (vy=maxwell($bath)) (store1)
   do (vz=maxwell($bath)) (store1)

   dynamics cartesian
     nstep=50
     timestep=$timestep
     tcoupling=true temperature=$bath
     nprint=$nstep
     cmremove=false
   end

   parameter
     nbonds
       rcon=2. nbxmod=-3 repel=0.75
     end
   end

   minimize lbfgs nstep=100 nprint=25 end

   do (vx=maxwell($bath)) (store1)
   do (vy=maxwell($bath)) (store1)
   do (vz=maxwell($bath)) (store1)

   dynamics cartesian
     nstep=$nstep
     timestep=$timestep
     tcoupling=true temperature=$bath
     nprint=$nstep
     cmremove=false
   end

   {- turn on all energy terms -}
   flags include dihe ? end

   {- set repel to ~vdw radii -}
   parameter
     nbonds
       repel=0.89
     end
   end

   minimize lbfgs nstep=500 nprint=50 end

   flags exclude * include bond angl impr dihe vdw end

   {- return masses to something sensible -}
   do (mass=refy) (store1)

   do (vx=maxwell($bath)) (store1)
   do (vy=maxwell($bath)) (store1)
   do (vz=maxwell($bath)) (store1)

   dynamics cartesian
     nstep=$nstep
     timestep=$timestep
     tcoupling=true temperature=$bath
     nprint=$nstep
     cmremove=false
   end

   {- some final minimisation -}
   minimize lbfgs
     nstep=500
     drop=40.0
     nprint=50
   end

   print thres=0.02 bonds
   print thres=5. angles

   {- release the atoms that were fixed at the start of the build -}
   fix selection=( none ) end

 end if

 {- report atoms that still have unknown coordinates: one message per atom
    when there are fewer than 100, otherwise a single summary message -}
 show sum(1) (not(known))
 if ( $result < 100 ) then
   for $id in id (not(known)) loop print
     show (segid) (id $id)
     evaluate ($segid=$result)
     show (resname) (id $id)
     evaluate ($resname=$result)
     show (resid) (id $id)
     evaluate ($resid=$result)
     show (name) (id $id)
     evaluate ($name=$result)
     buffer message
       display unknown coordinates for atom: $segid[a4] $resname[a4] $resid[a4] $name[a4]
     end
   end loop print
 else
   buffer message
     display unknown coordinates for more than 100 atoms
   end
 end if

 {- B-factors: either the user value for all atoms, or the average B of the
    known, non-built atoms for built atoms whose B is below 0.01 -}
 if (&set_bfactor=true) then
   do (b=&bfactor) ( all )
 else
   show ave(b) (known and not(store1))
   do (b=$result) (store1 and (attr b < 0.01))
 end if

 if (&set_occupancy=true) then
   do (q=&occupancy) ( all )
 end if

 {- report the atoms whose coordinates were built, with the same
    fewer-than-100 rule as above -}
 show sum(1) (store1)
 if ( $result < 100 ) then
   for $id in id (store1) loop print
     show (segid) (id $id)
     evaluate ($segid=$result)
     show (resname) (id $id)
     evaluate ($resname=$result)
     show (resid) (id $id)
     evaluate ($resid=$result)
     show (name) (id $id)
     evaluate ($name=$result)
     buffer message
       display coordinates built for atom: $segid[a4] $resname[a4] $resid[a4] $name[a4]
     end
   end loop print
 else
   buffer message
     display coordinates built for more than 100 atoms
   end
 end if

 {- reset the remarks, then dump the buffered messages into them -}
 set remarks=reset end

 buffer message
   to=remarks
   dump
 end

 write structure output=&structure_outfile end

 {- only atoms with known coordinates are written to the coordinate file -}
 write coordinates format=PDBO output=&coordinate_outfile selection=( known) end

 stop