Rosetta
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
residue_support.cc
Go to the documentation of this file.
1 // -*- mode:c++;tab-width:2;indent-tabs-mode:t;show-trailing-whitespace:t;rm-trailing-spaces:t -*-
2 // vi: set ts=2 noet:
3 //
4 // (c) Copyright Rosetta Commons Member Institutions.
5 // (c) This file is part of the Rosetta software suite and is made available under license.
6 // (c) The Rosetta software is developed by the contributing members of the Rosetta Commons.
7 // (c) For more information, see http://www.rosettacommons.org. Questions about this can be
8 // (c) addressed to University of Washington UW TechTransfer, email: license@u.washington.edu.
9 
10 /// @file src/core/chemical/residue_support.hh
11 /// @brief support functions for class residue; functions that
12 /// should not be included as part of the class.
13 /// @author Phil Bradley
14 /// @author Rocco Moretti (rmorettiase@gmail.com)
15 /// @author Steven Combs
16 
17 // Package Headers
19 
22 
23 // Project Headers
24 #include <core/graph/Graph.hh>
25 
27 #include <core/chemical/Atom.hh>
28 #include <core/chemical/Bond.hh>
29 #include <utility/graph/RingDetection.hh>
30 
31 #include <basic/Tracer.hh>
32 
33 #include <utility/vector1.hh>
34 #include <utility/string_util.hh>
35 #include <utility/graph/BFS_prune.hh>
36 
37 #include <ObjexxFCL/FArray2D.hh>
38 #include <ObjexxFCL/string.functions.hh>
39 
40 #include <algorithm>
41 
42 namespace core {
43 namespace chemical {
44 
45 static THREAD_LOCAL basic::Tracer TR( "core.chemical.residue_support" );
46 
47 ObjexxFCL::FArray2D_int
49 {
50  //return res.get_residue_path_distances();
51  using namespace graph;
52  Graph g;
53 
54  g.set_num_nodes( res.natoms() );
55  for ( uint ii = 1; ii <= res.natoms(); ++ii ) {
56  AtomIndices const ii_bonded = res.nbrs( ii );
57  for ( Size jj = 1; jj <= ii_bonded.size(); ++jj ) {
58  if ( ii_bonded[ jj ] > ii ) {
59  g.add_edge( ii, ii_bonded[ jj ] );
60  }
61  }
62  }
63  return g.all_pairs_shortest_paths();
64 }
65 
67  //this is a const reference because vertices change when you copy the graph
68  const core::chemical::ResidueGraph & full_residue_graph = res.graph(); //get the boost graph structure from residuetype
69 
71 
72  //set up the mapping between VD and ED from ResidueGraph for LightWeightResidueGraph
73  boost::property_map<LightWeightResidueGraph, boost::vertex_name_t>::type lwrg_vd_to_VD = boost::get(boost::vertex_name, lwrg);
74  boost::property_map<LightWeightResidueGraph, boost::edge_name_t>::type lwrg_ed_to_ED = boost::get(boost::edge_name, lwrg);
75 
76  //map between the LightWeightResidueGraph vertex and the ResidueGraph vertex. Used when adding edges
77  std::map<core::chemical::VD, lwrg_VD> map_of_vertex;
78  //first, add all the vertex to light weight residue graph, and map the property maps to point at the ResidueGraph
79  for ( core::chemical::VIterPair vp = boost::vertices(full_residue_graph); vp.first != vp.second; ++vp.first ) {
80  VD const & full_residue_graph_vd = *vp.first;
81  lwrg_VD lwrg_vd = boost::add_vertex(lwrg);
82  lwrg_vd_to_VD[lwrg_vd] = full_residue_graph_vd; //set property maps
83  map_of_vertex[full_residue_graph_vd] = lwrg_vd; //set mapping for the edges
84  }
85 
86  //now we add the edges between the vertex
87  for ( EIterPair ep = boost::edges(full_residue_graph); ep.first != ep.second; ++ep.first ) {
88  VD source = boost::source(*ep.first, full_residue_graph);
89  VD target = boost::target(*ep.first, full_residue_graph);
90  lwrg_VD source_lwrg_VD = map_of_vertex[source];
91  lwrg_VD target_lwrg_VD = map_of_vertex[target];
92  lwrg_ED e_added;
93  bool added;
94  boost::tie(e_added, added) = boost::add_edge(source_lwrg_VD, target_lwrg_VD,lwrg );
95  if ( added ) { //only add bonds once!
96  lwrg_ed_to_ED[e_added] = *ep.first;
97  }
98  }
99  debug_assert(boost::num_vertices(lwrg) == full_residue_graph.num_vertices()); //fail if the number of vertex are not the same
100  debug_assert(boost::num_edges(lwrg) == full_residue_graph.num_edges()); //fail if the number of edges are not the same
101 
102 
103  //boost::property_map<LightWeightResidueGraph, boost::vertex_name_t>::type lwrg_vd_to_VD = boost::get(boost::vertex_name, lwrg);
104  //boost::property_map<LightWeightResidueGraph, boost::edge_name_t>::type lwrg_ed_to_ED = boost::get(boost::edge_name, lwrg);
105 
106  utility::graph::RingDetection<LightWeightResidueGraph> ring_detect(lwrg); //initialize the ring detector. Automatically assigns rings
107  utility::vector1<utility::vector1<lwrg_VD> > rings = ring_detect.GetRings(); //these are the path of the rings
108 
109 
110  //our light weight graph has been made. Now return it!
111  return lwrg;
112 }
113 
114 /// @brief Rename atoms in the residue type such that their names are unique.
115 /// If preserve is true, only rename those which have no names or who have
116 /// name conflicts. (Both of the conflicting atoms will be renamed.)
117 void
118 rename_atoms( ResidueType & res, bool preserve/*=true*/ ) {
119  std::map< std::string, core::Size > name_counts;
120  ResidueGraph const & graph( res.graph() );
121  VIter iter, iter_end;
122  for ( boost::tie( iter, iter_end ) = boost::vertices( graph ); iter != iter_end; ++iter ) {
123  Atom const & atom( graph[*iter] );
124  if ( preserve && atom.name().size() != 0 ) {
125  name_counts[ atom.name() ]++; // with map, builtins are zero-initialized according to the C++ spec.
126  }
127  }
128 
129  for ( boost::tie( iter, iter_end ) = boost::vertices( graph ); iter != iter_end; ++iter ) {
130  Atom const & atom( graph[*iter] );
131  if ( !preserve || name_counts[ atom.name() ] != 1 ) {
132  //Find the first unoccupied name Xnnn type string.
133  // Skipping values which were multiply represented in the input is deliberate
134  // There's no fair way to choose which one is the "real" one.
135  debug_assert( atom.element_type() );
136  std::string name;
137  core::Size ii(0);
138  do {
139  ++ii;
140  name = ObjexxFCL::uppercased( atom.element_type()->get_chemical_symbol() ) + utility::to_string( ii );
141  //Align name, preferring to keep start in the second position
142  if ( name.size() == 2 ) {
143  name = ' ' + name + ' ';
144  } else if ( name.size() == 3 ) {
145  name = ' ' + name;
146  } //Can't be 1, and if it's 4 or greater, leave as is.
147  } while ( name_counts.find( name ) != name_counts.end() );
148  // Assign new value and mark it used.
149  if ( TR.Trace.visible() ) {
150  TR.Trace << "Renaming atom from '"<< res.atom(*iter).name() << "' to '" << name << "'" << std::endl;
151  }
152  res.atom(*iter).name( name );
153  res.remap_pdb_atom_names( true ); // We've renamed an atom, so we need to be flexible with PDB loading.
154  name_counts[ name ]++;
155  }
156  }
157 }
158 
159 /// @brief Utility visitor for find_nbr_dist
160 /// Will only traverse those atoms in the "rigid" portion of graph around the starting atom.
161 /// "Rigid" includes direct neighbors and atoms connected by non-rotatable bonds
162 /// e.g. all rings, all double/triple bonds, methyl groups, single atoms, etc.
164  typedef utility::vector1< utility::vector1< core::Real > > Matrix;
165 
166 public:
167  RigidDistanceVisitor( Matrix & distances, ResidueType const & restype, VD start ) :
168  distances_(distances),
169  restype_(restype),
170  start_(start),
171  start_pos_( restype.atom(start).ideal_xyz() )
172  {
173  start_index_ = restype.atom_index( start );
174  }
175 
176  template <class ED, class ResidueGraphType>
177  bool examine_edge( ED edge, ResidueGraphType & graph) {
178  VD source( boost::source( edge, graph ) ), target( boost::target( edge, graph ) );
179  //std::string start( restype_.atom_name( start_ ) );
180  //std::string source( restype_.atom_name( boost::source( edge, graph ) ) );
181  //std::string target( restype_.atom_name( boost::target( edge, graph ) ) );
182  if ( source == start_ ) {
183  // Directly connected atoms are part of the rigid units, for distance purposes,
184  // but we shouldn't propagate across them unless the fit the other criteria
185  // (see TODO below, though)
186  core::Size index( restype_.atom_index( target ) );
187  distances_[start_index_][ index ] = restype_.atom( target ).ideal_xyz().distance( start_pos_ );
188  }
189  // Follow across non-rotatable bonds. (Ring and double bonds)
190  Bond const & bond( restype_.bond( edge ) );
191  if ( bond.ringness() == BondInRing ) {
192  return false; // Follow rings
193  }
194  if ( bond.order() == DoubleBondOrder || bond.order() == TripleBondOrder ) {
195  return false; // Follow double and triple bonds
196  }
197  if ( is_nub( source ) || is_nub( target ) ) {
198  //Need to test both for symmetry - if either is a nub the bond isn't rotatable and should be followed.
199  return false;
200  }
201  return true;
202  // TODO: The atom *directly* across from the rotatable bond is ridgidly connected.
203  // Do we want to include that in the unit? (molfile_to_params doesn't seem to).
204  }
205 
206  template <class VD, class ResidueGraphType>
207  bool examine_vertex( VD vertex, ResidueGraphType & ) {
208  // If we're examining the vertex, it's in the rigid unit -- add distances to the matrix
209  core::Size index( restype_.atom_index( vertex ) );
210  distances_[start_index_][index] = restype_.atom( vertex ).ideal_xyz().distance( start_pos_ );
211  // The reverse will be taken care of in a later iteration.
212  return false; // Always examine out edges.
213  }
214 
215  bool is_nub( VD atom ) {
216  // Additional non-rotatable bonds include single bonds which are connected to atoms
217  core::chemical::AdjacentIter iter, iter_end;
218  core::Size nheavy(0), nhydro(0);
219  for ( boost::tie( iter, iter_end ) = restype_.bonded_neighbor_iterators(atom); iter != iter_end; ++iter ) {
220  if ( restype_.atom(*iter).element_type()->element() == core::chemical::element::H ) {
221  ++nhydro;
222  } else {
223  ++nheavy;
224  }
225  }
226  if ( nheavy >= 2 ) return false; // Multiply-heavy bonded atoms aren't non-rotatable stubs.
227  // Non-considered bonds are all hydrogens (bonds to hydrogens use this function)
228  if ( nhydro != 1 ) return true; // Multiple hydrogens aren't considered rotameric.
229  if ( restype_.atom(atom).element_type()->element() == core::chemical::element::C ) return true; // Carbon with only a single heavy bond is never rotameric
230  // Proton rotamer
231  return false;
232  }
233 
234 
235 private:
241 };
242 
243 /// @brief Calculate the rigid matrix - assume that distances has been initialized to some really large value, and is square
244 void calculate_rigid_matrix( ResidueType const & res, utility::vector1< utility::vector1< core::Real > > & distances ) {
245  // Set up bonded and rigid distances
246  VIter iter, iter_end;
247  for ( boost::tie( iter, iter_end ) = res.atom_iterators(); iter != iter_end; ++iter ) {
248  // The visitor takes the distance matrix as a reference and fills it out.
249  RigidDistanceVisitor vis( distances, res, *iter );
250  utility::graph::breadth_first_search_prune( res.graph(), *iter, vis );
251  }
252  // The Floyd-Warshall algorithm. We do this in-line instead of with boost
253  // because some of the distance weights don't correspond to edge weights.
254  // (Besides, it's easy enough.)
255  for ( core::Size kk(1); kk <= res.natoms(); ++kk ) {
256  for ( core::Size jj(1); jj <= res.natoms(); ++jj ) {
257  for ( core::Size ii(1); ii <= res.natoms(); ++ii ) {
258  core::Real new_dist( distances[ii][kk] + distances[kk][jj] );
259  if ( new_dist < distances[ii][jj] ) {
260  distances[ii][jj] = new_dist;
261  }
262  }
263  }
264  }
265 
266  if ( TR.Trace.visible() ) {
267  // Print out the full distance matrix for debugging purposes.
268  TR.Trace << std::setprecision(4);
269  TR.Trace << "Atom distance matrix for " << res.name() << std::endl;
270  TR.Trace << " " << '\t';
271  for ( core::Size xx(1); xx <= res.natoms(); ++xx ) {
272  TR.Trace << res.atom_name(xx) << '\t';
273  }
274  TR.Trace << std::endl;
275  for ( core::Size yy(1); yy <= res.natoms(); ++yy ) {
276  TR.Trace << res.atom_name(yy) << '\t';
277  for ( core::Size xx(1); xx <= res.natoms(); ++xx ) {
278  TR.Trace << distances[yy][xx] << '\t';
279  }
280  TR.Trace << std::endl;
281  }
282  }
283 }
284 
285 /// @brief Find the neighbor distance to the given neighbor atom.
286 /// If nbr_atom is null_vertex, give the smallest neighbor distance,
287 /// and set nbr_atom to the atom for that distance.
288 /// @details The neighbor distance here is adjusted for rotatable bonds -
289 /// It should be at least as large as the maximum neighbor distance
290 /// in any torsional rotamer
291 /// If the neighbor atom is not provided, the atom chosen will be a
292 /// multiply-bonded heavy atom.
293 ///
294 /// Assumes:
295 /// * All atoms and bond are present
296 /// * All ideal_xyz coordinates have been set
297 /// * All elements have been set
298 /// * All ring bonds have been annotated
300 find_nbr_dist( ResidueType const & res, VD & nbr_atom ) {
301  if ( res.natoms() == 0 ) {
302  utility_exit_with_message("Cannot find neighbor atom distance for empty residue type.");
303  }
304  core::Real maxdist = 1e9; // Hopefully sufficiently large.
305  utility::vector1< utility::vector1< core::Real > > distances(res.natoms(), utility::vector1< core::Real >( res.natoms(), maxdist ) );
306  calculate_rigid_matrix( res, distances );
307 
308  // TODO: Although we throw out hydrogens as potential neighbor atoms,
309  // I believe the meaning of neighbor atoms is heavy-atom distances
310  // We could get smaller values by removing hydrogens from distance considerations.
311 
312  utility::vector1< core::Real > maxdists;
313  for ( core::Size ii(1); ii<= res.natoms(); ++ii ) {
314  maxdists.push_back( *(std::max_element( distances[ii].begin(), distances[ii].end() )) );
315  }
316 
317  if ( nbr_atom != ResidueType::null_vertex ) {
318  // We have an atom in mind - we just need the distance.
319  return maxdists[ res.atom_index( nbr_atom ) ];
320  }
321 
322  //maxdist initialized above
323  for ( core::Size jj(1); jj <= res.natoms(); ++jj ) {
324  VD atom_vd( res.atom_vertex( jj ) );
325  if ( maxdists[jj] < maxdist &&
326  res.atom(jj).element_type()->element() != core::chemical::element::H ) { // not hydrogen
327  //Use graph directly, as other neighbor annotations may not be fully baked
328  core::Size bonded_heavy(0);
329  AdjacentIter itr, itr_end;
330  for ( boost::tie(itr, itr_end) = res.bonded_neighbor_iterators(atom_vd); itr != itr_end; ++itr ) {
331  if ( res.atom( *itr ).element_type()->element() != core::chemical::element::H ) {
332  bonded_heavy += 1;
333  }
334  }
335  if ( bonded_heavy >= 2 ) {
336  maxdist = maxdists[jj];
337  nbr_atom = atom_vd;
338  }
339  }
340  }
341 
342  if ( nbr_atom == ResidueType::null_vertex ) { // No suitable neighbor -- just pick atom 1
343  TR.Warning << "No suitable neighbor atom found for " << res.name() << " -- picking first atom (" << res.atom_name(1) << ") instead." << std::endl;
344  nbr_atom = res.atom_vertex( 1 );
345  maxdist = maxdists[1];
346  }
347  return maxdist;
348 }
349 
350 /// @brief Apply molfile_to_params style partial charges to the ResidueType.
351 /// @details These partial charges are based off of the Rosetta atom type,
352 /// adjusted such that the net partial charge is equal to the net formal charge.
353 ///
354 /// These charges are almost certainly dodgy. If you have any other source of
355 /// partial charges that are at all reasonable, you probably want to consider those instead.
356 ///
357 /// Assumes:
358 /// * All atoms and bond are present.
359 /// * All atom types have been set.
360 /// * Formal charges (if any) have been set.
361 
362 void
364  ResidueGraph const & graph( res.graph() );
365  AtomTypeSet const & ats( res.atom_type_set() );
366  if ( ! ats.has_extra_parameter( "CHARGE" ) ) {
367  TR.Warning << "Atom Type Set " << ats.name() << " is missing charging information - skipping recharging." << std::endl;
368  return;
369  }
370  int charge_extra_param_index( ats.extra_parameter_index("CHARGE") );
371  core::Real desired_net(0), current_net(0);
372  core::Size natoms(0);
373  VIter iter, iter_end;
374  for ( boost::tie( iter, iter_end ) = boost::vertices( graph ); iter != iter_end; ++iter ) {
375  Atom & atm( res.atom( *iter ) );
376  desired_net += atm.formal_charge();
377  core::Real charge(0);
378  if ( atm.atom_type_index() != 0 ) {
379  charge = ats[ atm.atom_type_index() ].extra_parameter( charge_extra_param_index );
380  }
381  atm.charge( charge );
382  TR.Debug << "Residue " << res.name() << ": Charging atom " << atm.name() << " type " << ats[ atm.atom_type_index() ].name() << " at " << atm.charge() << std::endl;
383  current_net += charge;
384  if ( charge != 0 ) {
385  natoms += 1;
386  }
387  }
388 
389  if ( current_net < (desired_net - 0.001) || current_net > (desired_net + 0.001) ) {
390  core::Real correction( (desired_net-current_net)/natoms );
391  for ( boost::tie( iter, iter_end ) = boost::vertices( graph ); iter != iter_end; ++iter ) {
392  Atom & atm( res.atom( *iter ) );
393  if ( atm.charge() != 0 ) {
394  atm.charge( atm.charge() + correction );
395  TR.Debug << "Residue " << res.name() << ": Adjusting charge on atom " << atm.name() << " type " << ats[ atm.atom_type_index() ].name() << " to " << atm.charge() << std::endl;
396  }
397  }
398  }
399 }
400 
401 } // chemical
402 } // core
std::string const & atom_name(Size const index) const
get atom name by index
Definition: ResidueType.cc:754
LightWeightResidueGraph convert_residuetype_to_light_graph(ResidueType const &res)
std::pair< EIter, EIter > EIterPair
utility::vector1< Size > AtomIndices
bool examine_edge(ED edge, ResidueGraphType &graph)
basic chemical Bond
Definition: Bond.hh:39
Vector const & ideal_xyz() const
Definition: Atom.hh:116
Bond & bond(ED const ed)
Definition: ResidueType.cc:546
Atom & atom(Size const atom_index)
Definition: ResidueType.cc:516
AtomTypeSet const & atom_type_set() const
access by reference the atomset for which this residue is constructed
Definition: ResidueType.hh:177
AtomIndices const & nbrs(Size const atomno) const
indices of the bonded neighbors for an atom, shortcut for bonded_neighbor(atomno) ...
Definition: ResidueType.hh:373
ResidueGraph::vertex_descriptor VD
std::string const & name() const
Definition: Atom.hh:104
std::pair< VIter, VIter > VIterPair
boost::graph_traits< ResidueGraph >::adjacency_iterator AdjacentIter
generic graph class header
Real const & charge() const
Definition: Atom.hh:115
bool examine_vertex(VD vertex, ResidueGraphType &)
void calculate_rigid_matrix(ResidueType const &res, utility::vector1< utility::vector1< core::Real > > &distances)
Calculate the rigid matrix - assume that distances has been initialized to some really large value...
ResidueGraph const & graph() const
Constant access to the underlying graph.
Definition: ResidueType.hh:628
boost::graph_traits< ResidueGraph >::vertex_iterator VIter
RigidDistanceVisitor(Matrix &distances, ResidueType const &restype, VD start)
int const & formal_charge() const
Definition: Atom.hh:114
Method declarations and simple accessors/getters for ResidueType.
platform::Size Size
Definition: types.hh:30
boost::adjacency_list< boost::vecS, boost::vecS, boost::undirectedS, boost::property< boost::vertex_name_t, core::chemical::VD >, boost::property< boost::edge_name_t, core::chemical::ED > > LightWeightResidueGraph
void remap_pdb_atom_names(bool rename)
Turn on geometry-based atom renaming when loading this residue type from PDB files.
void rosetta_recharge_fullatom(ResidueType &res)
Apply molfile_to_params style partial charges to the ResidueType.
bool add_vertex(core::Size const v, EdgeList &edges)
add a vertex, splitting edges if necessary
ObjexxFCL::FArray2D_int get_residue_path_distances(ResidueType const &res)
relies on class Graph to find all pairs shortest path information
boost::undirected_graph< Atom, Bond > ResidueGraph
ElementCOP element_type() const
Definition: Atom.hh:109
Size atom_index(std::string const &name) const
get atom index by name
VD atom_vertex(std::string const &name) const
get the vertex descriptor from the name of the atom.
Method definitions for chemical::Atom.
core::Real find_nbr_dist(ResidueType const &res, VD &nbr_atom)
Find the neighbor distance to the given neighbor atom. If nbr_atom is null_vertex, give the smallest neighbor distance, and set nbr_atom to the atom for that distance.
support functions for class residue; functions that should not be included as part of the class...
numeric::xyzVector< Length > Vector
Definition: types.hh:65
platform::Real Real
Definition: types.hh:35
A class for defining a type of residue.
Definition: ResidueType.hh:135
std::string const & name() const
get our (unique) residue name
static THREAD_LOCAL basic::Tracer TR("core.chemical.Atom")
ResidueGraph::edge_descriptor ED
platform::uint uint
Definition: types.hh:32
utility::vector1< utility::vector1< core::Real > > Matrix
A class for defining atom parameters, known as atom_types.
boost::graph_traits< LightWeightResidueGraph >::vertex_descriptor lwrg_VD
std::string to_string(DOF_Type const &type)
Definition: types.hh:50
a set of AtomTypes
Definition: AtomTypeSet.hh:57
Graph structure for ResidueType.
VIterPair atom_iterators() const
Definition: ResidueType.hh:660
Utility visitor for find_nbr_dist Will only traverse those atoms in the "rigid" portion of graph arou...
Size natoms() const
number of atoms
Definition: ResidueType.hh:236
void rename_atoms(ResidueType &res, bool preserve)
Rename atoms in the residue type such that their names are unique. If preserve is true...
boost::graph_traits< LightWeightResidueGraph >::edge_descriptor lwrg_ED
AdjacentIterPair bonded_neighbor_iterators(VD const &atom) const
Definition: ResidueType.cc:823