Rosetta
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Pages
extract_motifs.cc
Go to the documentation of this file.
1 // -*- mode:c++;tab-width:2;indent-tabs-mode:t;show-trailing-whitespace:t;rm-trailing-spaces:t -*-
2 // vi: set ts=2 noet:
3 //
4 // (c) Copyright Rosetta Commons Member Institutions.
5 // (c) This file is part of the Rosetta software suite and is made available under license.
6 // (c) The Rosetta software is developed by the contributing members of the Rosetta Commons.
7 // (c) For more information, see http://www.rosettacommons.org. Questions about this can be
8 // (c) addressed to University of Washington UW TechTransfer, email: license@u.washington.edu.
9 
10 /// @file
11 /// @brief Application to extract motifs from a set of pdb files
12 
13 // libRosetta headers
14 
15 #include <core/types.hh>
16 
17 #include <core/conformation/Conformation.hh>
18 #include <core/pose/Pose.hh>
19 #include <core/pose/PDBInfo.hh>
20 #include <core/scoring/ScoreFunction.hh>
21 #include <core/import_pose/import_pose.hh>
22 #include <core/io/pdb/pose_io.hh>
23 
24 #include <basic/options/util.hh>
25 #include <basic/options/option.hh>
26 #include <basic/options/keys/in.OptionKeys.gen.hh>
27 #include <basic/options/keys/out.OptionKeys.gen.hh>
28 #include <basic/options/keys/motifs.OptionKeys.gen.hh>
29 
30 #include <basic/basic.hh>
31 #include <basic/Tracer.hh>
32 
33 #include <protocols/motifs/Motif.hh>
34 #include <protocols/motifs/SingleMotif.hh>
35 #include <protocols/motifs/MotifLibrary.hh>
36 
37 // C++ headers
38 #include <fstream>
39 //silly using/typedef
40 
41 
42 //utilities
43 #include <protocols/jd2/JobDistributor.hh>
44 #include <devel/init.hh>
46 
47 //using namespace basic;
48 using namespace core;
49 using namespace pose;
50 using namespace chemical;
51 using namespace scoring;
52 using namespace ObjexxFCL;
53 using namespace basic::options;
54 using namespace basic::options::OptionKeys;
55 
56 
57 // These are for aligning all interactions to a common coordinate frame.
58 // They don't need to be realistic
59 
60 Vector const atom1_vector( 1.500, 0.000, 0.000 );
61 Vector const atom2_vector( 0.000, 0.000, 0.000 );
62 Vector const atom3_vector( 0.000, 1.500, 0.000 );
63 
64 ///////////////////////////////////////////////////////////////////////////////
65 ///////////////////////////////////////////////////////////////////////////////
66 ///////////////////////////////////////////////////////////////////////////////
67 
68 static THREAD_LOCAL basic::Tracer TR( "extract_motifs" );
69 
70 void
72  core::chemical::AA const & aa,
73  std::string & oa1,
74  std::string & oa2,
75  std::string & oa3
76 )
77 {
78  switch( aa ) {
79  case aa_ala :
80  oa1 = "1HB";
81  oa2 = "CB";
82  oa3 = "2HB";
83  break;
84  case aa_cys :
85  oa1 = "SG";
86  oa2 = "CB";
87  oa3 = "CA";
88  break;
89  case aa_asp :
90  oa1 = "OD1";
91  oa2 = "CG";
92  oa3 = "OD2";
93  break;
94  case aa_glu :
95  oa1 = "OE1";
96  oa2 = "CD";
97  oa3 = "OE2";
98  break;
99  case aa_phe :
100  oa1 = "CG";
101  oa2 = "CD1";
102  oa3 = "CZ";
103  break;
104  case aa_gly :
105  oa1 = "HA1";
106  oa2 = "CA";
107  oa3 = "HA2";
108  break;
109  case aa_his :
110  oa1 = "ND1";
111  oa2 = "CE1";
112  oa3 = "NE2";
113  break;
114  case aa_ile :
115  oa1 = "CD1";
116  oa2 = "CG1";
117  oa3 = "CB";
118  break;
119  case aa_lys :
120  oa1 = "NZ";
121  oa2 = "CE";
122  oa3 = "CD";
123  break;
124  case aa_leu :
125  oa1 = "CD1";
126  oa2 = "CG";
127  oa3 = "CD2";
128  break;
129  case aa_met :
130  oa1 = "CG";
131  oa2 = "SD";
132  oa3 = "CE";
133  break;
134  case aa_asn :
135  oa1 = "OD1";
136  oa2 = "CG";
137  oa3 = "ND2";
138  break;
139  case aa_gln :
140  oa1 = "OE1";
141  oa2 = "CD";
142  oa3 = "NE2";
143  break;
144  case aa_arg :
145  oa1 = "NH1";
146  oa2 = "CZ";
147  oa3 = "NH2";
148  break;
149  case aa_ser :
150  oa1 = "HG";
151  oa2 = "OG";
152  oa3 = "CB";
153  break;
154  case aa_thr :
155  oa1 = "HG1";
156  oa2 = "OG1";
157  oa3 = "CB";
158  break;
159  case aa_val :
160  oa1 = "CG1";
161  oa2 = "CB";
162  oa3 = "CG2";
163  break;
164  case aa_trp :
165  oa1 = "CG";
166  oa2 = "CE2";
167  oa3 = "CZ3";
168  break;
169  case aa_tyr :
170  oa1 = "CG";
171  oa2 = "CD1";
172  oa3 = "CZ";
173  break;
174  default :
175  TR << "Did not find residue type for " << name_from_aa( aa ) << std::endl;
176  oa1 = "X";
177  oa2 = "X";
178  oa3 = "X";
179  }
180 
181  return;
182 }
183 
184 void
186  Pose & src_pose,
187  AA const & target_aa,
188  std::string & pdb_name,
189  int prot_pos,
190  std::vector< Size > & contacts
191 )
192 {
193 
194  // Make a temporary pose
195  Pose pose;
196 
197  // These need to be assigned be looking at the target amino acid
198  std::string atom1_name;
199  std::string atom2_name;
200  std::string atom3_name;
201  fetch_atom_names( target_aa, atom1_name, atom2_name, atom3_name );
202 
203  pose.append_residue_by_jump( src_pose.residue( prot_pos ), 1 );
204  for ( Size ic = 0 ; ic < contacts.size() ; ++ic ) {
205  Size dna_pos = contacts[ ic ];
206  pose.append_residue_by_jump( src_pose.residue( dna_pos ), 1 );
207  }
208  pose.conformation().insert_chain_ending( 1 );
209 
210  // Make a second pose _just for orientation_
211 
212  Pose centered_pose;
213  centered_pose = pose;
214 
215  // Fix key residues
216  centered_pose.set_xyz( id::AtomID( centered_pose.residue( 1 ).atom_index( atom1_name ), 1 ), atom1_vector );
217  centered_pose.set_xyz( id::AtomID( centered_pose.residue( 1 ).atom_index( atom2_name ), 1 ), atom2_vector );
218  centered_pose.set_xyz( id::AtomID( centered_pose.residue( 1 ).atom_index( atom3_name ), 1 ), atom3_vector );
219 
220  // Move the pose onto this fixed point
221 
222  core::kinematics::Stub end_stub(
223  centered_pose.residue( 1 ).atom( atom2_name ).xyz(),
224  centered_pose.residue( 1 ).atom( atom1_name ).xyz(),
225  centered_pose.residue( 1 ).atom( atom2_name ).xyz(),
226  centered_pose.residue( 1 ).atom( atom3_name ).xyz()
227  );
228 
229  core::kinematics::Stub start_stub(
230  pose.residue( 1 ).atom( atom2_name ).xyz(),
231  pose.residue( 1 ).atom( atom1_name ).xyz(),
232  pose.residue( 1 ).atom( atom2_name ).xyz(),
233  pose.residue( 1 ).atom( atom3_name ).xyz()
234  );
235 
236  for ( Size ires = 1 ; ires <= pose.total_residue() ; ++ires ) {
237  for ( Size iatom = 1 ; iatom <= pose.residue(ires).natoms() ; ++iatom ) {
238  pose.set_xyz( id::AtomID( iatom, ires ),
239  end_stub.local2global( start_stub.global2local( pose.residue(ires).xyz( iatom ) ) ) );
240  }
241  }
242 
243  std::string const output_path( option[ out::path::pdb ]() );
244  std::string delimiter( "_" );
245  //std::string extension( ".pdb" );
246  std::string motif_file_name( output_path + src_pose.residue_type( prot_pos ).name3() +
247  string_of( src_pose.pdb_info()->number( prot_pos ) ) +
248  string_of( src_pose.pdb_info()->chain( prot_pos ) ) + delimiter );
249 
250  for ( Size ic = 0 ; ic < contacts.size() ; ++ic ) {
251  Size dna_pos = contacts[ ic ];
252  motif_file_name = motif_file_name +
253  src_pose.residue_type( dna_pos ).name1() +
254  string_of( src_pose.pdb_info()->number( dna_pos ) ) +
255  string_of( src_pose.pdb_info()->chain( dna_pos ) ) + delimiter;
256  }
257 
258  //motif_file_name = motif_file_name + pdb_name + extension;
259  motif_file_name = motif_file_name + pdb_name;
260 
261  TR << "Writing " << motif_file_name << std::endl;
262  io::pdb::dump_pdb( pose, motif_file_name );
263 
264 }
265 
266 void
268 (
269  protocols::motifs::Motif const & m1,
270  protocols::motifs::Motif const & m2,
271  core::Real & dist_diff,
272  core::Real & angl_diff
273 
274 )
275 {
276 
277  // Handle when motifs have different residue pairs
278  if ( m1.restype_name1() != m2.restype_name1() ||
279  m1.restype_name2() != m2.restype_name2() ) {
280  dist_diff = angl_diff = 9999.0;
281  return;
282  }
283 
284  core::kinematics::jump_distance( m1.forward_jump(), m2.forward_jump(), dist_diff, angl_diff );
285 
286  return;
287 }
288 
289 
290 ///////////////////////////////////////////////////////////////////////////////
291 ///////////////////////////////////////////////////////////////////////////////
292 ///////////////////////////////////////////////////////////////////////////////
293 
294 Real
296  Pose & pose,
297  Size pos1,
298  Size pos2,
299  ScoreFunction & sf
300 )
301 {
302 
303  EnergyMap pack_map;
304 
305  sf.eval_ci_2b_sc_sc( pose.residue( pos1 ), pose.residue( pos2 ), pose, pack_map );
306 
307  return pack_map[ fa_atr ] + pack_map[ fa_rep ];
308 }
309 
310 Real
312  Pose & pose,
313  Size pos1,
314  Size pos2,
315  ScoreFunction & sf
316 )
317 {
318 
319  EnergyMap hbond_map;
320 
321  sf.eval_cd_2b_sc_sc( pose.residue( pos1 ), pose.residue( pos2 ), pose, hbond_map );
322 
323  return hbond_map[ hbond_sc ];
324 }
325 
326 Real
328  Pose & pose,
329  Size pos1,
330  Size pos2,
331  ScoreFunction & sf
332 )
333 {
334 
335  EnergyMap elec_map;
336 
337  sf.eval_cd_2b_sc_sc( pose.residue( pos1 ), pose.residue( pos2 ), pose, elec_map );
338 
339  return elec_map[ fa_elec ];
340 
341 }
342 
343 ///////////////////////////////////////////////////////////////////////////////
344 ///////////////////////////////////////////////////////////////////////////////
345 ///////////////////////////////////////////////////////////////////////////////
346 
347 void
349  Pose & pose,
350  std::string & pdb_name,
351  chemical::AA const target_aa,
352  protocols::motifs::MotifLibrary & motif_lib
353 )
354 {
355  int nres( pose.total_residue() );
356 
357  Real dist_threshold( option[ motifs::duplicate_dist_cutoff ] );
358  Real angl_threshold( option[ motifs::duplicate_angle_cutoff ] );
359 
360  Real pack_min_threshold( option[ motifs::pack_min_threshold ] );
361  Real pack_max_threshold( option[ motifs::pack_max_threshold ] );
362  Real hbond_min_threshold( option[ motifs::hbond_min_threshold ] );
363  Real hbond_max_threshold( option[ motifs::hbond_max_threshold ] );
364  Real elec_min_threshold( option[ motifs::elec_min_threshold ] );
365  Real elec_max_threshold( option[ motifs::elec_max_threshold ] );
366 
367  // Get a simple scorefunction to screen for thresholds
368  ScoreFunction scorefxn;
369  // These are talaris2013 values. Since the thresholds can be
370  // chosen arbitrarily, the only thing that is truly hard-wired
371  // here is the ratio between fa_atr and fa_rep, which are
372  // combined in get_packing_score above.
373  scorefxn.set_weight( fa_atr, 0.8 );
374  scorefxn.set_weight( fa_rep, 0.44 );
375  scorefxn.set_weight( hbond_sc, 1.1 );
376  scorefxn.set_weight( fa_elec, 0.7 );
377  scorefxn.setup_for_scoring( pose );
378 
379  // These need to be assigned be looking at the target amino acid
380  std::string atom1_name;
381  std::string atom2_name;
382  std::string atom3_name;
383  fetch_atom_names( target_aa, atom1_name, atom2_name, atom3_name );
384 
385  // Loop over positions, skipping things that aren't the target amino acid
386  for ( int aaoi_pos = 1 ; aaoi_pos <= nres ; ++aaoi_pos ) {
387  ResidueType const & res_type( pose.residue_type( aaoi_pos ) );
388 
389  if ( res_type.aa() != target_aa ) continue;
390 
391  std::vector< Size > contacts;
392 
393  Real total_pack_score( 0.0 );
394  Real total_hb_score( 0.0 );
395  Real check_hb_score( 0.0 );
396 
397  // Loop over all other positions
398  for ( int other_pos = 1 ; other_pos <= nres ; ++other_pos ) {
399  ResidueType const & other_type( pose.residue_type( other_pos ) );
400  if ( other_pos == aaoi_pos ) continue;
401 
402  // This prevents against getting the same interaction twice
403  if ( ( other_type.aa() == target_aa ) && ( other_pos < aaoi_pos ) ) continue;
404 
405  Real pack_score = get_packing_score( pose, other_pos, aaoi_pos, scorefxn );
406  Real hb_score = get_hbond_score( pose, other_pos, aaoi_pos, scorefxn );
407  Real elec_score = get_elec_score( pose, other_pos, aaoi_pos, scorefxn );
408 
409  int num_contacts( 0 );
410 
411  // Loop over all amino acid atoms
412  for ( Size this_atom = 1 ; this_atom <= res_type.nheavyatoms() ; ++this_atom ) {
413  // Loop over heavy atoms
414  for ( Size other_atom = other_type.first_sidechain_atom() ;
415  other_atom <= other_type.nheavyatoms() ; ++other_atom ) {
416 
417  Real const dis2( pose.residue( other_pos ).xyz( other_atom ).distance_squared(
418  pose.residue( aaoi_pos ).xyz( this_atom ) ) );
419  if ( dis2 < (3.9*3.9) ) {
420  ++num_contacts;
421  }
422 
423  }
424  } // End loop over atoms
425 
426  check_hb_score += hb_score;
427 
428  // if( num_contacts > 0 )
429  // if( pack_score <= -0.5 && hb_score > -0.2 )
430  // if( hb_score <= -1.00 ) {
431  // if( pack_score <= -1.00 ) {
432 
433  if ( ( pack_score > pack_min_threshold && pack_score < pack_max_threshold) &&
434  ( hb_score > hbond_min_threshold && hb_score < hbond_max_threshold) &&
435  ( elec_score > elec_min_threshold && elec_score < elec_max_threshold) ) {
436 
437  TR << "Energies between " << aaoi_pos << " and " << other_pos << " are pack: " << pack_score << " hbond: " << hb_score << std::endl;
438 
439  total_pack_score += pack_score;
440  total_hb_score += hb_score;
441 
442  // Store the motif info
443  std::string other_atom1;
444  std::string other_atom2;
445  std::string other_atom3;
446  fetch_atom_names( pose.aa( other_pos ), other_atom1, other_atom2, other_atom3 );
447  if ( other_atom1 != "X" ) {
448  contacts.push_back( other_pos );
449  protocols::motifs::SingleMotif new_motif( pose, aaoi_pos, atom1_name, atom2_name, atom3_name, other_pos, other_atom1, other_atom2, other_atom3 );
450  std::string motif_origin( pose.residue_type( aaoi_pos ).name3() + " " + string_of( pose.pdb_info()->number( aaoi_pos ) ) + string_of( pose.pdb_info()->chain( aaoi_pos ) ) + " and " + pose.residue_type( other_pos ).name3() + " " + string_of( pose.pdb_info()->number( other_pos ) ) + string_of( pose.pdb_info()->chain( other_pos ) ) + " pdb code: " + pdb_name );
451  new_motif.store_remark( motif_origin );
452  TR << new_motif.remark() << std::endl;
453 
454  protocols::motifs::Motif work_motif( new_motif );
455 
456  bool unique_motif( true );
457 
458  // Check against other motifs
459  core::Size check_i( 1 );
460  for ( protocols::motifs::MotifCOPs::const_iterator this_motif = motif_lib.begin() ; this_motif != motif_lib.end() ; ++this_motif ) {
461  core::Real dist_diff( 0.0 );
462  core::Real angl_diff( 0.0 );
463  motif_distances( work_motif, **this_motif, dist_diff, angl_diff );
464  // TR << "Motif dist from current motif " << check_i << " is translation " << dist_diff << " and angle " << angl_diff << std::endl;
465  check_i++;
466  if ( dist_diff < dist_threshold && angl_diff < angl_threshold ) {
467  unique_motif = false;
468  break;
469  }
470  }
471 
472  if ( unique_motif ) {
473  motif_lib.add_to_library( new_motif );
474  }
475  }
476  }
477 
478  } // End loop over amino acids
479 
480  if ( contacts.size() > 0 ) {
481  TR << "Outputing motif with " << contacts.size() << " contacts " << std::endl;
482  TR << "Energies are pack: " << total_pack_score << " hbond: " << total_hb_score << std::endl;
483  TR << "Check HB Energy is: " << check_hb_score << std::endl;
484  output_single_motif( pose, target_aa, pdb_name, aaoi_pos, contacts );
485  }
486 
487 
488  } // End loop over residues
489 
490  return;
491 }
492 
493 
494 ///////////////////////////////////////////////////////////////////////////////
495 ///////////////////////////////////////////////////////////////////////////////
496 ///////////////////////////////////////////////////////////////////////////////
497 
498 int
499 main( int argc, char * argv [] )
500 {
501  try {
502  using namespace pose;
503  using namespace conformation;
504  using namespace chemical;
505  using namespace import_pose;
506 
507  //using namespace core;
508  devel::init( argc, argv );
509 
510  // Get the name of the file that contains the pdb files for extraction
511  std::string const list_of_pdb_files( option[ in::file::l ]()[1] );
512  std::ifstream file_data( list_of_pdb_files.c_str() );
513 
514  // find the amino acid for which interaction motifs are requested
515  std::string target_3code( option[ motifs::target_aa ]() );
516  chemical::AA target_aa( aa_from_name( uppercase( target_3code ) ) );
517 
518  std::string pdb_code;
519 
520  protocols::motifs::MotifLibrary motif_lib;
521 
522  std::string const output_path( option[ out::path::pdb ]() );
523  std::string motif_path_and_file( output_path + "motif_library" );
524  std::ofstream motif_ostream( motif_path_and_file.c_str() );
525 
526  file_data >> pdb_code;
527  while ( !file_data.eof() ) {
528 
529  TR << "Working on pdb " << pdb_code << std::endl;
530  std::string const pdb_file_path( option[ in::path::pdb ]( 1 ) );
531  std::string pdb_file( pdb_file_path + pdb_code );
532  TR << "PDB file " << pdb_file << std::endl;
533 
534  Pose pose;
535  pose_from_pdb( pose, pdb_file );
536 
537  process_for_motifs( pose, pdb_code, target_aa, motif_lib );
538 
539  file_data >> pdb_code;
540  }
541 
542  motif_ostream << motif_lib;
543  motif_ostream.close();
544 
545  return 0;
546  } catch ( utility::excn::EXCN_Base const & e ) {
547  std::cout << "caught exception" << e.msg() << std::endl;
548  return -1;
549  }
550 }
551 
552 ///////////////////////////////////////////////////////////////////////////////
#define THREAD_LOCAL
virtual std::string const msg() const
Definition: EXCN_Base.hh:70
Vector const atom1_vector(1.500, 0.000, 0.000)
utility::keys::KeyLookup< KeyType >::const_iterator const_iterator
Key collection iterators.
void init(int argc, char *argv[])
Command line init() version.
Definition: init.cc:23
Real get_packing_score(Pose &pose, Size pos1, Size pos2, ScoreFunction &sf)
core::pose::Pose Pose
Definition: supercharge.cc:101
void output_single_motif(Pose &src_pose, AA const &target_aa, std::string &pdb_name, int prot_pos, std::vector< Size > &contacts)
Vector const atom3_vector(0.000, 1.500, 0.000)
common derived classes for thrown exceptions
tuple scorefxn
Definition: PyMOL_demo.py:63
static THREAD_LOCAL basic::Tracer TR("extract_motifs")
int main(int argc, char *argv[])
void fetch_atom_names(core::chemical::AA const &aa, std::string &oa1, std::string &oa2, std::string &oa3)
xyzVector< Real > Vector
Tracer IO system.
void process_for_motifs(Pose &pose, std::string &pdb_name, chemical::AA const target_aa, protocols::motifs::MotifLibrary &motif_lib)
Real get_hbond_score(Pose &pose, Size pos1, Size pos2, ScoreFunction &sf)
rule< Scanner, options_closure::context_t > options
Definition: Tag.cc:377
double Real
Definition: types.hh:39
ocstream cout(std::cout)
Wrapper around std::cout.
Definition: ocstream.hh:287
Real get_elec_score(Pose &pose, Size pos1, Size pos2, ScoreFunction &sf)
Class for handling user debug/warnings/errors. Use instance of this class instead of 'std::cout' for ...
Definition: Tracer.hh:134
void motif_distances(protocols::motifs::Motif const &m1, protocols::motifs::Motif const &m2, core::Real &dist_diff, core::Real &angl_diff)
Vector const atom2_vector(0.000, 0.000, 0.000)
std::string string_of(Fstring const &s)
string of an Fstring
Definition: Fstring.hh:2889
Program options global and initialization function.
platform::Size Size
Definition: random.fwd.hh:30
char & uppercase(char &c)
Uppercase a Character.
rule< Scanner, option_closure::context_t > option
Definition: Tag.cc:378