Rosetta
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Pages
extract_motifs.cc
Go to the documentation of this file.
1 // -*- mode:c++;tab-width:2;indent-tabs-mode:t;show-trailing-whitespace:t;rm-trailing-spaces:t -*-
2 // vi: set ts=2 noet:
3 //
4 // (c) Copyright Rosetta Commons Member Institutions.
5 // (c) This file is part of the Rosetta software suite and is made available under license.
6 // (c) The Rosetta software is developed by the contributing members of the Rosetta Commons.
7 // (c) For more information, see http://www.rosettacommons.org. Questions about this can be
8 // (c) addressed to University of Washington UW TechTransfer, email: license@u.washington.edu.
9 
10 /// @file
11 /// @brief Application to extract motifs from a set of pdb files
12 
13 // libRosetta headers
14 
15 #include <core/types.hh>
16 
17 #include <core/conformation/Conformation.hh>
18 #include <core/pose/Pose.hh>
19 #include <core/pose/PDBInfo.hh>
20 #include <core/scoring/ScoreFunction.hh>
21 #include <core/import_pose/import_pose.hh>
22 #include <core/io/pdb/pdb_writer.hh>
23 #include <basic/options/util.hh>
24 #include <basic/options/option.hh>
28 
29 #include <basic/basic.hh>
30 #include <basic/Tracer.hh>
31 
32 #include <protocols/motifs/Motif.hh>
33 #include <protocols/motifs/SingleMotif.hh>
34 #include <protocols/motifs/MotifLibrary.hh>
35 
36 // C++ headers
37 #include <fstream>
38 //silly using/typedef
39 
40 
41 //utilities
42 #include <protocols/jd2/JobDistributor.hh>
43 #include <devel/init.hh>
45 
46 //using namespace basic;
47 using namespace core;
48 using namespace pose;
49 using namespace chemical;
50 using namespace scoring;
51 using namespace ObjexxFCL;
52 using namespace basic::options;
53 using namespace basic::options::OptionKeys;
54 
55 
56 // These are for aligning all interactions to a common coordinate frame.
57 // They don't need to be realistic
58 
59 Vector const atom1_vector( 1.500, 0.000, 0.000 );
60 Vector const atom2_vector( 0.000, 0.000, 0.000 );
61 Vector const atom3_vector( 0.000, 1.500, 0.000 );
62 
63 ///////////////////////////////////////////////////////////////////////////////
64 ///////////////////////////////////////////////////////////////////////////////
65 ///////////////////////////////////////////////////////////////////////////////
66 
67 static THREAD_LOCAL basic::Tracer TR( "extract_motifs" );
68 
69 void
71  core::chemical::AA const & aa,
72  std::string & oa1,
73  std::string & oa2,
74  std::string & oa3
75 )
76 {
77  switch( aa ) {
78  case aa_ala :
79  oa1 = "1HB";
80  oa2 = "CB";
81  oa3 = "2HB";
82  break;
83  case aa_cys :
84  oa1 = "SG";
85  oa2 = "CB";
86  oa3 = "CA";
87  break;
88  case aa_asp :
89  oa1 = "OD1";
90  oa2 = "CG";
91  oa3 = "OD2";
92  break;
93  case aa_glu :
94  oa1 = "OE1";
95  oa2 = "CD";
96  oa3 = "OE2";
97  break;
98  case aa_phe :
99  oa1 = "CG";
100  oa2 = "CD1";
101  oa3 = "CZ";
102  break;
103  case aa_gly :
104  oa1 = "HA1";
105  oa2 = "CA";
106  oa3 = "HA2";
107  break;
108  case aa_his :
109  oa1 = "ND1";
110  oa2 = "CE1";
111  oa3 = "NE2";
112  break;
113  case aa_ile :
114  oa1 = "CD1";
115  oa2 = "CG1";
116  oa3 = "CB";
117  break;
118  case aa_lys :
119  oa1 = "NZ";
120  oa2 = "CE";
121  oa3 = "CD";
122  break;
123  case aa_leu :
124  oa1 = "CD1";
125  oa2 = "CG";
126  oa3 = "CD2";
127  break;
128  case aa_met :
129  oa1 = "CG";
130  oa2 = "SD";
131  oa3 = "CE";
132  break;
133  case aa_asn :
134  oa1 = "OD1";
135  oa2 = "CG";
136  oa3 = "ND2";
137  break;
138  case aa_gln :
139  oa1 = "OE1";
140  oa2 = "CD";
141  oa3 = "NE2";
142  break;
143  case aa_arg :
144  oa1 = "NH1";
145  oa2 = "CZ";
146  oa3 = "NH2";
147  break;
148  case aa_ser :
149  oa1 = "HG";
150  oa2 = "OG";
151  oa3 = "CB";
152  break;
153  case aa_thr :
154  oa1 = "HG1";
155  oa2 = "OG1";
156  oa3 = "CB";
157  break;
158  case aa_val :
159  oa1 = "CG1";
160  oa2 = "CB";
161  oa3 = "CG2";
162  break;
163  case aa_trp :
164  oa1 = "CG";
165  oa2 = "CE2";
166  oa3 = "CZ3";
167  break;
168  case aa_tyr :
169  oa1 = "CG";
170  oa2 = "CD1";
171  oa3 = "CZ";
172  break;
173  default :
174  TR << "Did not find residue type for " << name_from_aa( aa ) << std::endl;
175  oa1 = "X";
176  oa2 = "X";
177  oa3 = "X";
178  }
179 
180  return;
181 }
182 
183 void
185  Pose & src_pose,
186  AA const & target_aa,
187  std::string & pdb_name,
188  int prot_pos,
189  std::vector< Size > & contacts
190 )
191 {
192 
193  // Make a temporary pose
194  Pose pose;
195 
196  // These need to be assigned be looking at the target amino acid
197  std::string atom1_name;
198  std::string atom2_name;
199  std::string atom3_name;
200  fetch_atom_names( target_aa, atom1_name, atom2_name, atom3_name );
201 
202  pose.append_residue_by_jump( src_pose.residue( prot_pos ), 1 );
203  for ( Size ic = 0 ; ic < contacts.size() ; ++ic ) {
204  Size dna_pos = contacts[ ic ];
205  pose.append_residue_by_jump( src_pose.residue( dna_pos ), 1 );
206  }
207  pose.conformation().insert_chain_ending( 1 );
208 
209  // Make a second pose _just for orientation_
210 
211  Pose centered_pose;
212  centered_pose = pose;
213 
214  // Fix key residues
215  centered_pose.set_xyz( id::AtomID( centered_pose.residue( 1 ).atom_index( atom1_name ), 1 ), atom1_vector );
216  centered_pose.set_xyz( id::AtomID( centered_pose.residue( 1 ).atom_index( atom2_name ), 1 ), atom2_vector );
217  centered_pose.set_xyz( id::AtomID( centered_pose.residue( 1 ).atom_index( atom3_name ), 1 ), atom3_vector );
218 
219  // Move the pose onto this fixed point
220 
221  core::kinematics::Stub end_stub(
222  centered_pose.residue( 1 ).atom( atom2_name ).xyz(),
223  centered_pose.residue( 1 ).atom( atom1_name ).xyz(),
224  centered_pose.residue( 1 ).atom( atom2_name ).xyz(),
225  centered_pose.residue( 1 ).atom( atom3_name ).xyz()
226  );
227 
228  core::kinematics::Stub start_stub(
229  pose.residue( 1 ).atom( atom2_name ).xyz(),
230  pose.residue( 1 ).atom( atom1_name ).xyz(),
231  pose.residue( 1 ).atom( atom2_name ).xyz(),
232  pose.residue( 1 ).atom( atom3_name ).xyz()
233  );
234 
235  for ( Size ires = 1 ; ires <= pose.total_residue() ; ++ires ) {
236  for ( Size iatom = 1 ; iatom <= pose.residue(ires).natoms() ; ++iatom ) {
237  pose.set_xyz( id::AtomID( iatom, ires ),
238  end_stub.local2global( start_stub.global2local( pose.residue(ires).xyz( iatom ) ) ) );
239  }
240  }
241 
242  std::string const output_path( option[ out::path::pdb ]() );
243  std::string delimiter( "_" );
244  //std::string extension( ".pdb" );
245  std::string motif_file_name( output_path + src_pose.residue_type( prot_pos ).name3() +
246  string_of( src_pose.pdb_info()->number( prot_pos ) ) +
247  string_of( src_pose.pdb_info()->chain( prot_pos ) ) + delimiter );
248 
249  for ( Size ic = 0 ; ic < contacts.size() ; ++ic ) {
250  Size dna_pos = contacts[ ic ];
251  motif_file_name = motif_file_name +
252  src_pose.residue_type( dna_pos ).name1() +
253  string_of( src_pose.pdb_info()->number( dna_pos ) ) +
254  string_of( src_pose.pdb_info()->chain( dna_pos ) ) + delimiter;
255  }
256 
257  //motif_file_name = motif_file_name + pdb_name + extension;
258  motif_file_name = motif_file_name + pdb_name;
259 
260  TR << "Writing " << motif_file_name << std::endl;
261  io::pdb::dump_pdb( pose, motif_file_name );
262 
263 }
264 
265 void
267 (
268  protocols::motifs::Motif const & m1,
269  protocols::motifs::Motif const & m2,
270  core::Real & dist_diff,
271  core::Real & angl_diff
272 
273 )
274 {
275 
276  // Handle when motifs have different residue pairs
277  if ( m1.restype_name1() != m2.restype_name1() ||
278  m1.restype_name2() != m2.restype_name2() ) {
279  dist_diff = angl_diff = 9999.0;
280  return;
281  }
282 
283  core::kinematics::jump_distance( m1.forward_jump(), m2.forward_jump(), dist_diff, angl_diff );
284 
285  return;
286 }
287 
288 
289 ///////////////////////////////////////////////////////////////////////////////
290 ///////////////////////////////////////////////////////////////////////////////
291 ///////////////////////////////////////////////////////////////////////////////
292 
293 Real
295  Pose & pose,
296  Size pos1,
297  Size pos2,
298  ScoreFunction & sf
299 )
300 {
301 
302  EnergyMap pack_map;
303 
304  sf.eval_ci_2b_sc_sc( pose.residue( pos1 ), pose.residue( pos2 ), pose, pack_map );
305 
306  return pack_map[ fa_atr ] + pack_map[ fa_rep ];
307 }
308 
309 Real
311  Pose & pose,
312  Size pos1,
313  Size pos2,
314  ScoreFunction & sf
315 )
316 {
317 
318  EnergyMap hbond_map;
319 
320  sf.eval_cd_2b_sc_sc( pose.residue( pos1 ), pose.residue( pos2 ), pose, hbond_map );
321 
322  return hbond_map[ hbond_sc ];
323 }
324 
325 Real
327  Pose & pose,
328  Size pos1,
329  Size pos2,
330  ScoreFunction & sf
331 )
332 {
333 
334  EnergyMap elec_map;
335 
336  sf.eval_cd_2b_sc_sc( pose.residue( pos1 ), pose.residue( pos2 ), pose, elec_map );
337 
338  return elec_map[ fa_elec ];
339 
340 }
341 
342 ///////////////////////////////////////////////////////////////////////////////
343 ///////////////////////////////////////////////////////////////////////////////
344 ///////////////////////////////////////////////////////////////////////////////
345 
346 void
348  Pose & pose,
349  std::string & pdb_name,
350  chemical::AA const target_aa,
351  protocols::motifs::MotifLibrary & motif_lib
352 )
353 {
354  int nres( pose.total_residue() );
355 
356  Real dist_threshold( option[ motifs::duplicate_dist_cutoff ] );
357  Real angl_threshold( option[ motifs::duplicate_angle_cutoff ] );
358 
365 
366  // Get a simple scorefunction to screen for thresholds
367  ScoreFunction scorefxn;
368  // These are talaris2013 values. Since the thresholds can be
369  // chosen arbitrarily, the only thing that is truly hard-wired
370  // here is the ratio between fa_atr and fa_rep, which are
371  // combined in get_packing_score above.
372  scorefxn.set_weight( fa_atr, 0.8 );
373  scorefxn.set_weight( fa_rep, 0.44 );
374  scorefxn.set_weight( hbond_sc, 1.1 );
375  scorefxn.set_weight( fa_elec, 0.7 );
376  scorefxn.setup_for_scoring( pose );
377 
378  // These need to be assigned be looking at the target amino acid
379  std::string atom1_name;
380  std::string atom2_name;
381  std::string atom3_name;
382  fetch_atom_names( target_aa, atom1_name, atom2_name, atom3_name );
383 
384  // Loop over positions, skipping things that aren't the target amino acid
385  for ( int aaoi_pos = 1 ; aaoi_pos <= nres ; ++aaoi_pos ) {
386  ResidueType const & res_type( pose.residue_type( aaoi_pos ) );
387 
388  if ( res_type.aa() != target_aa ) continue;
389 
390  std::vector< Size > contacts;
391 
392  Real total_pack_score( 0.0 );
393  Real total_hb_score( 0.0 );
394  Real check_hb_score( 0.0 );
395 
396  // Loop over all other positions
397  for ( int other_pos = 1 ; other_pos <= nres ; ++other_pos ) {
398  ResidueType const & other_type( pose.residue_type( other_pos ) );
399  if ( other_pos == aaoi_pos ) continue;
400 
401  // This prevents against getting the same interaction twice
402  if ( ( other_type.aa() == target_aa ) && ( other_pos < aaoi_pos ) ) continue;
403 
404  Real pack_score = get_packing_score( pose, other_pos, aaoi_pos, scorefxn );
405  Real hb_score = get_hbond_score( pose, other_pos, aaoi_pos, scorefxn );
406  Real elec_score = get_elec_score( pose, other_pos, aaoi_pos, scorefxn );
407 
408  int num_contacts( 0 );
409 
410  // Loop over all amino acid atoms
411  for ( Size this_atom = 1 ; this_atom <= res_type.nheavyatoms() ; ++this_atom ) {
412  // Loop over heavy atoms
413  for ( Size other_atom = other_type.first_sidechain_atom() ;
414  other_atom <= other_type.nheavyatoms() ; ++other_atom ) {
415 
416  Real const dis2( pose.residue( other_pos ).xyz( other_atom ).distance_squared(
417  pose.residue( aaoi_pos ).xyz( this_atom ) ) );
418  if ( dis2 < (3.9*3.9) ) {
419  ++num_contacts;
420  }
421 
422  }
423  } // End loop over atoms
424 
425  check_hb_score += hb_score;
426 
427  // if( num_contacts > 0 )
428  // if( pack_score <= -0.5 && hb_score > -0.2 )
429  // if( hb_score <= -1.00 ) {
430  // if( pack_score <= -1.00 ) {
431 
432  if ( ( pack_score > pack_min_threshold && pack_score < pack_max_threshold) &&
433  ( hb_score > hbond_min_threshold && hb_score < hbond_max_threshold) &&
434  ( elec_score > elec_min_threshold && elec_score < elec_max_threshold) ) {
435 
436  TR << "Energies between " << aaoi_pos << " and " << other_pos << " are pack: " << pack_score << " hbond: " << hb_score << std::endl;
437 
438  total_pack_score += pack_score;
439  total_hb_score += hb_score;
440 
441  // Store the motif info
442  std::string other_atom1;
443  std::string other_atom2;
444  std::string other_atom3;
445  fetch_atom_names( pose.aa( other_pos ), other_atom1, other_atom2, other_atom3 );
446  if ( other_atom1 != "X" ) {
447  contacts.push_back( other_pos );
448  protocols::motifs::SingleMotif new_motif( pose, aaoi_pos, atom1_name, atom2_name, atom3_name, other_pos, other_atom1, other_atom2, other_atom3 );
449  std::string motif_origin( pose.residue_type( aaoi_pos ).name3() + " " + string_of( pose.pdb_info()->number( aaoi_pos ) ) + string_of( pose.pdb_info()->chain( aaoi_pos ) ) + " and " + pose.residue_type( other_pos ).name3() + " " + string_of( pose.pdb_info()->number( other_pos ) ) + string_of( pose.pdb_info()->chain( other_pos ) ) + " pdb code: " + pdb_name );
450  new_motif.store_remark( motif_origin );
451  TR << new_motif.remark() << std::endl;
452 
453  protocols::motifs::Motif work_motif( new_motif );
454 
455  bool unique_motif( true );
456 
457  // Check against other motifs
458  core::Size check_i( 1 );
459  for ( protocols::motifs::MotifCOPs::const_iterator this_motif = motif_lib.begin() ; this_motif != motif_lib.end() ; ++this_motif ) {
460  core::Real dist_diff( 0.0 );
461  core::Real angl_diff( 0.0 );
462  motif_distances( work_motif, **this_motif, dist_diff, angl_diff );
463  // TR << "Motif dist from current motif " << check_i << " is translation " << dist_diff << " and angle " << angl_diff << std::endl;
464  check_i++;
465  if ( dist_diff < dist_threshold && angl_diff < angl_threshold ) {
466  unique_motif = false;
467  break;
468  }
469  }
470 
471  if ( unique_motif ) {
472  motif_lib.add_to_library( new_motif );
473  }
474  }
475  }
476 
477  } // End loop over amino acids
478 
479  if ( contacts.size() > 0 ) {
480  TR << "Outputing motif with " << contacts.size() << " contacts " << std::endl;
481  TR << "Energies are pack: " << total_pack_score << " hbond: " << total_hb_score << std::endl;
482  TR << "Check HB Energy is: " << check_hb_score << std::endl;
483  output_single_motif( pose, target_aa, pdb_name, aaoi_pos, contacts );
484  }
485 
486 
487  } // End loop over residues
488 
489  return;
490 }
491 
492 
493 ///////////////////////////////////////////////////////////////////////////////
494 ///////////////////////////////////////////////////////////////////////////////
495 ///////////////////////////////////////////////////////////////////////////////
496 
497 int
498 main( int argc, char * argv [] )
499 {
500  try {
501  using namespace pose;
502  using namespace conformation;
503  using namespace chemical;
504  using namespace import_pose;
505 
506  //using namespace core;
507  devel::init( argc, argv );
508 
509  // Get the name of the file that contains the pdb files for extraction
510  std::string const list_of_pdb_files( option[ in::file::l ]()[1] );
511  std::ifstream file_data( list_of_pdb_files.c_str() );
512 
513  // find the amino acid for which interaction motifs are requested
514  std::string target_3code( option[ motifs::target_aa ]() );
515  chemical::AA target_aa( aa_from_name( uppercase( target_3code ) ) );
516 
517  std::string pdb_code;
518 
519  protocols::motifs::MotifLibrary motif_lib;
520 
521  std::string const output_path( option[ out::path::pdb ]() );
522  std::string motif_path_and_file( output_path + "motif_library" );
523  std::ofstream motif_ostream( motif_path_and_file.c_str() );
524 
525  file_data >> pdb_code;
526  while ( !file_data.eof() ) {
527 
528  TR << "Working on pdb " << pdb_code << std::endl;
529  std::string const pdb_file_path( option[ in::path::pdb ]( 1 ) );
530  std::string pdb_file( pdb_file_path + pdb_code );
531  TR << "PDB file " << pdb_file << std::endl;
532 
533  Pose pose;
534  pose_from_file( pose, pdb_file , core::import_pose::PDB_file);
535 
536  process_for_motifs( pose, pdb_code, target_aa, motif_lib );
537 
538  file_data >> pdb_code;
539  }
540 
541  motif_ostream << motif_lib;
542  motif_ostream.close();
543 
544  return 0;
545  } catch ( utility::excn::EXCN_Base const & e ) {
546  std::cout << "caught exception" << e.msg() << std::endl;
547  return -1;
548  }
549 }
550 
551 ///////////////////////////////////////////////////////////////////////////////
RealOptionKey const duplicate_angle_cutoff("motifs:duplicate_angle_cutoff")
RealOptionKey const pack_min_threshold("motifs:pack_min_threshold")
#define THREAD_LOCAL
virtual std::string const msg() const
Definition: EXCN_Base.hh:70
RealOptionKey const elec_max_threshold("motifs:elec_max_threshold")
RealOptionKey const pack_max_threshold
RealOptionKey const elec_min_threshold
Vector const atom1_vector(1.500, 0.000, 0.000)
utility::keys::KeyLookup< KeyType >::const_iterator const_iterator
Key collection iterators.
RealOptionKey const pack_max_threshold("motifs:pack_max_threshold")
StringOptionKey const target_aa
RealOptionKey const duplicate_dist_cutoff("motifs:duplicate_dist_cutoff")
void init(int argc, char *argv[])
Command line init() version.
Definition: init.cc:23
Real get_packing_score(Pose &pose, Size pos1, Size pos2, ScoreFunction &sf)
core::pose::Pose Pose
Definition: supercharge.cc:101
FileVectorOptionKey const l("in:file:l")
void output_single_motif(Pose &src_pose, AA const &target_aa, std::string &pdb_name, int prot_pos, std::vector< Size > &contacts)
StringOptionKey const target_aa("motifs:target_aa")
Vector const atom3_vector(0.000, 1.500, 0.000)
PathOptionKey const pdb("out:path:pdb")
common derived classes for thrown exceptions
RealOptionKey const hbond_max_threshold
RealOptionKey const hbond_min_threshold
tuple scorefxn
Definition: PyMOL_demo.py:63
static THREAD_LOCAL basic::Tracer TR("extract_motifs")
int main(int argc, char *argv[])
void fetch_atom_names(core::chemical::AA const &aa, std::string &oa1, std::string &oa2, std::string &oa3)
xyzVector< Real > Vector
RealOptionKey const hbond_max_threshold("motifs:hbond_max_threshold")
Tracer IO system.
PathVectorOptionKey const pdb("in:path:pdb")
void process_for_motifs(Pose &pose, std::string &pdb_name, chemical::AA const target_aa, protocols::motifs::MotifLibrary &motif_lib)
Real get_hbond_score(Pose &pose, Size pos1, Size pos2, ScoreFunction &sf)
RealOptionKey const pack_min_threshold
RealOptionKey const elec_min_threshold("motifs:elec_min_threshold")
basic::options::OptionKeys collection
rule< Scanner, options_closure::context_t > options
Definition: Tag.cc:377
double Real
Definition: types.hh:39
RealOptionKey const elec_max_threshold
ocstream cout(std::cout)
Wrapper around std::cout.
Definition: ocstream.hh:287
Real get_elec_score(Pose &pose, Size pos1, Size pos2, ScoreFunction &sf)
Class for handling user debug/warnings/errors. Use instance of this class instead of 'std::cout' for ...
Definition: Tracer.hh:134
void motif_distances(protocols::motifs::Motif const &m1, protocols::motifs::Motif const &m2, core::Real &dist_diff, core::Real &angl_diff)
Vector const atom2_vector(0.000, 0.000, 0.000)
basic::options::OptionKeys collection
RealOptionKey const hbond_min_threshold("motifs:hbond_min_threshold")
std::string string_of(Fstring const &s)
string of an Fstring
Definition: Fstring.hh:2889
Program options global and initialization function.
BooleanOptionKey const chemical
platform::Size Size
Definition: random.fwd.hh:30
basic::options::OptionKeys collection
char & uppercase(char &c)
Uppercase a Character.
rule< Scanner, option_closure::context_t > option
Definition: Tag.cc:378