Rosetta
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Pages
rna_graft.cc
Go to the documentation of this file.
1 // -*- mode:c++;tab-width:2;indent-tabs-mode:t;show-trailing-whitespace:t;rm-trailing-spaces:t -*-
2 // vi: set ts=2 noet:
3 //
4 // (c) Copyright Rosetta Commons Member Institutions.
5 // (c) This file is part of the Rosetta software suite and is made available under license.
6 // (c) The Rosetta software is developed by the contributing members of the Rosetta Commons.
7 // (c) For more information, see http://www.rosettacommons.org. Questions about this can be
8 // (c) addressed to University of Washington UW TechTransfer, email: license@u.washington.edu.
9 
10 /// @file
11 /// @brief
12 
13 
14 // libRosetta headers
15 #include <core/scoring/rms_util.hh>
16 #include <core/types.hh>
17 #include <core/conformation/Residue.hh>
18 #include <core/conformation/ResidueFactory.hh>
19 #include <core/chemical/ChemicalManager.hh>
20 #include <core/import_pose/import_pose.hh>
21 #include <core/kinematics/FoldTree.hh>
22 #include <core/pose/Pose.hh>
23 #include <core/pose/annotated_sequence.hh>
24 #include <core/pose/PDBInfo.hh>
25 #include <core/pose/util.hh>
26 #include <core/pose/rna/util.hh>
27 #include <protocols/farna/util.hh>
28 #include <protocols/farna/movers/RNA_LoopCloser.hh>
29 #include <protocols/stepwise/modeler/align/util.hh>
30 #include <protocols/viewer/viewers.hh>
31 #include <devel/init.hh>
32 #include <core/io/pdb/pose_io.hh>
33 #include <utility/vector1.hh>
34 #include <ObjexxFCL/format.hh>
36 
37 // C++ headers
38 //#include <cstdlib>
39 #include <fstream>
40 #include <iostream>
41 #include <string>
42 
43 // option key includes
44 
45 #include <basic/options/keys/out.OptionKeys.gen.hh>
46 #include <basic/options/keys/in.OptionKeys.gen.hh>
47 #include <basic/options/keys/chemical.OptionKeys.gen.hh>
49 
50 using namespace core;
51 using namespace basic;
52 using namespace protocols;
53 using namespace basic::options::OptionKeys;
54 
55 using utility::vector1;
56 
57 using io::pdb::dump_pdb;
58 
60 
61 ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
62 ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
63 // Quick superimposer for RNA puzzle stuff. Assumes that poses have one chain each!
64 // Could easily generalize in the future. Could also add grafting functionality [which I
65 // current do with a python script].
66 // -- Rhiju, Oct 2012
67 ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
68 ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
69 
70 //Definition of new OptionKeys
71 // these will be available in the top-level OptionKey namespace:
72 // i.e., OPT_KEY( Type, key ) --> OptionKey::key
73 // to have them in a namespace use OPT_1GRP_KEY( Type, grp, key ) --> OptionKey::grp::key
74 OPT_KEY( IntegerVector, superimpose_res )
75 OPT_KEY( IntegerVector, graft_res )
76 OPT_KEY( IntegerVector, unvirtualize_phosphate_res )
77 OPT_KEY( Boolean, graft_backbone_only )
78 
79 ////////////////////////////////////////////////////////////////////////////
80 // by default all 5' phosphates are virtualized -- keep some of them in.
81 void
82 unvirtualize_phosphates( pose::Pose & pose, utility::vector1< Size > const & unvirtualize_phosphate_residues ){
83  for ( Size n = 1; n <= pose.total_residue(); n++ ) {
84  if ( !unvirtualize_phosphate_residues.has_value( pose.pdb_info()->number( n ) ) ) continue;
85  core::pose::remove_variant_type_from_pose_residue( pose, core::chemical::VIRTUAL_PHOSPHATE, n );
86  }
87 }
88 
89 
90 ////////////////////////////////////////////////////////////////////////////
91 void
93 {
94 
95  using namespace core::chemical;
96  using namespace core::pose;
97  using namespace protocols::farna;
98 
99  ResidueTypeSetCOP rsd_set;
100  rsd_set = core::chemical::ChemicalManager::get_instance()->residue_type_set( FA_STANDARD );
101 
102  import_pose::pose_from_pdb( pose, *rsd_set, pdb_file );
103 
104  core::pose::rna::figure_out_reasonable_rna_fold_tree( pose );
105  core::pose::rna::virtualize_5prime_phosphates( pose );
106 
107  resnum.clear();
108  PDBInfoOP pdb_info = pose.pdb_info();
109  for ( Size n = 1; n <= pose.total_residue(); n++ ) resnum.push_back( pdb_info->number(n) );
110 
111 }
112 
113 
114 ////////////////////////////////////////////////////////////////////////////
115 Size
116 find_index( Size const val, utility::vector1< Size > const & vec ){
117  for ( Size n = 1; n <= vec.size(); n++ ) {
118  if ( val == vec[n] ) return n;
119  }
120  return 0;
121 }
122 
123 ////////////////////////////////////////////////////////////////////////////
124 bool
126  utility::vector1< Size >const & vec_big ){
127  for ( Size n = 1; n <= vec_sub.size(); n++ ) {
128  if ( !find_index(vec_sub[n], vec_big) ) return false;
129  }
130  return true;
131 }
132 
133 
134 ////////////////////////////////////////////////////////////////////////////
135 std::map< Size, Size >
136 calculate_res_map( utility::vector1< Size > const & superimpose_resnum,
137  utility::vector1< Size > const & resnum1,
138  utility::vector1< Size > const & resnum2 ){
139 
140  runtime_assert( is_subset( superimpose_resnum, resnum1 ) );
141  runtime_assert( is_subset( superimpose_resnum, resnum2 ) );
142 
143  std::map< Size, Size > res_map;
144 
145  for ( Size n = 1; n <= superimpose_resnum.size(); n++ ) {
146  Size const i = find_index( superimpose_resnum[n], resnum1 );
147  runtime_assert( i > 0 );
148  Size const j = find_index( superimpose_resnum[n], resnum2 );
149  runtime_assert( j > 0 );
150  res_map[i] = j;
151  }
152 
153  return res_map;
154 }
155 
156 
157 ////////////////////////////////////////////////////////////////////////////
158 void
159 superimpose_pdb( pose::Pose & pose1 /* the 'parent pose'*/,
160  pose::Pose & pose2 /*the one that got superimposed*/,
161  utility::vector1< Size > const & resnum1,
162  utility::vector1< Size > const & resnum2){
163 
164  using namespace core::pose;
165  using namespace basic::options;
166 
167  utility::vector1< Size > superimpose_resnum;
168  if ( option[superimpose_res].user() ) {
169  // check if all specified residues are actually in the file.
170  superimpose_resnum = option[ superimpose_res ]();
171  } else {
172  // find all shared residues
173  for ( Size n = 1; n <= resnum1.size(); n++ ) {
174  Size const n2 = find_index( resnum1[n], resnum2 );
175  if ( n2 == 0 ) continue;
176  superimpose_resnum.push_back( resnum1[n] );
177 
178  if ( pose1.aa(n) != pose2.aa(n2) ) {
179  std::cout << "Mismatch in sequence? " << std::endl;
180  std::cout << " residue " << resnum1[n] << " " << pose1.sequence()[n -1] << " at pose position " << n << std::endl;
181  std::cout << " residue " << resnum2[n2] << " " << pose2.sequence()[n2-1] << " at pose position " << n2 << std::endl;
182  runtime_assert( pose1.aa(n) == pose2.aa( n2 ) );
183  }
184  }
185  }
186 
187  if ( superimpose_resnum.size() > 0 ) {
188 
189  std::map< Size, Size > superimpose_res_map = calculate_res_map( superimpose_resnum, resnum2, resnum1 );
190 
191  // now superimpose based on this subset. Copied from RNA_DeNovoProtocol.cc
192  id::AtomID_Map< id::AtomID > const & alignment_atom_id_map_native =
193  protocols::stepwise::modeler::align::create_alignment_id_map_legacy( pose2, pose1, superimpose_res_map ); // perhaps this should move to toolbox.
194  core::scoring::superimpose_pose( pose2, pose1, alignment_atom_id_map_native );
195  } else {
196  std::cout << "WARNING: no overlap residues found or specified (by -superimpose_res)!!!! " << std::endl;
197  }
198 
199  unvirtualize_phosphates( pose1, option[ unvirtualize_phosphate_res ]() );
200  unvirtualize_phosphates( pose2, option[ unvirtualize_phosphate_res ]() );
201 
202 }
203 
204 
205 ////////////////////////////////////////////////////////////////////////////
206 void
207 output_superimposed_pdb( pose::Pose const & pose2, std::string const & pdb_file2 ){
208 
209  using namespace basic::options;
210 
211  std::string outfile = pdb_file2;
212 
213  std::string::size_type pos = pdb_file2.find( ".pdb", 0 );
214  std::string const new_prefix = ".sup.pdb";
215  if ( pos == std::string::npos ) {
216  utility_exit_with_message( "If you want to output a lores silent file, better use .out suffix ==> " + pdb_file2 );
217  }
218  outfile.replace( pos, new_prefix.length(), new_prefix );
219 
220  std::cout << "Outputting superimposed pdb to: " << outfile << std::endl;
221  pose2.dump_pdb( outfile );
222 
223 }
224 
225 
226 ////////////////////////////////////////////////////////////////////////////
227 void
229  pose::Pose & pose_target,
230  utility::vector1< Size > const & resnum1,
231  utility::vector1< Size > const & resnum_target,
232  utility::vector1< Size > const & resnum_graft,
233  bool const graft_backbone_only){
234 
235  using namespace core::conformation;
236  using namespace core::id;
237 
238  for ( Size i = 1; i <= pose1.total_residue(); i++ ) {
239 
240  Size q = find_index( resnum1[i], resnum_target );
241  if ( q == 0 ) continue;
242 
243  if ( resnum_graft.size() > 0 ) { //only copy over 'graft res'
244  Size r = find_index( resnum1[i], resnum_graft );
245  if ( r == 0 ) continue;
246  }
247 
248  Residue const & rsd_i = pose1.residue(i);
249 
250  for ( Size ii = 1; ii <= rsd_i.natoms(); ii++ ) {
251 
252  if ( rsd_i.is_virtual( ii ) ) continue;
253  if ( graft_backbone_only &&
254  ( ( ii >= rsd_i.first_sidechain_atom() && ii <= rsd_i.nheavyatoms() ) ||
255  ( ii >= rsd_i.first_sidechain_hydrogen() && ii <= rsd_i.natoms() ) ) ) continue;
256  std::string const & atom_name = rsd_i.atom_name( ii ) ;
257 
258  if ( !pose_target.residue( q ).has( atom_name ) ) continue;
259  Size const qq = pose_target.residue( q ).atom_index( atom_name );
260  pose_target.set_xyz( AtomID(qq, q), rsd_i.xyz( ii ) );
261 
262  }
263 
264  }
265 
266 }
267 
268 ////////////////////////////////////////////////////////////////////////////
269 void
271  pose::Pose & pose_target,
272  utility::vector1< Size > const & resnum1,
273  utility::vector1< Size > const & resnum_target) {
274  utility::vector1< Size > resnum_dummy;
275  graft_in_positions( pose1, pose_target, resnum1, resnum_target, resnum_dummy, false );
276 
277 }
278 
279 ////////////////////////////////////////////////////////////////////////////
280 void
282  utility::vector1< Size > const & resnum ) {
283 
284  using namespace core::kinematics;
285 
286  utility::vector1< Size > cutpoints;
287  for ( Size n = 2; n <= resnum.size(); n++ ) {
288  if ( resnum[n] == resnum[n-1] + 1 ) {
289  if ( ( pose.residue( n-1 ).xyz( " O3'" ) - pose.residue( n ).xyz( " P " ) ).length() > 2.0 ) {
290  FoldTree f = pose.fold_tree();
291  f.new_jump( n-1, n, n-1 );
292  f.set_jump_atoms( f.num_jump(),
293  core::chemical::rna::chi1_torsion_atom( pose.residue( n-1 ) ),
294  core::chemical::rna::chi1_torsion_atom( pose.residue( n ) ) );
295  pose.fold_tree( f );
296  correctly_add_cutpoint_variants( pose, n-1 );
297  cutpoints.push_back( n-1 );
298  }
299  }
300  }
301 
302  protocols::farna::movers::RNA_LoopCloser rna_loop_closer;
303  rna_loop_closer.apply( pose, cutpoints );
304 }
305 
306 ////////////////////////////////////////////////////////////////////////////
307 void
308 graft_pdb( pose::Pose const & pose1, pose::Pose const & pose2,
309  utility::vector1< Size > const & resnum1,
310  utility::vector1< Size > const & resnum2,
311  pose::Pose & pose_target,
312  utility::vector1< Size > & resnum_target ){
313 
314  using namespace core::pose;
315  using namespace core::chemical;
316  using namespace core::kinematics;
317  using namespace basic::options;
318  using namespace protocols::farna;
319 
320  pose_target.clear();
321  resnum_target.clear();
322 
323  // make a generic RNA pose with the desired sequence. God I hope this works. Need to figure out PDBInfo crap too.
324  // what's the sequence?
325 
326  utility::vector1< Size > graft_resnum;
327  if ( option[ graft_res ].user() ) {
328  graft_resnum = option[ graft_res ]();
329  } else {
330  graft_resnum = resnum2; // copy over everything in pose2.
331  }
332 
333  std::list< std::pair< Size, char > > resnum_seq_list; // need to use a list, since we can sort it.
334 
335  for ( Size n = 1; n <= pose1.total_residue(); n++ ) {
336  if ( !find_index( resnum1[n], graft_resnum ) ) {
337  resnum_seq_list.push_back( std::make_pair( resnum1[n], pose1.sequence()[ n-1 ] ) );
338  }
339  }
340 
341  for ( Size n = 1; n <= pose2.total_residue(); n++ ) {
342  if ( find_index( resnum2[n], graft_resnum ) ) {
343  resnum_seq_list.push_back( std::make_pair( resnum2[n], pose2.sequence()[ n-1 ] ) );
344  }
345  }
346 
347  resnum_seq_list.sort();
348 
349  std::string sequence_target;
350  for ( std::list< std::pair< Size, char > >::const_iterator iter = resnum_seq_list.begin(),
351  end = resnum_seq_list.end(); iter != end; ++iter ) {
352  resnum_target.push_back( iter->first );
353  sequence_target += iter->second;
354  }
355 
356  ResidueTypeSetCOP rsd_set;
357  rsd_set = core::chemical::ChemicalManager::get_instance()->residue_type_set( FA_STANDARD );
358  make_pose_from_sequence( pose_target, sequence_target, *rsd_set );
359 
360  // Copy in non-virtual atoms from pose1;
361  graft_in_positions( pose1, pose_target, resnum1, resnum_target );
362  // Copy in non-virtual atoms from pose2;
363  graft_in_positions( pose2, pose_target, resnum2, resnum_target, graft_resnum, option[ graft_backbone_only ]() );
364 
365  // close loops
366  close_loops( pose_target, resnum_target );
367 
368  // kind of ad hoc. let's see if it works.
369  core::pose::rna::figure_out_reasonable_rna_fold_tree( pose_target );
370  core::pose::rna::virtualize_5prime_phosphates( pose_target );
371 
372  // chain boundaries -- make sure they are virtualized [this could be semi-dangerous for group I ribozymes with
373  // discontinuous numbering -- so don't do it?
374  for ( Size n = 1; n <= resnum_target.size(); n++ ) {
375  if ( n == 1 || ( resnum_target[n] > resnum_target[n-1] + 1 ) ) {
376  add_variant_type_to_pose_residue( pose_target, core::chemical::VIRTUAL_PHOSPHATE, n );
377  }
378  }
379 
380  // Need to fill in PDBInfo!
381  PDBInfoOP pdb_info( new PDBInfo( pose_target ) );
382  pdb_info->set_numbering( resnum_target );
383  pose_target.pdb_info( pdb_info );
384  std::cout << pose_target.annotated_sequence() << std::endl;
385  unvirtualize_phosphates( pose_target, option[ unvirtualize_phosphate_res ]() );
386  std::cout << pose_target.annotated_sequence() << std::endl;
387 
388  std::string outfile = "graft.pdb";
389  if ( option[ out::file::o ].user() ) outfile = option[ out::file::o ]();
390 
391  std::cout << "Outputting: " << outfile << std::endl;
392  pose_target.dump_pdb( outfile );
393 
394 }
395 
396 ////////////////////////////////////////////////////////////////////////////
397 void
399 
400  using namespace basic::options;
401 
403  runtime_assert( pdb_files.size() >= 2 );
404 
405  pose::Pose pose1, pose2, pose_target;
406  utility::vector1< Size > resnum1, resnum2, resnum_target;
407 
408  // note that following will also try to estimate a fold_tree & virtualize 5' phosphates
409  // (which will be ignored in superposition).
410  get_pose_and_numbering( pdb_files[ 1 ], pose1, resnum1 );
411 
412  for ( Size i = 2; i <= pdb_files.size(); i++ ) {
413  get_pose_and_numbering( pdb_files[ i ], pose2, resnum2 );
414  superimpose_pdb( pose1 /* the 'parent pose'*/,
415  pose2 /*the one that got superimposed*/,
416  resnum1,
417  resnum2); // in principle could store resnum, pdb_file1 inside PDBInfo object!
418  graft_pdb( pose1, pose2, resnum1, resnum2, pose_target, resnum_target );
419 
420  resnum1 = resnum_target;
421  pose1 = pose_target;
422  }
423 
424 }
425 
426 
427 ///////////////////////////////////////////////////////////////
428 void*
429 my_main( void* )
430 {
431 
432  using namespace basic::options;
433 
435 
436  exit( 0 );
437 
438 }
439 
440 
441 ///////////////////////////////////////////////////////////////////////////////
442 int
443 main( int argc, char * argv [] )
444 {
445 
446  try {
447 
448  using namespace basic::options;
449 
450  utility::vector1< Size > blank_size_vector;
451 
452  //Uh, options?
453  NEW_OPT( superimpose_res, "residues over which to superimpose second pose onto first pose", blank_size_vector );
454  NEW_OPT( graft_res, "residues to graft from second pose", blank_size_vector );
455  NEW_OPT( unvirtualize_phosphate_res, "do not virtualize these phosphate (useful for cdiAMP)", blank_size_vector );
456  NEW_OPT( graft_backbone_only, "only graft backbone from 2nd PDB", false );
457 
458  ////////////////////////////////////////////////////////////////////////////
459  // setup
460  ////////////////////////////////////////////////////////////////////////////
461  devel::init(argc, argv);
462 
463  ////////////////////////////////////////////////////////////////////////////
464  // end of setup
465  ////////////////////////////////////////////////////////////////////////////
466 
467  protocols::viewer::viewer_main( my_main );
468 
469 
470  } catch ( utility::excn::EXCN_Base const & e ) {
471  std::cout << "caught exception " << e.msg() << std::endl;
472  return -1;
473  }
474 
475 }
#define utility_exit_with_message(m)
Exit with file + line + message.
Definition: exit.hh:47
void superimpose_pdb(pose::Pose &pose1, pose::Pose &pose2, utility::vector1< Size > const &resnum1, utility::vector1< Size > const &resnum2)
Definition: rna_graft.cc:159
void output_superimposed_pdb(pose::Pose const &pose2, std::string const &pdb_file2)
Definition: rna_graft.cc:207
virtual std::string const msg() const
Definition: EXCN_Base.hh:70
void close_loops(core::pose::Pose &pose, utility::vector1< Size > const &resnum)
Definition: rna_graft.cc:281
void * my_main(void *)
Definition: rna_graft.cc:429
utility::keys::KeyLookup< KeyType >::const_iterator const_iterator
Key collection iterators.
void init(int argc, char *argv[])
Command line init() version.
Definition: init.cc:23
BooleanOptionKey const user("options:user")
Definition: OptionKeys.hh:40
Size find_index(Size const val, utility::vector1< Size > const &vec)
Definition: rna_graft.cc:116
core::pose::Pose Pose
Definition: supercharge.cc:101
utility::keys::lookup::end< KeyType > const end
#define runtime_assert(_Expression)
Assert that the condition holds. Evaluated for both debug and release builds.
Definition: exit.hh:63
void graft_pdb(pose::Pose const &pose1, pose::Pose const &pose2, utility::vector1< Size > const &resnum1, utility::vector1< Size > const &resnum2, pose::Pose &pose_target, utility::vector1< Size > &resnum_target)
Definition: rna_graft.cc:308
void rna_superimpose_and_graft_test()
Definition: rna_graft.cc:398
numeric::xyzMatrix< Real > Matrix
Definition: rna_graft.cc:59
bool is_subset(utility::vector1< Size > const &vec_sub, utility::vector1< Size >const &vec_big)
Definition: rna_graft.cc:125
std::vector with 1-based indexing
Definition: vector1.fwd.hh:44
rule< Scanner, options_closure::context_t > options
Definition: Tag.cc:377
#define OPT_KEY(type, key)
list resnum
if line_edit[13:14]=='P': #Nucleic acid? Skip.
std::map< Size, Size > calculate_res_map(utility::vector1< Size > const &superimpose_resnum, utility::vector1< Size > const &resnum1, utility::vector1< Size > const &resnum2)
Definition: rna_graft.cc:136
#define NEW_OPT(akey, help, adef)
ocstream cout(std::cout)
Wrapper around std::cout.
Definition: ocstream.hh:287
BooleanOptionKey const exit("options:exit")
Definition: OptionKeys.hh:51
vector1: std::vector with 1-based indexing
xyzMatrix: Fast 3x3 xyz matrix template
void graft_in_positions(pose::Pose const &pose1, pose::Pose &pose_target, utility::vector1< Size > const &resnum1, utility::vector1< Size > const &resnum_target, utility::vector1< Size > const &resnum_graft, bool const graft_backbone_only)
Definition: rna_graft.cc:228
int main(int argc, char *argv[])
Definition: rna_graft.cc:443
void unvirtualize_phosphates(pose::Pose &pose, utility::vector1< Size > const &unvirtualize_phosphate_residues)
Definition: rna_graft.cc:82
platform::Size Size
Definition: random.fwd.hh:30
void get_pose_and_numbering(std::string const &pdb_file, pose::Pose &pose, utility::vector1< Size > &resnum)
Definition: rna_graft.cc:92
rule< Scanner, option_closure::context_t > option
Definition: Tag.cc:378