Rosetta
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Pages
ensemble_analysis.cc
Go to the documentation of this file.
1 // -*- mode:c++;tab-width:2;indent-tabs-mode:t;show-trailing-whitespace:t;rm-trailing-spaces:t -*-
2 // vi: set ts=2 noet:
3 //
4 // (c) Copyright Rosetta Commons Member Institutions.
5 // (c) This file is part of the Rosetta software suite and is made available under license.
6 // (c) The Rosetta software is developed by the contributing members of the Rosetta Commons.
7 // (c) For more information, see http://www.rosettacommons.org. Questions about this can be
8 // (c) addressed to University of Washington UW TechTransfer, email: license@u.washington.edu.
9 
10 /// @file
11 /// @brief analyse sets of structures
12 /// @details This tool allows to superimpose structures using the wRMSD method [ Damm&Carlson, Biophys J (2006) 90:4558-4573 ]
13 /// @details Superimposed structures can be written as output pdbs and the converged residues can be determined
14 /// @author Oliver Lange
15 
16 #include <protocols/moves/Mover.hh>
17 #include <core/pose/Pose.hh>
18 #include <devel/init.hh>
19 #include <core/types.hh>
20 #include <core/chemical/ChemicalManager.hh>
21 
22 #include <protocols/jd2/JobDistributor.hh>
23 #include <protocols/jd2/NoOutputJobOutputter.hh>
24 #include <protocols/jd2/SilentFileJobInputter.hh>
25 
26 #include <protocols/toolbox/DecoySetEvaluation.hh>
27 
28 #include <protocols/toolbox/superimpose.hh>
29 #include <protocols/loops/Loop.hh>
30 #include <protocols/loops/Loops.hh>
31 #include <protocols/loops/LoopsFileIO.hh>
32 
33 // Utility headers
35 #include <utility/io/ozstream.hh>
36 
37 #include <utility/vector1.hh>
38 #include <basic/Tracer.hh>
39 #include <utility/exit.hh>
40 
41 #include <basic/options/keys/in.OptionKeys.gen.hh>
42 #include <basic/options/keys/out.OptionKeys.gen.hh>
43 
44 // ObjexxFCL includes
45 #include <ObjexxFCL/FArray1D.hh>
46 
47 #include <string>
48 
49 //Auto Headers
50 #include <core/import_pose/import_pose.hh>
52 
53 
54 static THREAD_LOCAL basic::Tracer tr( "main" );
55 
56 using namespace core;
57 using namespace protocols;
58 using namespace protocols::jd2;
59 //using namespace pose;
60 using namespace basic::options;
61 using namespace basic::options::OptionKeys;
62 using namespace toolbox;
63 using namespace ObjexxFCL;
64 //using namespace ObjexxFCL::format;
65 
66 OPT_KEY( Real, wRMSD )
67 OPT_KEY( Real, tolerance )
68 OPT_1GRP_KEY( File, rmsf, out )
69 OPT_1GRP_KEY( File, rigid, out )
70 OPT_1GRP_KEY( Real, rigid, cutoff )
71 OPT_1GRP_KEY( Integer, rigid, min_gap )
72 OPT_1GRP_KEY( File, rigid, in )
73 OPT_1GRP_KEY( Boolean, calc, rmsd )
74 
76  using namespace basic::options;
77  using namespace basic::options::OptionKeys;
78  OPT( in::file::s );
81  OPT( out::pdb );
82  NEW_OPT( wRMSD, "compute wRMSD with this sigma", 2 );
83  NEW_OPT( tolerance, "stop wRMSD iteration if <tolearance change in wSum", 0.00001);
84  // NEW_OPT( dump_fit, "output pdbs of fitted structures ", false );
85  NEW_OPT( rigid::in, "residues that are considered for wRMSD or clustering", "rigid.loop");
86  NEW_OPT( rmsf::out, "write rmsf into this file", "rmsf.dat" );
87  NEW_OPT( rigid::out, "write a RIGID definition", "rigid.loops" );
88  NEW_OPT( rigid::cutoff, "residues with less then loop_cutoff will go into RIGID definition", 2 );
89  NEW_OPT( rigid::min_gap, "don't have less then min_gap residues between rigid regions", 1 );
90  NEW_OPT( calc::rmsd, "calculate RMSD from mean, and average pairwise RMSD", false);
91 }
92 
93 // Forward
94 class RmsfMover;
95 
96 // Types
97 typedef utility::pointer::shared_ptr< RmsfMover > RmsfMoverOP;
98 typedef utility::pointer::shared_ptr< RmsfMover const > RmsfMoverCOP;
99 
100 class RmsfMover : public moves::Mover {
101 public:
102  virtual void apply( core::pose::Pose& );
103  std::string get_name() const { return "RmsfMover"; }
104 
105  DecoySetEvaluation eval_;
106 };
107 
109  if ( eval_.n_decoys_max() < eval_.n_decoys() + 1 ) {
110  eval_.reserve( eval_.n_decoys() + 100 );
111  }
112  eval_.push_back( pose );
113 }
114 
115 class FitMover : public moves::Mover {
116 public:
117  virtual void apply( core::pose::Pose& );
118  std::string get_name() const { return "FitMover"; }
119  FitMover() : first( true ), iref( 0 ) {};
122  bool first;
124 };
125 
126 typedef utility::pointer::shared_ptr< FitMover > FitMoverOP;
127 
129  if ( iref && first ) {
130  --iref;
131  return;
132  }
133  if ( first ) {
134  ref_pose_ = pose;
135  first = false;
136  return;
137  }
138  runtime_assert( !first );
139  CA_superimpose( weights_, ref_pose_, pose );
140 }
141 
142 void read_structures( RmsfMoverOP rmsf_tool ) {
143  //get silent-file job-inputter if available
144  SilentFileJobInputterOP sfd_inputter (
145  utility::pointer::dynamic_pointer_cast< protocols::jd2::SilentFileJobInputter > ( JobDistributor::get_instance()->job_inputter() ) );
146  if ( sfd_inputter ) {
147  //this allows very fast reading of CA coords
148  io::silent::SilentFileData const& sfd( sfd_inputter->silent_file_data() );
149  rmsf_tool->eval_.push_back_CA_xyz_from_silent_file( sfd, false /*store energies*/ );
150  } else {
151  JobDistributor::get_instance()->go( rmsf_tool, JobOutputterOP( new jd2::NoOutputJobOutputter ) );
152  }
153 }
154 
156  if ( !option[ rigid::in ].user() ) return natoms;
157  loops::PoseNumberedLoopFileReader loop_file_reader;
158  loop_file_reader.hijack_loop_reading_code_set_loop_line_begin_token( "RIGID" );
159  std::ifstream is( option[ rigid::in ]().name().c_str() );
160  if ( !is ) utility_exit_with_message( "[ERROR] Error opening RBSeg file '" + option[ rigid::in ]().name() + "'" );
161  loops::SerializedLoopList loops = loop_file_reader.read_pose_numbered_loops_file(is, option[ rigid::in ](), false );
162  loops::Loops rigid = loops::Loops( loops );
163  core::Size count_expected( 0 );
164  for ( Size i=1; i<=natoms; ++i ) {
165  if ( rigid.is_loop_residue( i ) ) {
166  weights( i )=1.0;
167  ++count_expected;
168  } else weights( i )=0.0;
169  }
170  return count_expected;
171 }
172 
173 Size superimpose( DecoySetEvaluation& eval, utility::vector1< Real >& rmsf_result, FArray1D_double& weights ) {
174  //Size icenter = 1;
175  eval.superimpose();
176  Size icenter = eval.wRMSD( option[ wRMSD ], option[ tolerance ](), weights );
177  eval.rmsf( rmsf_result );
178  return icenter;
179 }
180 
182  for ( Size i=1; i<=digits; ++i ) {
183  d*=10;
184  }
185  d = floor( d + 0.5 );
186  for ( Size i=1; i<=digits; ++i ) {
187  d/=10;
188  }
189  return d;
190 }
191 
192 
193 void run() {
194  using namespace basic::options;
195  using namespace basic::options::OptionKeys;
196  using namespace protocols::jd2;
197 
198  //store structures in rmsf_tool ...
199  RmsfMoverOP rmsf_tool( new RmsfMover );
200  read_structures( rmsf_tool );
201 
202  //initialize wRMSD weights with 1.0 unless we have -rigid:in file active
203  FArray1D_double weights( rmsf_tool->eval_.n_atoms(), 1.0 );
204  FArray1D_double input_weights( rmsf_tool->eval_.n_atoms(), 1.0 );
205  core::Size const expected_rigid_atoms( read_input_weights(weights,rmsf_tool->eval_.n_atoms()) );
206 
207  input_weights=weights;
208  rmsf_tool->eval_.set_weights( weights );
209 
210  //superposition...
211  utility::vector1< Real > rmsf_result;
212  Size icenter=0;
213  if ( option[ rmsf::out ].user()
214  || option[ rigid::out ].user()
215  || option[ out::pdb ].user()
216  || option[ calc::rmsd ].user()
217  ) { //output rmsf file
218  icenter=superimpose( rmsf_tool->eval_, rmsf_result, weights );
219  }
220 
221  //output rmsf file...
222  if ( option[ rmsf::out ].user() ) {
223  utility::io::ozstream os_rmsf( option[ rmsf::out ]());
224  Size ct = 1;
225  for ( utility::vector1<Real>::const_iterator it = rmsf_result.begin(); it != rmsf_result.end(); ++it, ++ct ) {
226  os_rmsf << ct << " " << *it << " " << weights( ct ) << std::endl;
227  }
228  }
229 
230  ///write rigid-out loops file
231  if ( option[ rigid::out ].user() ) { //create RIGID output
232  Real const cutoff ( option[ rigid::cutoff ] );
233  tr.Info << "make rigid with cutoff " << cutoff << " and write to file... " << option[ rigid::out ]() << std::endl;
234  loops::Loops rigid;
235  for ( Size i=1; i<=rmsf_result.size(); ++i ) {
236  if ( rmsf_result[ i ]<cutoff && input_weights( i ) > 0 ) rigid.add_loop( loops::Loop( i, i ), option[ rigid::min_gap ]() );
237  }
238  tr.Info << rigid << std::endl;
239  rigid.write_loops_to_file( option[ rigid::out ](), "RIGID" );
240  std::string fname = option[ rigid::out ]();
241  loops::Loops loops = rigid.invert( rmsf_result.size() );
242  loops.write_loops_to_file( fname + ".loopfile" , "LOOP" );
243  }
244 
245  ///write superimposed structures
246  if ( option[ out::pdb ]() ) {
247  FitMoverOP fit_tool( new FitMover );
248  fit_tool->weights_ = weights;
249  if ( icenter > 1 ) {
250  fit_tool->iref = icenter - 1; //ignores the first iref structures before it takes the pose as reference pose.
251  JobDistributor::get_instance()->restart();
252  JobDistributor::get_instance()->go( fit_tool, JobOutputterOP( new jd2::NoOutputJobOutputter ) );
253  }
254  JobDistributor::get_instance()->restart();
255  JobDistributor::get_instance()->go( fit_tool );
256  if ( option[ in::file::native ].user() ) {
258  core::import_pose::pose_from_pdb( native_pose,
259  *core::chemical::ChemicalManager::get_instance()->residue_type_set( chemical::CENTROID ), option[ in::file::native ]() );
260  fit_tool->apply( native_pose );
261  native_pose.dump_pdb( "fit_native.pdb");
262  }
263  }
264 
265  //calculate RMSD from mean
266  if ( option[ calc::rmsd ]() ) {
267  Real const cutoff ( option[ rigid::cutoff ] );
268  tr.Info << "make rigid with cutoff " << cutoff << " and calculate RMSD" << std::endl;
269  FArray1D_double weights( rmsf_tool->eval_.n_atoms(), 0.0 );
270  //get weights 0 or 1 for residues that take part in RMSD calculation
271  Size ct( 0 );
272  loops::Loops rigid;
273  for ( Size i=1; i<=rmsf_result.size(); ++i ) {
274  if ( rmsf_result[ i ]<cutoff && input_weights( i ) > 0 ) {
275  rigid.add_loop( loops::Loop( i, i ), option[ rigid::min_gap ]() );
276  weights(i)=1.0;
277  ++ct;
278  }
279  }
280  tr.Info << "computed RMSD on " << std::endl;;
281  rigid.write_loops_to_stream( tr.Info, "RIGID" );
282  tr.Info << std::endl;
283 
284  FArray2D_double average_structure( 3, rmsf_tool->eval_.n_atoms(), 0.0 );
285  rmsf_tool->eval_.compute_average_structure( average_structure );
286  Real mean_rmsd( 0.0 );
287  for ( Size n=1; n<=rmsf_tool->eval_.n_decoys(); n++ ) {
288  FArray2D_double xx( FArray2P_double( rmsf_tool->eval_.coords()(1, 1, n), 3, rmsf_tool->eval_.n_atoms() ) ); //, 3, rmsf_tool->eval_.n_atoms() );
289  mean_rmsd+=1.0/rmsf_tool->eval_.n_decoys()*
290  rmsf_tool->eval_.rmsd( weights, average_structure, xx );
291  }
292  tr.Info << "number of atoms from " << expected_rigid_atoms << " for mean RMSD: " << ct << std::endl;
293  tr.Info << "fraction of residues converged: " << round(1.0*ct/expected_rigid_atoms,2) << std::endl;
294  tr.Info << "mean RMSD to average structure: " << round(mean_rmsd,2) << std::endl;
295  Size ct_pairs( 0 );
296  Real rmsd_pairs( 0.0 );
297  for ( Size n=1; n<=rmsf_tool->eval_.n_decoys(); n++ ) {
298  for ( Size m=n+1; m<=rmsf_tool->eval_.n_decoys(); m++ ) {
299  ++ct_pairs;
300  FArray2D_double xx1( FArray2P_double( rmsf_tool->eval_.coords()(1, 1, n), 3, rmsf_tool->eval_.n_atoms() ) ); //, 3, rmsf_tool->eval_.n_atoms() );
301  FArray2D_double xx2( FArray2P_double( rmsf_tool->eval_.coords()(1, 1, m), 3, rmsf_tool->eval_.n_atoms() ) ); //, 3, rmsf_tool->eval_.n_atoms() );
302  rmsd_pairs+=rmsf_tool->eval_.rmsd( weights, xx1, xx2 );
303  }
304  }
305  core::Real fraction=1.0*ct/expected_rigid_atoms;
306  tr.Info << "mean pairwise RMSD: " << round(rmsd_pairs/ct_pairs,2) << std::endl;
307  tr.Info << "mean pairwise RMSD * superposed_fraction_of_atoms^-1: " << round(rmsd_pairs/ct_pairs/fraction,2 ) << std::endl;
308  }
309  return;
310 }
311 
312 /////////////////////////////////////////////////////////////////////////////////////////////////////////
313 /// =============================== MAIN ============================================================
314 /////////////////////////////////////////////////////////////////////////////////////////////////////////
315 int
316 main( int argc, char * argv [] )
317 {
318  try {
320  devel::init( argc, argv );
321 
322  try{
323  run();
324  } catch ( utility::excn::EXCN_Base& excn ) {
325  excn.show( std::cerr );
326  }
327  } catch ( utility::excn::EXCN_Base const & e ) {
328  std::cout << "caught exception " << e.msg() << std::endl;
329  return -1;
330  }
331 
332  return 0;
333 }
334 
335 
ocstream cerr(std::cerr)
Wrapper around std::cerr.
Definition: ocstream.hh:290
#define utility_exit_with_message(m)
Exit with file + line + message.
Definition: exit.hh:47
#define THREAD_LOCAL
void register_options()
virtual std::string const msg() const
Definition: EXCN_Base.hh:70
DecoySetEvaluation eval_
static THREAD_LOCAL basic::Tracer tr("main")
virtual void apply(core::pose::Pose &)
void init(int argc, char *argv[])
Command line init() version.
Definition: init.cc:23
BooleanOptionKey const user("options:user")
Definition: OptionKeys.hh:40
std::string get_name() const
core::pose::Pose Pose
Definition: supercharge.cc:101
virtual void apply(core::pose::Pose &)
#define runtime_assert(_Expression)
Assert that the condition holds. Evaluated for both debug and release builds.
Definition: exit.hh:63
FArray2P< double > FArray2P_double
Definition: FArray2P.fwd.hh:48
TracerProxy Info
Definition: Tracer.hh:262
void read_structures(RmsfMoverOP rmsf_tool)
virtual void show(std::ostream &) const =0
utility::pointer::shared_ptr< FitMover > FitMoverOP
core::Size read_input_weights(FArray1D_double &weights, Size natoms)
Program exit functions and macros.
Tracer IO system.
rule< Scanner, options_closure::context_t > options
Definition: Tag.cc:377
super::const_iterator const_iterator
Definition: vector1.hh:62
#define OPT_KEY(type, key)
Output file stream wrapper for uncompressed and compressed files.
list native
Definition: ContactMap.py:108
double Real
Definition: types.hh:39
#define NEW_OPT(akey, help, adef)
ocstream cout(std::cout)
Wrapper around std::cout.
Definition: ocstream.hh:287
vector1: std::vector with 1-based indexing
core::Real round(core::Real d, core::Size digits)
core::pose::Pose ref_pose_
base class for Exception system
Class for handling user debug/warnings/errors. Use instance of this class instead of 'std::cout' for ...
Definition: Tracer.hh:134
int main(int argc, char *argv[])
=============================== MAIN ============================================================ ...
rule< Scanner, string_closure::context_t > name
Definition: Tag.cc:376
utility::pointer::shared_ptr< RmsfMover > RmsfMoverOP
std::string get_name() const
Size superimpose(DecoySetEvaluation &eval, utility::vector1< Real > &rmsf_result, FArray1D_double &weights)
ozstream: Output file stream wrapper for uncompressed and compressed files
Definition: ozstream.hh:53
#define OPT_1GRP_KEY(type, grp, key)
#define OPT(akey)
platform::Size Size
Definition: random.fwd.hh:30
int const silent
Named verbosity levels.
utility::pointer::shared_ptr< RmsfMover const > RmsfMoverCOP
rule< Scanner, option_closure::context_t > option
Definition: Tag.cc:378
void run()