Rosetta
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Pages
recces_turner.cc
Go to the documentation of this file.
1 // -*- mode:c++;tab-width:2;indent-tabs-mode:t;show-trailing-whitespace:t;rm-trailing-spaces:t -*-
2 // vi: set ts=2 noet:
3 //
4 // (c) Copyright Rosetta Commons Member Institutions.
5 // (c) This file is part of the Rosetta software suite and is made available under license.
6 // (c) The Rosetta software is developed by the contributing members of the Rosetta Commons.
7 // (c) For more information, see http://www.rosettacommons.org. Questions about this can be
8 // (c) addressed to University of Washington UW TechTransfer, email: license@u.washington.edu.
9 
10 /// @file
11 /// @brief
12 
13 // libRosetta headers
14 #include <core/init/init.hh>
15 
16 #include <core/pose/Pose.hh>
17 #include <core/pose/util.hh>
18 #include <core/pose/annotated_sequence.hh>
19 
20 #include <core/scoring/ScoreFunctionFactory.hh>
21 #include <core/scoring/ScoreFunction.hh>
22 #include <core/scoring/ScoreType.hh>
23 
24 #include <protocols/viewer/viewers.hh>
25 
26 #include <protocols/moves/SimulatedTempering.hh>
27 #include <protocols/stepwise/modeler/rna/helix/RNA_HelixAssembler.hh>
28 #include <protocols/stepwise/sampler/rna/RNA_MC_Suite.hh>
29 #include <protocols/stepwise/sampler/rna/RNA_MC_MultiSuite.hh>
30 
31 #include <utility/io/ozstream.hh>
32 
33 // C++ headers
34 #include <string>
35 
36 // option key includes
37 #include <basic/options/option.hh>
39 #include <basic/options/keys/out.OptionKeys.gen.hh>
40 #include <basic/options/keys/score.OptionKeys.gen.hh>
41 #include <basic/options/keys/in.OptionKeys.gen.hh>
42 
43 // Exception handling
45 
46 using namespace core;
47 using namespace core::pose;
48 using namespace protocols;
49 using namespace protocols::stepwise;
50 using namespace basic::options::OptionKeys;
51 using namespace basic::options;
52 
53 OPT_KEY( String, seq1 )
54 OPT_KEY( String, seq2 )
55 OPT_KEY( Integer, n_cycle )
56 OPT_KEY( Real, a_form_range )
57 OPT_KEY( RealVector, temps )
58 OPT_KEY( RealVector, st_weights )
59 OPT_KEY( String, out_prefix )
60 OPT_KEY( Boolean, save_score_terms )
61 OPT_KEY( Boolean, dump_pdb )
62 OPT_KEY( Integer, n_intermediate_dump )
63 //////////////////////////////////////////////////////////////////////////////
64 // Histogram class for accumulating samples
65 class Histogram {
66  public:
67  Histogram( Real const min, Real const max, Real const spacing ):
68  min_( min ),
69  max_( max ),
70  spacing_( spacing )
71  {
72  runtime_assert( max > min );
73  n_elem_ = static_cast<Size>( ( max - min ) / spacing ) + 1;
74  for ( Size i = 1; i <= n_elem_; ++i ) hist_.push_back( 0 );
75 }
76 
77 void add( float const value, Size const n_items ) {
78  Size bin_index;
79  if ( value <= min_ ) {
80  bin_index = 1;
81  } else if ( value >= max_ ) {
82  bin_index = n_elem_;
83  } else {
84  bin_index = static_cast<Size>( ( value - min_ ) / spacing_ ) + 1;
85  }
86  hist_[bin_index] += n_items;
87 }
88 
89 void clear() {
90  for ( Size i = 0; i <= n_elem_; ++i ) hist_[i] = 0;
91 }
92 
95  for ( Size i = 1; i <= n_elem_; ++i ) {
96  scores.push_back( min_ + spacing_ * ( i - 0.5 ) );
97  }
98  return scores;
99 }
100 
101 utility::vector1<Size> get_hist() const { return hist_; }
102 
103 private:
104 Real const min_, max_, spacing_;
107 };
108 //////////////////////////////////////////////////////////////////////////////
109 // score types to be recorded
111  using namespace scoring;
112  static utility::vector1<ScoreType> scoretypes;
113  if ( !scoretypes.empty() ) return scoretypes;
114 
115  // List of score types to be cached
116  scoretypes.push_back( fa_atr );
117  scoretypes.push_back( fa_rep );
118  scoretypes.push_back( fa_intra_rep );
119  scoretypes.push_back( fa_stack );
120  scoretypes.push_back( rna_torsion );
121  scoretypes.push_back( hbond_sc );
122  scoretypes.push_back( lk_nonpolar );
123  scoretypes.push_back( geom_sol_fast );
124  scoretypes.push_back( stack_elec );
125  scoretypes.push_back( fa_elec_rna_phos_phos );
126  return scoretypes;
127 }
128 //////////////////////////////////////////////////////////////////////////////
131  Pose & pose,
132  scoring::ScoreFunctionOP const scorefxn
133 ) {
134  using namespace scoring;
135  scores.clear();
136  scores.push_back( ( *scorefxn )( pose ) );
138  for ( Size i = 1; i<= score_types.size(); ++i ) {
139  scores.push_back( scorefxn->score_by_scoretype(
140  pose, score_types[i], false /*weighted*/ ) );
141  }
142 }
143 //////////////////////////////////////////////////////////////////////////////
146  Size const count,
148 ) {
149  using namespace scoring;
150  data.push_back( count );
151  data.insert( data.end(), scores.begin(), scores.end() );
152 }
153 //////////////////////////////////////////////////////////////////////////////
154 // Simple heuristic for gaussian stdev
155 Real gaussian_stdev( Real const n_rsd, Real const temp, bool const is_bp ) {
156  // Negative temp is infinite
157  if ( temp < 0 ) return -1;
158  if ( is_bp ) return 5 * temp / n_rsd;
159  return 6 * pow( temp / n_rsd, 0.75 );
160 }
161 //////////////////////////////////////////////////////////////////////////////
162 template<typename T>
164  std::string const & out_filename,
165  utility::vector1<T> const & out_vector
166 ) {
168  out_filename.c_str(), std::ios::out | std::ios::binary );
169  out.write( (const char*) &out_vector[1], sizeof(T) * out_vector.size() );
170  out.close();
171 }
172 //////////////////////////////////////////////////////////////////////////////
173 template<typename T>
175  std::string const & out_filename,
176  Size const dim1,
177  Size const dim2,
178  utility::vector1<T> const & out_vector
179 ) {
181  out_filename.c_str(), std::ios::out | std::ios::binary );
182  runtime_assert( dim1 * dim2 == out_vector.size() );
183  out.write( (const char*) &dim1, sizeof(Size) );
184  out.write( (const char*) &dim2, sizeof(Size) );
185  out.write( (const char*) &out_vector[1], sizeof(T) * out_vector.size() );
186  out.close();
187 }
188 //////////////////////////////////////////////////////////////////////////////
189 
191  sampler::rna::RNA_MC_MultiSuite & sampler,
192  moves::SimulatedTempering const & tempering,
193  utility::vector1<Size> const & bp_rsd,
194  utility::vector1<Size> const & dangling_rsd
195 ) {
196  Size const n_rsd( bp_rsd.size() + dangling_rsd.size() );
197  Real const temp( tempering.temperature() );
198  Real const bp_stdev( gaussian_stdev( n_rsd, temp, true ) );
199  Real const dangling_stdev( gaussian_stdev( n_rsd, temp, false ) );
200  for ( Size i = 1; i <= bp_rsd.size(); ++ i ) {
201  sampler.set_gaussian_stdev( bp_stdev, bp_rsd[i] );
202  }
203  for ( Size i = 1; i <= dangling_rsd.size(); ++ i ) {
204  sampler.set_gaussian_stdev( dangling_stdev, dangling_rsd[i] );
205  }
206 }
207 //////////////////////////////////////////////////////////////////////////////
208 PoseOP pose_setup(
209  std::string const & seq1,
210  std::string const & seq2,
211  Size const len1
212 ) {
213  protocols::stepwise::modeler::rna::helix::RNA_HelixAssembler assembler;
214  assembler.use_phenix_geo( true );
215  PoseOP pose( assembler.build_init_pose( seq1, seq2 ) );
216  add_variant_type_to_pose_residue( *pose, chemical::VIRTUAL_PHOSPHATE, 1 );
217  if ( seq1 != "" && seq2 != "" ) {
218  add_variant_type_to_pose_residue(
219  *pose, chemical::VIRTUAL_PHOSPHATE, len1 + 1 );
220  }
221  return pose;
222 }
223 ////////////////////////////////////////////////////////////////////////////////
225  using namespace scoring;
227  return score_types.size() + 2;
228 }
229 //////////////////////////////////////////////////////////////////////////////
230 void
231 MC_run () {
232  using namespace protocols::stepwise::sampler::rna;
233  using namespace protocols::moves;
234  using namespace scoring;
235 
236  clock_t const time_start( clock() );
237 
238  utility::vector1<Real> const & temps_( option[ temps ]() );
239  runtime_assert( temps_.size() != 0 );
240 
241  utility::vector1<Real> weights_;
242  utility::vector1<Real> const & orig_weights( option[ st_weights ]() );
243  if ( temps_.size() != orig_weights.size() ) {
244  weights_.push_back( 0 );
245  }
246  weights_.insert( weights_.end(), orig_weights.begin(),
247  orig_weights.end() );
248  runtime_assert( temps_.size() == weights_.size() );
249 
250  Size const n_cycle_( option[n_cycle]() );
251  std::string const & seq1_( option[seq1]() );
252  std::string const & seq2_( option[seq2]() );
253  Size const len1( get_sequence_len( seq1_ ) );
254  Size const len2( get_sequence_len( seq2_ ) );
255 
256  // Score function setup
257  ScoreFunctionOP scorefxn;
258  if ( option[ score::weights ].user() ) {
259  scorefxn = get_score_function();
260  } else {
261  scorefxn = ScoreFunctionFactory::create_score_function( RNA_HIRES_WTS );
262  }
263 
264  // Pose setup
265  Pose pose( *pose_setup( seq1_, seq2_, len1 ) );
266 
267  // Figure out bp and dangling residues
268  utility::vector1<Size> bp_rsd, dangling_rsd;
269  Size const n_bp( std::min( len1, len2 ) );
270  Size const total_len( len1 + len2 );
271  for ( Size i = 1; i <= total_len; ++i ) {
272  if ( i > n_bp && i <= total_len - n_bp ) {
273  dangling_rsd.push_back( i );
274  } else {
275  bp_rsd.push_back( i );
276  }
277  }
278 
279  // Sampler setup
280  RNA_MC_MultiSuite sampler;
281  for ( Size i = 1; i <= total_len; ++i ) {
282  bool const sample_near_a_form( bp_rsd.has_value( i ) );
283  if ( i == 1 || ( i > len1 && i != total_len ) ) {
284  RNA_MC_SuiteOP suite_sampler( new RNA_MC_Suite( i ) );
285  suite_sampler->set_sample_bb( i != 1 );
286  suite_sampler->set_sample_lower_nucleoside( true );
287  suite_sampler->set_sample_upper_nucleoside( false );
288  suite_sampler->set_sample_near_a_form( sample_near_a_form );
289  suite_sampler->set_a_form_range( option[a_form_range]() );
290  sampler.add_external_loop_rotamer( suite_sampler );
291  } else {
292  RNA_MC_SuiteOP suite_sampler( new RNA_MC_Suite( i - 1 ) );
293  suite_sampler->set_sample_bb( len1 == total_len || i != total_len );
294  suite_sampler->set_sample_lower_nucleoside( false );
295  suite_sampler->set_sample_upper_nucleoside( true );
296  suite_sampler->set_sample_near_a_form( sample_near_a_form );
297  suite_sampler->set_a_form_range( option[a_form_range]() );
298  sampler.add_external_loop_rotamer( suite_sampler );
299  }
300  }
301  sampler.init();
302  sampler.apply( pose );
303 
304  // Simulated Tempering setup
305  SimulatedTempering tempering( pose, scorefxn, temps_, weights_ );
306  // tempering.set_rep_cutoff( 100 );
307  set_gaussian_stdev( sampler, tempering, bp_rsd, dangling_rsd );
308 
309  // Setup for data saving for output
310  Size curr_counts( 1 );
312  update_scores( scores, pose, scorefxn );
313  utility::vector1<float> const null_arr_;
315  temps_.size(), null_arr_ );
316 
317  Real const min( -100.05 ), max( 800.05 ), spacing( 0.1 );
318  Histogram null_hist( min, max, spacing);
319  utility::vector1<Histogram> hist_list( temps_.size(), null_hist );
320 
321  // Useful coounters and variables during the loop
322  Size n_accept_total( 0 ), n_t_jumps_accept( 0 );
323  Size const t_jump_interval( 10 );
324  Size const n_t_jumps( n_cycle_ / t_jump_interval );
325  Size temp_id( tempering.temp_id() );
326  bool const is_save_scores( option[save_score_terms]() );
327 
328  // Min-score pose
329  Pose min_pose = pose;
330  Real min_score( 99999 );
331 
332  std::cout << "Start the main sampling loop." << std::endl;
333  if ( option[dump_pdb]() ) pose.dump_pdb( "init.pdb" );
334 
335  Size const n_dump = option[n_intermediate_dump]();
336  Size curr_dump = 1;
337 
338 
339  // Main sampling cycle
340  for ( Size n = 1; n <= n_cycle_; ++n ) {
341  ++sampler;
342  sampler.apply( pose );
343  if ( tempering.boltzmann( pose ) || n == n_cycle_ ) {
344  if ( is_save_scores ) fill_data( data[temp_id], curr_counts, scores );
345  ++n_accept_total;
346  hist_list[temp_id].add( scores[1], curr_counts );
347  update_scores( scores, pose, scorefxn );
348  if ( n == n_cycle_ ) break;
349  sampler.update();
350  curr_counts = 1;
351  if ( option[dump_pdb]() && scores[1] < min_score ) {
352  min_score = scores[1];
353  min_pose = pose;
354  }
355  if ( n_dump != 0 && n * (n_dump + 1) / double(n_cycle_) >= curr_dump ) {
356  std::ostringstream oss;
357  oss << "intermediate" << '_' << curr_dump << ".pdb";
358  pose.dump_pdb(oss.str());
359  ++curr_dump;
360  }
361  } else {
362  ++curr_counts;
363  }
364 
365  if ( n % t_jump_interval == 0 && tempering.t_jump() ) {
366  ++n_t_jumps_accept;
367  if ( is_save_scores ) fill_data( data[temp_id], curr_counts, scores );
368  hist_list[temp_id].add( scores[1], curr_counts );
369  curr_counts = 1;
370  set_gaussian_stdev( sampler, tempering, bp_rsd, dangling_rsd );
371  temp_id = tempering.temp_id();
372  }
373  }
374  if ( option[dump_pdb]() ) {
375  pose.dump_pdb( "end.pdb" );
376  min_pose.dump_pdb( "min.pdb" );
377  scorefxn->show( min_pose );
378  }
379 
380  // Output simple statistics and the data
381  //pose.dump_pdb("final.pdb");
382 
383  std::cout << "n_cycles: " << n_cycle_ << std::endl;
384  std::cout << "Accept rate: " << double( n_accept_total ) / n_cycle_
385  << std::endl;
386  std::cout << "T_jump accept rate: " << double( n_t_jumps_accept ) / n_t_jumps
387  << std::endl;
388  Real const time_in_test = static_cast<Real>( clock() - time_start )
389  / CLOCKS_PER_SEC;
390  std::cout << "Time in sampler: " << time_in_test << std::endl;
391 
392  for ( Size i = 1; i <= temps_.size(); ++i ) {
393  if ( is_save_scores ) {
394  std::ostringstream oss;
395  oss << option[out_prefix]() << '_' << std::fixed << std::setprecision(2)
396  << temps_[i] << ".bin.gz";
397  Size const data_dim2( data_dim() );
398  Size const data_dim1( data[i].size() / data_dim2 );
399  vector2disk_in2d( oss.str(), data_dim1, data_dim2, data[i] );
400  }
401  std::ostringstream oss;
402  oss << option[out_prefix]() << '_' << std::fixed << std::setprecision(2)
403  << temps_[i] << ".hist.gz";
404  utility::vector1<Size> const & hist( hist_list[i].get_hist() );
405  utility::vector1<Real> const & scores( hist_list[i].get_scores() );
406  vector2disk_in1d( oss.str(), hist );
407 
408  std::ostringstream oss1;
409  oss1 << option[out_prefix]() << "_hist_scores.gz";
410  vector2disk_in1d( oss1.str(), scores );
411  }
412 }
413 //////////////////////////////////////////////////////////////////////////////
414 void*
415 my_main( void* )
416 {
417  MC_run();
418  exit( 0 );
419 }
420 //////////////////////////////////////////////////////////////////////////////
421 int
422 main( int argc, char * argv [] )
423 {
424  using namespace core;
425  utility::vector1< Real > null_real_vector;
426  NEW_OPT( seq1, "sequence 1 to model, 3' to 5' ", "" );
427  NEW_OPT( seq2, "sequence 2 to model, 3' to 5' ", "" );
428  NEW_OPT( n_cycle, "cycle number for random sampling", 0 );
429  NEW_OPT( a_form_range, "Range of sampling near A-form for duplexes.", 60.0);
430  NEW_OPT( temps, "Simulated tempering temperatures", null_real_vector );
431  NEW_OPT( st_weights, "Simulated tempering weights", null_real_vector );
432  NEW_OPT( out_prefix, "prefix for the out file", "turner" );
433  NEW_OPT( save_score_terms,
434  "Save scores and individual score terms"
435  " of all sampled conformers", false );
436  NEW_OPT( dump_pdb, "Dump pdb files", false );
437  NEW_OPT( n_intermediate_dump,
438  "Number of intermediate conformations to be dumped", 0 );
439 
440  try {
441  core::init::init ( argc, argv );
442  protocols::viewer::viewer_main( my_main );
443  } catch ( utility::excn::EXCN_Base const & e ) {
444  std::cout << "caught exception " << e.msg() << std::endl;
445  return -1;
446  }
447  return 0;
448 }
449 
static T min(T x, T y)
Definition: Svm.cc:16
virtual std::string const msg() const
Definition: EXCN_Base.hh:70
Real gaussian_stdev(Real const n_rsd, Real const temp, bool const is_bp)
std::string String
Definition: remodel.cc:69
dictionary size
Definition: amino_acids.py:44
void set_gaussian_stdev(sampler::rna::RNA_MC_MultiSuite &sampler, moves::SimulatedTempering const &tempering, utility::vector1< Size > const &bp_rsd, utility::vector1< Size > const &dangling_rsd)
utility::vector1< Size > hist_
Size data_dim()
BooleanOptionKey const user("options:user")
Definition: OptionKeys.hh:40
core::pose::Pose Pose
Definition: supercharge.cc:101
utility::vector1< Size > get_hist() const
#define runtime_assert(_Expression)
Assert that the condition holds. Evaluated for both debug and release builds.
Definition: exit.hh:63
common derived classes for thrown exceptions
tuple scorefxn
Definition: PyMOL_demo.py:63
void clear()
member1 value
Definition: Tag.cc:296
void fill_data(utility::vector1< float > &data, Size const count, utility::vector1< float > &scores)
Histogram(Real const min, Real const max, Real const spacing)
rule< Scanner, options_closure::context_t > options
Definition: Tag.cc:377
#define OPT_KEY(type, key)
utility::vector1< scoring::ScoreType > const & get_scoretypes()
Output file stream wrapper for uncompressed and compressed files.
void close()
Close the ofstream and reset the state.
Definition: ozstream.hh:430
double Real
Definition: types.hh:39
PoseOP pose_setup(std::string const &seq1, std::string const &seq2, Size const len1)
utility::vector1< Real > get_scores() const
DimensionExpressionPow pow(Dimension const &dim1, Dimension const &dim2)
pow( Dimension, Dimension )
#define NEW_OPT(akey, help, adef)
Real const spacing_
void vector2disk_in2d(std::string const &out_filename, Size const dim1, Size const dim2, utility::vector1< T > const &out_vector)
int main(int argc, char *argv[])
ocstream cout(std::cout)
Wrapper around std::cout.
Definition: ocstream.hh:287
ozstream & write(char const *str, std::streamsize const count)
Write a string.
Definition: ozstream.hh:285
BooleanOptionKey const exit("options:exit")
Definition: OptionKeys.hh:51
void vector2disk_in1d(std::string const &out_filename, utility::vector1< T > const &out_vector)
ozstream: Output file stream wrapper for uncompressed and compressed files
Definition: ozstream.hh:53
void update_scores(utility::vector1< float > &scores, Pose &pose, scoring::ScoreFunctionOP const scorefxn)
void init()
set global 'init_was_called' to true
Definition: init.cc:26
Program options global and initialization function.
void MC_run()
static T max(T x, T y)
Definition: Svm.cc:19
void add(float const value, Size const n_items)
platform::Size Size
Definition: random.fwd.hh:30
void * my_main(void *)
rule< Scanner, option_closure::context_t > option
Definition: Tag.cc:378