Rosetta
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Pages
sequence_tolerance.cc
Go to the documentation of this file.
1 // -*- mode:c++;tab-width:2;indent-tabs-mode:t;show-trailing-whitespace:t;rm-trailing-spaces:t -*-
2 // vi: set ts=2 noet:
3 //
4 // (c) Copyright Rosetta Commons Member Institutions.
5 // (c) This file is part of the Rosetta software suite and is made available under license.
6 // (c) The Rosetta software is developed by the contributing members of the Rosetta Commons.
7 // (c) For more information, see http://www.rosettacommons.org. Questions about this can be
8 // (c) addressed to University of Washington UW TechTransfer, email: license@u.washington.edu.
9 
10 /// @file sequence_tolerance.cc
11 /// @brief App for predicting the sequence tolerance of a structure or set of structures
12 
13 #include <protocols/genetic_algorithm/GeneticAlgorithm.hh>
14 #include <protocols/multistate_design/MultiStatePacker.hh>
15 #include <protocols/multistate_design/PackingState.hh>
16 #include <protocols/multistate_design/MetricCalculatorFitnessFunction.hh>
17 #include <protocols/multistate_design/MultiStateEntity.hh>
18 #include <protocols/toolbox/pose_metric_calculators/DecomposeAndReweightEnergiesCalculator.hh>
19 #include <protocols/toolbox/pose_metric_calculators/ResidueDecompositionByChainCalculator.hh>
20 #include <core/pose/metrics/simple_calculators/SasaCalculatorLegacy.hh>
21 #include <protocols/toolbox/pose_metric_calculators/NumberHBondsCalculator.hh>
22 #include <protocols/toolbox/pose_metric_calculators/BuriedUnsatisfiedPolarsCalculator.hh>
23 #include <protocols/toolbox/pose_metric_calculators/SurfaceCalculator.hh>
24 #include <protocols/toolbox/pose_metric_calculators/MetricValueGetter.hh>
25 using namespace protocols::genetic_algorithm;
26 using namespace protocols::multistate_design;
27 
28 //#include <protocols/dna/DnaChains.hh>
29 #include <protocols/dna/PDBOutput.hh>
30 //#include <protocols/dna/RestrictDesignToProteinDNAInterface.hh>
31 //#include <protocols/dna/RotamerDNAHBondFilter.hh>
32 #include <protocols/dna/util.hh> // add_constraints_from_file
33 //using namespace protocols::dna;
34 
35 #include <protocols/viewer/viewers.hh>
36 
37 #include <devel/init.hh>
38 #include <core/types.hh>
39 #include <core/chemical/ResidueType.hh>
40 #include <basic/options/option.hh>
41 #include <core/pack/task/PackerTask.hh>
42 #include <core/pack/task/TaskFactory.hh>
43 #include <core/pack/task/operation/TaskOperations.hh>
44 #include <core/pose/Pose.hh>
45 #include <core/pose/metrics/CalculatorFactory.hh>
46 #include <core/scoring/ScoreFunction.hh>
47 #include <core/scoring/ScoreFunctionFactory.hh>
48 //#include <core/scoring/dna/setup.hh>
49 
50 #include <utility/file/FileName.hh>
51 #include <utility/vector1.hh>
52 
53 #include <basic/prof.hh>
54 #include <basic/Tracer.hh>
55 using basic::t_info;
56 using basic::t_debug;
57 using basic::t_trace;
58 static THREAD_LOCAL basic::Tracer TR( "app.sequence_tolerance" );
59 
60 #include <ObjexxFCL/format.hh>
61 #include <ObjexxFCL/string.functions.hh> // lead_zero_string_of
62 
63 #include <iostream>
64 #include <string>
65 #include <list> // PackerTask allowed_residue_types
66 
67 // option key includes
68 #include <basic/options/keys/out.OptionKeys.gen.hh>
69 #include <basic/options/keys/in.OptionKeys.gen.hh>
70 #include <basic/options/keys/ms.OptionKeys.gen.hh>
71 #include <basic/options/keys/packing.OptionKeys.gen.hh>
73 
74 using namespace core;
75 using namespace pose::metrics;
76 using namespace conformation;
77 using namespace chemical;
78 using namespace pack;
79 using namespace task;
80 using namespace operation;
81 using namespace scoring;
82 using namespace ObjexxFCL;
83 using namespace ObjexxFCL::format;
84 using namespace protocols::toolbox::pose_metric_calculators;
85 
87 
88 #include <core/import_pose/import_pose.hh>
89 #include <utility/vector0.hh>
90 #include <basic/MetricValue.hh>
91 
92 
93 OPT_1GRP_KEY(RealVector, seq_tol, fitness_master_weights)
94 OPT_1GRP_KEY(Boolean, seq_tol, unsat_polars)
95 OPT_1GRP_KEY(Boolean, seq_tol, surface)
96 
97 void *
98 sequence_tolerance_main( void * );
99 
100 int
101 main( int argc, char * argv[] )
102 {
103  try {
104  NEW_OPT(seq_tol::fitness_master_weights, "fitness function master weights", utility::vector1<core::Real>());
105  NEW_OPT(seq_tol::unsat_polars, "calculate the number of buried unsatisfied polar atoms", false);
106  NEW_OPT(seq_tol::surface, "calculate the the surface score", false);
107  devel::init( argc, argv );
108  protocols::viewer::viewer_main( sequence_tolerance_main );
109  } catch ( utility::excn::EXCN_Base const & e ) {
110  std::cout << "caught exception " << e.msg() << std::endl;
111  return -1;
112  }
113  return 0;
114 }
115 
116 // for sorting std::pairs by the second element
117 template <typename A, typename B>
118 bool cmpscnd( std::pair<A,B> const & a, std::pair<A,B> const & b )
119 {
120  return a.second < b.second;
121 }
122 
123 void *
125 {
126  using namespace basic::options;
127  using namespace basic::options::OptionKeys;
128 
130 
131  // get filenames
133  Filenames pdbnames;
134 
135  if ( !option[ in::file::s ].user() ) return 0;
137  // load pdb
138  pose::PoseOP pose( new pose::Pose );
139  core::import_pose::pose_from_pdb( *pose, pdbnames.front() );
140  protocols::dna::add_constraints_from_file( *pose ); // (if specified by options)
141 
142  // set up genetic algorithm
143  GeneticAlgorithm ga;
144  ga.set_max_generations( option[ ms::generations ] );
145  ga.set_max_pop_size( option[ ms::pop_size ]() );
146  ga.set_num_to_propagate( 1 );
147  ga.set_frac_by_recomb( option[ ms::fraction_by_recombination ]() );
148 
149  // create an entity template object from which all entities will be cloned
150  Entity::OP entity_template( new protocols::multistate_design::MultiStateEntity );
151  ga.set_entity_template(entity_template);
152 
153  // enable checkpointing and load if the checkpoint files exist
154  ga.set_checkpoint_prefix( option[ ms::checkpoint::prefix ]() );
155  ga.set_checkpoint_write_interval( option[ ms::checkpoint::interval ]() );
156  ga.set_checkpoint_gzip( option[ ms::checkpoint::gz ]() );
157  ga.set_checkpoint_rename( option[ ms::checkpoint::rename ]() );
158  ga.read_checkpoint();
159 
160  // set up the EntityRandomizer
161  PositionSpecificRandomizer::OP rand( new PositionSpecificRandomizer );
162  rand->set_mutation_rate( option[ ms::mutate_rate ]() );
163  rand->set_entity_template(entity_template);
164 
165  TaskFactoryOP taskfactory( new TaskFactory );
166  taskfactory->push_back( TaskOperationCOP( new operation::InitializeFromCommandline ) );
167  if ( option[ packing::resfile ].user() ) {
168  taskfactory->push_back( TaskOperationCOP( new operation::ReadResfile ) );
169  }
170  PackerTaskOP ptask = taskfactory->create_task_and_apply_taskoperations( *pose );
171 
172  // figure out design positions/choices from PackerTask
173  utility::vector1< Size > design_positions;
174  for ( Size i(1), end( ptask->total_residue() ); i <= end; ++i ) {
175  // ignore DNA positions
176  if ( !pose->residue_type(i).is_protein() ) continue;
177  ResidueLevelTask const & rtask( ptask->residue_task(i) );
178  if ( rtask.being_designed() ) {
179  design_positions.push_back(i);
180  // will be passed to randomizer
181  EntityElements choices;
182  // to avoid duplicate AA's (such as for multiple histidine ResidueTypes)
183  std::set< core::chemical::AA > aaset;
184  std::list< ResidueTypeCOP > const & allowed( rtask.allowed_residue_types() );
185  for ( std::list< ResidueTypeCOP >::const_iterator t( allowed.begin() ), end( allowed.end() );
186  t != end; ++t ) {
187  core::chemical::AA aa( (*t)->aa() );
188  // avoid duplicate AA's (such as for multiple histidine ResidueTypes)
189  if ( aaset.find( aa ) != aaset.end() ) continue;
190  aaset.insert(aa);
191  TR(t_debug) << "adding choice " << aa << std::endl;
192  choices.push_back( protocols::genetic_algorithm::EntityElementOP( new PosType( i, aa ) ) );
193  }
194  rand->append_choices( choices );
195  }
196  }
197 
198  // done setting up the EntityRandomizer
199  ga.set_rand( rand );
200 
201  // set up the FitnessFunction
202  MultiStatePackerOP func( new MultiStatePacker(option[ ms::num_packs ]()) );
203  func->set_aggregate_function(MultiStateAggregateFunction::COP( MultiStateAggregateFunction::OP( new MultiStateAggregateFunction() ) ));
204  ScoreFunctionOP score_function( core::scoring::get_score_function() );
205  func->set_scorefxn( score_function );
206 
207  // set up the PoseMetricCalculators to be used by the SingleStateFitnessFunction
208  core::pose::metrics::CalculatorFactory & calculator_factory(core::pose::metrics::CalculatorFactory::Instance());
209 
210  ResidueDecompositionByChainCalculatorOP res_decomp_calculator( new ResidueDecompositionByChainCalculator() );
211  calculator_factory.register_calculator("residue_decomposition", res_decomp_calculator);
212 
213  DecomposeAndReweightEnergiesCalculatorOP decomp_reweight_calculator( new DecomposeAndReweightEnergiesCalculator("residue_decomposition") );
214  if ( option[ seq_tol::fitness_master_weights ].user() ) {
215  decomp_reweight_calculator->master_weight_vector(option[ seq_tol::fitness_master_weights ]());
216  }
217  calculator_factory.register_calculator("fitness", decomp_reweight_calculator);
218 
219  // done setting up the PoseMetricCalculators
220  protocols::multistate_design::MetricCalculatorFitnessFunctionOP metric_fitness_function( new protocols::multistate_design::MetricCalculatorFitnessFunction("fitness", "weighted_total") );
221 
222  // evaluate and output the fitness of the input pose
223  func->scorefxn()->score(*pose);
224  TR(t_info) << "Residue Decomposition: " << pose->print_metric("residue_decomposition", "residue_decomposition") << std::endl;
225  TR(t_info) << "Fitness Function Master Weights: " << pose->print_metric("fitness", "master_weight_vector") << std::endl;
226  TR(t_info) << "Fitness of Starting Structure: " << '\n' << pose->print_metric("fitness", "summary") << std::endl;
227 
228  // tell the FitnessFunction to add the vector of components without master weighting to MultiStateEntity objects
229  func->add_metric_value_getter(
230  "fitness_comp",
231  MetricValueGetter("fitness", "weighted_total_no_master_vector", basic::MetricValueBaseCOP( basic::MetricValueBaseOP( new basic::MetricValue<utility::vector1<Real> > ) ))
232  );
233 
234  if ( option[ seq_tol::unsat_polars ] ) {
235  // tell the FitnessFunction to add buried unsatisfied polars to MultiStateEntity objects
236  calculator_factory.register_calculator("sasa", PoseMetricCalculatorOP( new core::pose::metrics::simple_calculators::SasaCalculatorLegacy() ));
237  calculator_factory.register_calculator("num_hbonds", PoseMetricCalculatorOP( new NumberHBondsCalculator() ));
238  calculator_factory.register_calculator("unsat_polars", PoseMetricCalculatorOP( new BuriedUnsatisfiedPolarsCalculator("sasa", "num_hbonds") ));
239  func->add_metric_value_getter(
240  "unsat_polars",
241  MetricValueGetter("unsat_polars", "all_bur_unsat_polars", basic::MetricValueBaseCOP( basic::MetricValueBaseOP( new basic::MetricValue<Size> ) ))
242  );
243  }
244 
245  if ( option[ seq_tol::surface ] ) {
246  // tell the FitnessFunction to add the surface score to MultiStateEntity objects
247  calculator_factory.register_calculator("surface", PoseMetricCalculatorOP( new SurfaceCalculator() ));
248  func->add_metric_value_getter(
249  "surface",
250  MetricValueGetter("surface", "total_surface", basic::MetricValueBaseCOP( basic::MetricValueBaseOP( new basic::MetricValue<Real> ) ))
251  );
252  }
253 
254  // add target state to the FitnessFunction
255  PackingStateOP target_state( new PackingState( *pose, true ) );
256  target_state->fitness_function( metric_fitness_function );
257  target_state->create_packer_data( func->scorefxn(), ptask );
258  func->add_state( target_state );
259 
260  TR(t_info) << "There are " << func->num_positive_states() << " positive states and "
261  << func->num_negative_states() << " negative states" << std::endl;
262 
263  // do single-state designs to find best theoretical single-state energy
264  func->single_state_design();
265 
266  // done setting up the FitnessFunction
267  ga.set_func( func );
268 
269  // start the genetic algorithm from scratch if not resuming from a checkpoint
270  if ( ga.population(ga.current_generation()).size() == 0 ) {
271  // add single-state design sequence(s) to genetic algorithm starting population and parents
272  SingleStateCOPs states( func->positive_states() );
273  TR(t_info) << "Adding single-state design entities:" << std::endl;
274  for ( SingleStateCOPs::const_iterator s( states.begin() ), end( states.end() ); s != end; ++s ) {
275  EntityElements traits;
276  for ( utility::vector1<Size>::const_iterator i( design_positions.begin() ),
277  end( design_positions.end() ); i != end; ++i ) {
278  PosType pt( *i, (*s)->pose().residue_type(*i).aa() );
279  traits.push_back( protocols::genetic_algorithm::EntityElementOP( new PosType( pt ) ) );
280  TR(t_info) << pt.to_string() << " ";
281  }
282  ga.add_entity( traits );
283  ga.add_parent_entity( traits );
284  TR(t_info) << std::endl;
285  }
286 
287  // make more entities by mutation of single-state seeds
288  ga.fill_by_mutation( option[ ms::pop_from_ss ]() );
289  // the rest are fully random
290  ga.fill_with_random_entities();
291  // clear parents for the next generation
292  ga.clear_parents();
293  }
294 
295  // loop over generations
296  while ( !ga.complete() ) {
297  if ( ga.current_generation_complete() ) ga.evolve_next_generation();
298  TR(t_info) << "Generation " << ga.current_generation() << ":" << std::endl;
299  ga.evaluate_fitnesses();
300  ga.print_generation_statistics(TR(t_info), ga.current_generation());
301  }
302 
303  // output resulting solution(s)
304  protocols::dna::PDBOutputOP pdboutput( new protocols::dna::PDBOutput );
305  pdboutput->reference_pose( *pose );
306  pdboutput->score_function( *score_function );
307 
308  std::string prefix("result");
309  if ( option[ OptionKeys::out::prefix ].user() ) prefix = option[ out::prefix ]();
310 
311  // sort local copy of sequence/fitness cache
312  typedef GeneticAlgorithm::TraitEntityHashMap TraitEntityHashMap;
313  TraitEntityHashMap const & cache( ga.entity_cache() );
315  // std::copy( cache.begin(), cache.end(), sortable.begin() ); // FAIL(?)
316  for ( TraitEntityHashMap::const_iterator it( cache.begin() ), end( cache.end() ); it != end; ++it ) {
317  sortable.push_back( it->second );
318  }
319  std::sort( sortable.begin(), sortable.end(), lt_OP_deref< Entity > );
320 
321  TR(t_info) << "Evaluated " << sortable.size() << " sequences out of " << rand->library_size()
322  << " possible.\nBest sequences:\n";
323  // list and output top solutions
324  int counter(1);
325  for ( utility::vector1<Entity::OP>::const_iterator it( sortable.begin() ), end( sortable.end() );
326  it != end && counter <= option[ ms::numresults ](); ++it, ++counter ) {
327  protocols::genetic_algorithm::Entity & entity(**it);
328  // apply sequence to existing positive state(s)
329  func->evaluate_positive_states( entity );
330  // copy pose
331  pose::Pose solution_pose = func->positive_states().front()->pose();
332  // output pdb with information
333  std::string traitstring;
334  for ( core::Size traitnum = 1; traitnum <= entity.traits().size(); ++traitnum ) {
335  PosTypeCOP postype = utility::pointer::dynamic_pointer_cast< protocols::multistate_design::PosType const > ( entity.traits()[traitnum] );
336  if ( ! postype ) {
337  utility_exit_with_message( "PosType dynamic cast failed for entity element with name " + entity.traits()[traitnum]->name() );
338  }
339  traitstring += core::chemical::oneletter_code_from_aa( postype->type() );
340  }
341  std::string pdbname( prefix + "_" + lead_zero_string_of(counter,4) + "_" + traitstring + ".pdb" );
342  std::list< std::string > extra_lines;
343  std::ostringstream ms_info;
344  ms_info << "MultiState Fitness: " << F(5,4,entity.fitness());
345  extra_lines.push_back( ms_info.str() );
346  ms_info.str(""); // funky way to 'empty' ostringstream
347  ms_info << "MultiState Fitness Offset: " << F(5,4,entity.fitness() - (*(sortable.begin()))->fitness());
348  extra_lines.push_back( ms_info.str() );
349  ms_info.str(""); // funky way to 'empty' ostringstream
350  ms_info << "MultiState Fitness Offset: " << F(5,4,entity.fitness() - (*(sortable.begin()))->fitness());
351  extra_lines.push_back( ms_info.str() );
352  ms_info.str(""); // funky way to 'empty' ostringstream
353  ms_info << "MultiState Sequence:";
355  pos( entity.traits().begin() ), end( entity.traits().end() );
356  pos != end; ++pos ) {
357  ms_info << " " << (*pos)->to_string();
358  TR(t_info) << (*pos)->to_string() << " ";
359  }
360  TR(t_info) << "fitness " << F(5,4,entity.fitness()) << '\n';
361  extra_lines.push_back( ms_info.str() );
362  pdboutput->add_info( "multistate_design", extra_lines, false );
363  (*pdboutput)( solution_pose, pdbname );
364  }
365  TR(t_info) << std::flush;
366 
368  return 0;
369 }
#define utility_exit_with_message(m)
Exit with file + line + message.
Definition: exit.hh:47
def vector
Definition: Equations.py:5
#define THREAD_LOCAL
virtual std::string const msg() const
Definition: EXCN_Base.hh:70
std::string lead_zero_string_of(T const &t, int const w)
Leading-Zero Right-Justified string of a Template Argument Type Supporting Stream Output...
utility::keys::KeyLookup< KeyType >::const_iterator const_iterator
Key collection iterators.
dictionary size
Definition: amino_acids.py:44
vector0: std::vector with assert-checked bounds
void init(int argc, char *argv[])
Command line init() version.
Definition: init.cc:23
BooleanOptionKey const user("options:user")
Definition: OptionKeys.hh:40
list surface
Definition: PDBInfo.py:43
core::pose::Pose Pose
Definition: supercharge.cc:101
utility::keys::lookup::end< KeyType > const end
static THREAD_LOCAL basic::Tracer TR("app.sequence_tolerance")
int main(int argc, char *argv[])
common derived classes for thrown exceptions
void * sequence_tolerance_main(void *)
Tracer IO system.
File name class supporting Windows and UN*X/Linux format names.
rule< Scanner, options_closure::context_t > options
Definition: Tag.cc:377
super::const_iterator const_iterator
Definition: vector1.hh:62
utility::pointer::shared_ptr< MetricValueBase const > MetricValueBaseCOP
void prof_reset()
Definition: prof.cc:382
#define NEW_OPT(akey, help, adef)
std::string F(int const w, int const d, float const &t)
Fixed Point Format: float.
Definition: format.cc:387
tuple pack
Definition: loops.py:50
ocstream cout(std::cout)
Wrapper around std::cout.
Definition: ocstream.hh:287
vector1: std::vector with 1-based indexing
Class for handling user debug/warnings/errors. Use instance of this class instead of 'std::cout' for ...
Definition: Tracer.hh:134
utility::pointer::shared_ptr< MetricValueBase > MetricValueBaseOP
#define OPT_1GRP_KEY(type, grp, key)
bool cmpscnd(std::pair< A, B > const &a, std::pair< A, B > const &b)
Program options global and initialization function.
void prof_show()
Definition: prof.cc:326
platform::Size Size
Definition: random.fwd.hh:30
rule< Scanner, option_closure::context_t > option
Definition: Tag.cc:378