Rosetta
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Pages
cluster.cc
Go to the documentation of this file.
1 // -*- mode:c++;tab-width:2;indent-tabs-mode:t;show-trailing-whitespace:t;rm-trailing-spaces:t -*-
2 // vi: set ts=2 noet:
3 //
4 // (c) Copyright Rosetta Commons Member Institutions.
5 // (c) This file is part of the Rosetta software suite and is made available under license.
6 // (c) The Rosetta software is developed by the contributing members of the Rosetta Commons.
7 // (c) For more information, see http://www.rosettacommons.org. Questions about this can be
8 // (c) addressed to University of Washington UW TechTransfer, email: license@u.washington.edu.
9 
10 /// @file Mike Tyka
11 /// @brief
12 
13 
14 // libRosetta headers
15 #include <protocols/simple_moves/ScoreMover.hh>
16 #include <core/scoring/ScoreFunction.hh>
17 #include <core/scoring/symmetry/SymmetricScoreFunction.hh>
18 #include <core/scoring/ScoreFunctionFactory.hh>
19 
20 #include <core/chemical/ChemicalManager.hh>
21 
22 #include <protocols/cluster/cluster.hh>
23 #include <protocols/loops/Loops.hh>
24 #include <core/import_pose/pose_stream/MetaPoseInputStream.hh>
25 #include <core/import_pose/pose_stream/util.hh>
26 
27 #include <basic/options/option.hh>
28 
29 #include <devel/init.hh>
30 
31 // C++ headers
32 #include <iostream>
33 #include <string>
34 #include <deque>
35 
36 // option key includes
37 
38 #include <basic/options/keys/in.OptionKeys.gen.hh>
39 #include <basic/options/keys/out.OptionKeys.gen.hh>
40 #include <basic/options/keys/cluster.OptionKeys.gen.hh>
41 #include <basic/options/keys/symmetry.OptionKeys.gen.hh>
42 
43 #include <utility/vector1.hh>
45 
46 
47 #if defined(WIN32) || defined(__CYGWIN__)
48 #include <ctime>
49 #endif
50 
51 
52 using namespace core;
53 using namespace ObjexxFCL;
54 using namespace core::pose;
55 using namespace protocols;
56 using namespace basic::options;
57 
58 int
59 main( int argc, char * argv [] ) {
60  try {
61 
62  using namespace protocols;
63  using namespace protocols::moves;
64  using namespace core::scoring;
65  using namespace basic::options;
66  using namespace basic::options::OptionKeys;
67  using namespace utility::file;
68  using namespace protocols::cluster;
69  using namespace basic::options::OptionKeys::cluster;
70 
71  option.add_relevant( OptionKeys::cluster::input_score_filter );
72  option.add_relevant( OptionKeys::cluster::output_score_filter );
73  option.add_relevant( OptionKeys::cluster::exclude_res );
74  option.add_relevant( OptionKeys::cluster::thinout_factor );
75  option.add_relevant( OptionKeys::cluster::radius );
76  option.add_relevant( OptionKeys::cluster::limit_cluster_size );
77  option.add_relevant( OptionKeys::cluster::limit_cluster_size_percent );
78  option.add_relevant( OptionKeys::cluster::random_limit_cluster_size_percent );
79  option.add_relevant( OptionKeys::cluster::limit_clusters );
80  option.add_relevant( OptionKeys::cluster::limit_total_structures );
81  option.add_relevant( OptionKeys::cluster::sort_groups_by_energy );
82  option.add_relevant( OptionKeys::cluster::remove_highest_energy_member );
83  option.add_relevant( OptionKeys::cluster::limit_dist_matrix );
84  option.add_relevant( OptionKeys::cluster::make_ensemble_cst );
86 
87  // initialize core
88  devel::init(argc, argv);
89 
90  std::cout << std::endl;
91  std::cout << std::endl;
92  std::cout << std::endl;
93  std::cout << " Rosetta Tool: cluster - clustering tool for PDBs and or silent files " << std::endl;
94  std::cout << " Usage: " << std::endl;
95  std::cout << " PDB input: -in:file:s *.pdb or " << std::endl;
96  std::cout << " -in:file:l list_of_pdbs " << std::endl;
97  std::cout << " -no_optH Dont change positions of Hydrogen atoms! " << std::endl;
98  std::cout << " Silent input: -in:file:silent silent.out silent input filesname " << std::endl;
99  std::cout << " -in:file:s specify specific tags to be extracted, if left out all will be taken " << std::endl;
100  std::cout << " -in:file:fullatom for full atom structures " << std::endl;
101  std::cout << " -in:file:silent_struct_type <type> specify the input silent-file format " << std::endl;
102  std::cout << " Native: -in:file:native native PDB if CaRMS is required " << std::endl;
103  std::cout << " Scorefunction: -score:weights weights weight set or weights file " << std::endl;
104  std::cout << " -score:patch patch patch set " << std::endl;
105  std::cout << " -rescore:verbose display score breakdown " << std::endl;
106  std::cout << " -rescore:output_only don't rescore " << std::endl;
107  std::cout << " Output: -nooutput don't print PDB structures " << std::endl;
108  std::cout << " -out:prefix myprefix prefix the output structures with a string " << std::endl;
109  std::cout << " Clustering: -cluster:radius <float> Cluster radius in A (for RMS clustering) or in inverse GDT_TS for GDT clustering. Use \"-1\" to trigger automatic radius detection" << std::endl;
110  std::cout << " -cluster:gdtmm Cluster by gdtmm instead of rms" << std::endl;
111  std::cout << " -cluster:input_score_filter <float> Ignore structures above certain energy " << std::endl;
112  std::cout << " -cluster:exclude_res <int> [<int> <int> ..] Exclude residue numbers " << std::endl;
113  std::cout << " -cluster:radius <float> Cluster radius" << std::endl;
114  std::cout << " -cluster:limit_cluster_size <int> Maximal cluster size" << std::endl;
115  std::cout << " -cluster:limit_cluster_size_percent <float> Maximal cluster size by percentage" << std::endl;
116  std::cout << " -cluster:random_limit_cluster_size_percent <float> Maximal cluster size by percentage, cut randomly" << std::endl;
117  std::cout << " -cluster:limit_clusters <int> Maximal number of clusters" << std::endl;
118  std::cout << " -cluster:limit_total_structures <int> Maximal number of structures in total" << std::endl;
119  std::cout << " -cluster:sort_groups_by_energy Sort clusters by energy." << std::endl;
120  std::cout << " -cluster:remove_highest_energy_member Remove highest energy member of each cluster" << std::endl;
121  std::cout << " -symmetry:symmetric_rmsd \t\t\t\t\t\t For symmetric systems find the lowest rms by testing all chain combinations. Works only with silent file input that contain symmetry info and with all CA rmsd" << std::endl;
122  std::cout << " Examples: " << std::endl;
123  std::cout << " cluster -database ~/minirosetta_database -in:file:silent silent.out -in::file::binary_silentfile -in::file::fullatom -native 1a19.pdb " << std::endl;
124  std::cout << "clustered Poses are given output names in the form of:" << std::endl;
125  std::cout << " c.i.j, which denotes the jth member of the ith cluster." << std::endl;
126  std::cout << std::endl;
127  std::cout << std::endl;
128  std::cout << std::endl;
129 
130  if ( !option[ out::output ].user() ) {
131  option[ out::nooutput ].value( true );
132  }
133 
134  int time_start = time(NULL);
135 
136  MoverOP mover;
137  core::scoring::ScoreFunctionOP sfxn;
138  sfxn = core::scoring::get_score_function();
139  if ( option[ basic::options::OptionKeys::symmetry::symmetric_rmsd ]() ) {
140  core::scoring::ScoreFunctionOP sfxn_sym =
141  core::scoring::symmetry::symmetrize_scorefunction( *sfxn );
142  sfxn = sfxn_sym;
143  }
144 
145  ClusterPhilStyleOP clustering;
146 
148  loops::Loops loops( true );
149  clustering = ClusterPhilStyleOP( new ClusterPhilStyle_Loop(loops ) );
150  } else {
151  clustering = ClusterPhilStyleOP( new ClusterPhilStyle() );
152  }
153 
154  clustering->set_score_function( sfxn );
155  if ( option[ basic::options::OptionKeys::cluster::input_score_filter ].user() ) {
156  clustering->set_filter( option[ basic::options::OptionKeys::cluster::input_score_filter ] );
157  }
158  clustering->set_cluster_radius(
159  option[ basic::options::OptionKeys::cluster::radius ]()
160  );
161  clustering->set_population_weight(
162  option[ basic::options::OptionKeys::cluster::population_weight ]()
163  );
164 
165 
166  // Figure out the thinout factor,
167  //mjo commenting out 'thinout_factor' because it is unused and causes a warning
168  //core::Real thinout_factor = 0.0;
169 
170  /// std::cerr << "STAARTING to cluster" << std::endl;
171  /// if( option[ basic::options::OptionKeys::cluster::thinout_factor ].user() ){
172  /// thinout_factor = option[ basic::options::OptionKeys::cluster::thinout_factor ]();
173  /// } else {
174  /// // autodetermine a good thinout factor
175  /// core::Size max_O2_clustering = 400;
176  /// core::Size expected_n_decoys = n_input_structures();
177  /// // figure out the number of decoys:
178  /// thinout_factor = 1.0 - core::Real( max_O2_clustering ) / core::Real( expected_n_decoys );
179  /// if( thinout_factor < 0 ) thinout_factor = 0.0; // few decoys, no thinout required
180  /// std::cout << "Estimated thinout factor to seed with " << max_O2_clustering << " structures: " << thinout_factor << " ( " << expected_n_decoys << " ) " << std::endl;
181  /// }
182 
183  core::chemical::ResidueTypeSetCOP rsd_set;
184  if ( option[ in::file::fullatom ]() ) {
185  rsd_set = core::chemical::ChemicalManager::get_instance()->residue_type_set( "fa_standard" );
186  } else {
187  rsd_set = core::chemical::ChemicalManager::get_instance()->residue_type_set( "centroid" );
188  }
189 
190 
191  // Cluster the first up-to-400 structures by calculating a full rms matrix
192  core::import_pose::pose_stream::MetaPoseInputStream input = core::import_pose::pose_stream::streams_from_cmd_line();
193  while ( input.has_another_pose() && (clustering->nposes() < 400 ) ) {
195  input.fill_pose( pose, *rsd_set );
196  clustering->apply( pose );
197  }
198 
199  mover = clustering;
200 
201  int time_readin = time(NULL);
202  clustering->do_clustering( option[ OptionKeys::cluster::max_total_cluster ]() );
203  int time_initialc = time(NULL);
204  clustering->do_redistribution();
205 
206  // Process any remaining structures by asigning to clusters or forming new clusters
207  std::cout << "Assigning extra structures ... " << std::endl;
208  AssignToClustersMoverOP mover_add_structures( new AssignToClustersMover( clustering ) );
209  mover_add_structures->set_score_function( sfxn );
210  //mover_add_structures->set_cluster_radius( clustering.get_cluster_radius() );
211 
212  while ( input.has_another_pose() ) {
214  input.fill_pose( pose, *rsd_set );
215  mover_add_structures->apply( pose );
216  }
217 
218 
219  clustering->print_summary();
220 
221  // Post processing
222  int time_total = time(NULL);
223  // clustering->sort_each_group_by_energy();
224  if ( option[ sort_groups_by_energy ] ) {
225  clustering->sort_groups_by_energy();
226  }
227  if ( option[ remove_singletons ] ) {
228  clustering->remove_singletons();
229  }
230  if ( option[ limit_cluster_size ].user() ) {
231  clustering->limit_groupsize( option[ limit_cluster_size ] );
232  }
233  if ( option[ limit_cluster_size_percent ].user() ) {
234  clustering->limit_groupsize( option[ limit_cluster_size_percent ] );
235  }
236  if ( option[ random_limit_cluster_size_percent ].user() ) {
237  clustering->random_limit_groupsize( option[ random_limit_cluster_size_percent ] );
238  }
239  if ( option[ limit_clusters ].user() ) {
240  clustering->limit_groups( option[ limit_clusters ] );
241  }
242  if ( option[ limit_total_structures ].user() ) {
243  clustering->limit_total_structures( option[ limit_total_structures] );
244  }
245  if ( option[ remove_highest_energy_member ] ) {
246  clustering->remove_highest_energy_member_of_each_group();
247  }
248 
249  if ( option[ export_only_low ] ) {
250  clustering->sort_each_group_by_energy();
251  clustering->sort_groups_by_energy( );
252  clustering->export_only_low( option[ export_only_low ]() );
253  }
254  // --------------------------------------------------------------------
255  // Results:
256  clustering->print_summary();
258  clustering->print_clusters_silentfile( option[ out::prefix ]() );
259  } else {
260  clustering->print_cluster_PDBs( option[ out::prefix ]() );
261  }
262 
263  if ( option[ basic::options::OptionKeys::cluster::make_ensemble_cst]() ) {
264  EnsembleConstraints_Simple cec( 1.0 );
265  clustering->create_constraints( option[ out::prefix ](), cec );
266  }
267 
268  clustering->print_cluster_assignment();
269  std::vector < Cluster > const & clusterlist=clustering->get_cluster_list();
270  std::list < int > sorted_list;
271  for ( int i=0; i<(int)clusterlist.size(); i++ ) {
272  for ( int j=0; j<(int)clusterlist[i].size(); j++ ) {
273  sorted_list.push_back( clusterlist[i][j] );
274  }
275  }
276 
277  sorted_list.sort();
278 
279  std::cout << "Timing: " << std::endl;
280  std::cout << " Readin:" << time_readin - time_start
281  << "s\n Cluster: " << time_initialc - time_readin
282  << "s\n Additional Clustering: " << time_total - time_initialc
283  << "s\n Total: " << time_total - time_start
284  << std::endl;
285 
286  } catch ( utility::excn::EXCN_Base const & e ) {
287  std::cout << "caught exception " << e.msg() << std::endl;
288  return -1;
289  }
290  return 0;
291 }
292 
virtual std::string const msg() const
Definition: EXCN_Base.hh:70
void register_options()
dictionary size
Definition: amino_acids.py:44
void init(int argc, char *argv[])
Command line init() version.
Definition: init.cc:23
BooleanOptionKey const user("options:user")
Definition: OptionKeys.hh:40
core::pose::Pose Pose
Definition: supercharge.cc:101
string output
Definition: contacts.py:31
common derived classes for thrown exceptions
int main(int argc, char *argv[])
Definition: cluster.cc:59
rule< Scanner, options_closure::context_t > options
Definition: Tag.cc:377
ocstream cout(std::cout)
Wrapper around std::cout.
Definition: ocstream.hh:287
vector1: std::vector with 1-based indexing
Program options global and initialization function.
int const silent
Named verbosity levels.
rule< Scanner, option_closure::context_t > option
Definition: Tag.cc:378