19 #include <protocols/sewing/hashing/Hasher.hh>
20 #include <protocols/sewing/conformation/Model.hh>
21 #include <protocols/sewing/util/io.hh>
24 #include <core/types.hh>
25 #include <core/id/AtomID.hh>
26 #include <core/pose/Pose.hh>
27 #include <core/import_pose/import_pose.hh>
38 #include <basic/options/keys/sewing.OptionKeys.gen.hh>
39 #include <basic/options/keys/in.OptionKeys.gen.hh>
40 #include <basic/options/keys/inout.OptionKeys.gen.hh>
52 main(
int argc,
char * argv [] ) {
54 using namespace basic::options::OptionKeys;
55 using namespace protocols::sewing;
62 std::stringstream err;
63 err <<
"You must provide a mode for sewing_hasher to run in using the -sewing:mode flag. Valid options are" << std::endl;
64 err <<
" -generate: generates a model file from an sqlite database" << std::endl;
65 err <<
" -generate_five_ss_model: generates a 3~5 ss model file from an sqlite database" << std::endl;
66 err <<
" -hash: score all models against each other and create a plain text score file (MPI required)" << std::endl;
67 err <<
" -convert: convert a plain text score file to a binary score file. This is required by the SEWING movers" << std::endl;
72 std::map< int, Model > models;
73 std::string model_filename =
option[sewing::model_file_name];
74 if ( !
option[sewing::model_file_name].
user() ) {
75 std::stringstream err;
76 err <<
"You must provide a model file name to the sewing_hasher using the -model_file_name flag. To generate a new model file use the "
77 <<
"-generate_models_from_db flag and a new model file with that name will be written. Otherwise, the model file will be read";
87 std::stringstream comments;
96 bool hash_tag_only_terminal_Es =
option[sewing::hash_tag_only_terminal_Es].def(
false);
97 TR <<
"hash_tag_only_terminal_Es: " << hash_tag_only_terminal_Es << std::endl;
98 std::string hash_between;
103 comments <<
"#Models generated from PDB input (-l flag)" << std::endl;
106 for (
core::Size i = 1; i <= input_lists.size(); i++ ) {
108 if ( !current_input_list.
good() ) {
111 while ( current_input_list.
good() ) {
113 current_input_list.
getline(name);
118 for (
core::Size i=1; i<=pdb_library.size(); ++i ) {
120 core::import_pose::pose_from_pdb(pose, pdb_library[i]);
122 segments.push_back(std::make_pair(1, pose.total_residue()));
123 Model pdb_model = create_model_from_pose(pose, segments, (
int)i);
124 models.insert(std::make_pair(i, pdb_model));
130 if ( hash_tag_only_terminal_Es ) {
131 hash_between =
"hash_tag_only_terminal_Es";
132 comments <<
"#Only terminal Es are hash bool true to be merged with other nodes later (but as of 2015/11/14, later model assembly has 'ERROR: alignment_scores.size() == 1'" << std::endl;
136 hash_between =
"hash_between_any_HEs";
137 comments <<
"# hash between any HEs are bool true to be merged with other nodes" << std::endl;
142 if ( !
option[ sewing::assembly_type ].
user() ) {
143 std::stringstream err;
144 err <<
"You must provide an assembly_type (continuous or discontinuous) with the -sewing:assembly_type flag in order to extract models";
147 if (
option[ sewing::assembly_type ].
value() ==
"discontinuous" ) {
148 comments <<
"#Discontinuous models generated from sqlite database " <<
option[inout::dbms::database_name].value() << std::endl;
149 models = get_discontinuous_models_from_db();
150 }
else if (
option[ sewing::assembly_type ].
value() ==
"continuous" ) {
151 comments <<
"#Continuous models generated from sqlite database " <<
option[inout::dbms::database_name].value() << std::endl;
152 models = get_continuous_models_from_db(hash_between);
156 write_model_file(comments.str(), models, model_filename);
157 TR <<
"New model file " << model_filename <<
" successfully written." << std::endl;
162 std::stringstream comments;
172 comments <<
"# 3 or 5 secondary structures based models generated from sqlite database " <<
option[inout::dbms::database_name].value() << std::endl;
174 bool hash_tag_only_terminal_Es =
option[sewing::hash_tag_only_terminal_Es].def(
false);
175 TR <<
"hash_tag_only_terminal_Es: " << hash_tag_only_terminal_Es << std::endl;
176 std::string hash_between;
177 std::string model_five_ss_filename;
178 if ( hash_tag_only_terminal_Es ) {
179 hash_between =
"hash_tag_only_terminal_Es";
180 comments <<
"# only_terminal_Es_are_hash_bool_true_to_be_merged_with_other_node " << std::endl;
181 model_five_ss_filename = model_filename +
"_three_or_five_ss_will_be_hashed_only_between_Es";
183 hash_between =
"hash_between_any_HEs";
184 comments <<
"# hash_between_any_HEs_are_bool_true_to_be_merged_with_other_node " << std::endl;
185 model_five_ss_filename = model_filename +
"_three_or_five_ss_will_be_hashed_between_any_HEs";
188 std::map< int, Model > models = get_5_ss_models_from_db(hash_between);
190 write_model_file(comments.str(), models, model_five_ss_filename);
191 TR <<
"New model file with 3~5 ss " << model_five_ss_filename <<
" successfully written." << std::endl;
197 models = read_model_file(model_filename);
202 std::string binary_filename =
option[sewing::score_file_name].value();
203 SewGraphOP graph(
new SewGraph(models, 1) );
204 graph->report_binary_stats(models, binary_filename);
211 if ( !
option[sewing::score_file_name].
user() ) {
212 std::stringstream err;
213 err <<
"You must provide a score file name to the sewing_hasher for binary conversion.";
220 if (
option[ sewing::assembly_type ].
value() ==
"discontinuous" ) {
221 graph = SewGraphOP(
new SewGraph(models, 2) );
222 }
else if (
option[ sewing::assembly_type ].
value() ==
"continuous" ) {
223 graph = SewGraphOP(
new SewGraph(models, 1) );
225 graph->generate_binary_score_file(
option[sewing::score_file_name].
value(), binary_filename);
234 if ( !
option[sewing::score_file_name].
user() ) {
237 std::string score_file_name =
option[sewing::score_file_name];
241 core::Size num_segments_to_match =
option[sewing::num_segments_to_match].def(0);
244 TR <<
"Bundle Hasher options:" << std::endl;
245 TR <<
"\tMinimum Score: " << min_score << std::endl;
246 TR <<
"\tMaximum Clash Score: " << max_clash_score << std::endl;
247 TR <<
"\tNumber of segments to match: " << num_segments_to_match << std::endl;
248 TR <<
"\tScore file name: " << score_file_name << std::endl;
249 TR <<
"\tNeighborhood lookup box_length: " << box_length << std::endl;
257 n_models = models.size();
260 int starting_model = 0;
261 if (
option[sewing::starting_model].
user() ) {
262 starting_model =
option[sewing::starting_model].value();
269 TR <<
"Master node has " << n_models <<
" jobs to submit" << std::endl;
276 std::advance(it_end, n_models);
278 if(num_procs > n_models) {
279 utility_exit_with_message(
"You have more processors than number of models to hash. Reduce your number of processors for efficiency");
283 for(; it != it_end; ++it) {
285 if(it->first >= starting_model) {
287 TR <<
"Master node sent a job for model " << it->first <<
" to processor " << curr_proc << std::endl;
289 if(curr_proc == num_procs) {
break; }
294 while( it != it_end ){
298 TR <<
"Master node received a message from processor " << received_node << std::endl;
300 TR <<
"Master node sent a new job for model " << it->first <<
" to processor " << received_node << std::endl;
306 while(counter != num_procs){
311 TR <<
"Master node finished sending jobs" << std::endl;
318 if(model_id == 0){
break; }
320 TR <<
"Processor " << rank <<
" received job for model number " << model_id << std::endl;
329 for(; it != it_end; ++it) {
330 hasher.insert(it->second);
338 ScoreResults group_scores = hasher.score(models[model_id], num_segments_to_match, min_score, max_clash_score,
false, box_length);
339 hasher.remove_connection_inconsistencies(models, group_scores);
341 write_hashing_scores_to_file(group_scores, node_score_file_name);
342 TR <<
"Processor " << rank <<
" done scoring. Found " << group_scores.size() <<
" valid comparisons" << std::endl;
367 TR <<
"Processor " << rank <<
" was told to stop working" << std::endl;
369 MPI_Barrier( MPI_COMM_WORLD );
376 for ( ; it1 != it_end; ++it1 ) {
378 for ( ; it2 != it1; ++it2 ) {
379 hasher.insert(it2->second);
382 scores = hasher.score(it1->second, num_segments_to_match, min_score, max_clash_score,
true, box_length);
383 hasher.remove_connection_inconsistencies(models, scores);
384 TR <<
"Done scoring " << it2->first <<
" found " << scores.size() <<
" valid comparisons" << std::endl;
385 if ( scores.size() > 0 &&
TR.
Debug ) {
386 BasisPair bp = scores.begin()->first;
387 std::map< SegmentPair, core::Size > segment_matches = scores.begin()->second.segment_match_counts;
388 std::map< SegmentPair, AtomMap > segment_matches2 = scores.begin()->second.segment_matches;
389 TR.
Debug <<
"After scoring." << std::endl;
390 TR.
Debug <<
"\tModels: " << bp.first.model_id <<
" " << bp.second.model_id << std::endl;
391 TR.
Debug <<
"\tBasis Residues: " << bp.first.resnum <<
" " << bp.second.resnum << std::endl;
392 TR.
Debug <<
"\tNumber of matched segments: " << segment_matches.size() << std::endl;
396 for ( ; it != it_end; ++it ) {
397 TR.
Debug <<
"\tSegments " << it->first.first <<
" and " << it->first.second <<
" have " << it->second <<
" overlapping atoms." << std::endl;
398 TR.
Debug <<
"\tSegments " << it2->first.first <<
" and " << it2->first.second <<
" have " << it2->second.size() <<
" overlapping atoms." << std::endl;
402 write_hashing_scores_to_file(scores, score_file_name);
407 std::cerr <<
"Exception : " << std::endl;
ocstream cerr(std::cerr)
Wrapper around std::cerr.
#define utility_exit_with_message(m)
Exit with file + line + message.
int main(int argc, char *argv[])
static basic::Tracer TR("sewing_hasher")
int receive_integer_from_node(int source)
Use MPI to receive a single integer from a particular node.
utility::keys::KeyLookup< KeyType >::const_iterator const_iterator
Key collection iterators.
izstream & getline(char *line, std::streamsize const count)
Get the rest of the line.
void init(int argc, char *argv[])
Command line init() version.
BooleanOptionKey const user("options:user")
File name class supporting Windows and UN*X/Linux format names.
virtual void show(std::ostream &) const =0
static THREAD_LOCAL basic::Tracer TR("basic.options")
izstream: Input file stream wrapper for uncompressed and compressed files
Input file stream wrapper for uncompressed and compressed files.
void send_integer_to_node(int destination, int message)
std::vector with 1-based indexing
File name class supporting Windows and UN*X/Linux format names.
int receive_integer_from_anyone()
Use MPI to wait until some node sends an integer – usually its own mpi_rank so that it can send furt...
rule< Scanner, options_closure::context_t > options
utility::options::OptionCollection option
OptionCollection global.
BooleanOptionKey const exit("options:exit")
vector1: std::vector with 1-based indexing
Class for handling user debug/warnings/errors. Use instance of this class instead of 'std::cout' for ...
rule< Scanner, string_closure::context_t > name
std::string to_string(const T &t)
Some std::string helper functions.
Program options global and initialization function.