19 #include <core/conformation/PointGraph.hh>
20 #include <core/conformation/find_neighbors.hh>
21 #include <core/io/pdb/pose_io.hh>
23 #include <core/pack/task/PackerTask.hh>
24 #include <core/pack/task/operation/TaskOperations.hh>
25 #include <core/pack/task/operation/TaskOperationFactory.hh>
26 #include <core/pack/task/TaskFactory.hh>
27 #include <core/pose/Pose.hh>
28 #include <core/pose/PDBInfo.hh>
29 #include <core/conformation/Residue.hh>
30 #include <core/scoring/EnergyMap.hh>
31 #include <core/scoring/ScoreType.hh>
32 #include <core/scoring/ScoreFunction.hh>
33 #include <core/scoring/ScoreFunctionFactory.hh>
49 #include <basic/options/keys/run.OptionKeys.gen.hh>
61 #include <core/conformation/PointGraphData.hh>
62 #include <core/graph/UpperEdgeGraph.hh>
63 #include <core/import_pose/import_pose.hh>
64 #include <protocols/filters/Filter.fwd.hh>
71 using namespace protocols;
73 using namespace basic::options::OptionKeys;
77 namespace sequence_recovery {
92 std::stringstream usage_stream;
93 usage_stream <<
"No files given: Use either -file:s or -file:l to designate a single pdb or a list of pdbs.\n\n"
95 <<
"\n\t-native_pdb_list <list file>"
96 <<
"\n\t-redesign_pdb_list <list file>"
98 <<
"\n\t[-seq_recov_filename <file>] file to output sequence recoveries to (default: sequencerecovery.txt)"
99 <<
"\n\t[-sub_matrix_filename <file>] file to output substitution matrix to (default: submatrix.txt)"
100 <<
"\n\t[-parse_taskops_file <file>] tagfile which contains task operations to apply before measuring recovery (optional)"
101 <<
"\n\t[-ignore_unrecognized_res]"
114 core::pack::task::TaskFactoryOP
setup_tf( core::pack::task::TaskFactoryOP task_factory_ ) {
116 using namespace core::pack::task::operation;
120 TaskOperationFactory::TaskOperationOPs
tops;
122 TaskOperationFactory::get_instance()->newTaskOperations( tops, dm, tagfile_name );
123 for ( TaskOperationFactory::TaskOperationOPs::iterator it( tops.begin() ), itend( tops.end() ); it != itend; ++it ) {
124 task_factory_->push_back( *it );
127 task_factory_->push_back( TaskOperationCOP(
new pack::task::operation::InitializeFromCommandline ) );
130 return task_factory_;
138 using core::conformation::PointGraph;
139 using core::conformation::PointGraphOP;
141 PointGraphOP pg(
new PointGraph );
142 core::conformation::residue_point_graph_from_conformation( pose.conformation(), *pg );
143 core::conformation::find_neighbors<core::conformation::PointGraphVertexData,core::conformation::PointGraphEdgeData>( pg, 10.0 );
145 num_nbs.resize( pose.n_residue(), 0 );
154 num_nbs[
ii ] = pg->get_vertex(
ii).num_neighbors_counting_self();
165 scoring::ScoreFunctionOP
scorefxn = scoring::get_score_function();
168 std::set< Size > designable_set;
169 core::pack::task::PackerTaskOP design_task( tf->create_task_and_apply_taskoperations( pose ) );
172 TR<<
"Task for " << pose.pdb_info()->name() <<
" is: \n" << *(design_task) << std::endl;
176 for (
Size ii = 1;
ii<= design_task->total_residue(); ++
ii ) {
177 if ( design_task->being_designed(
ii ) ) {
178 designable_set.insert(
ii );
182 return designable_set;
205 Size n_correct_total(0);
Size n_total(0);
206 Size n_correct_total_core(0);
Size n_total_core(0);
207 Size n_correct_total_surface(0);
Size n_total_surface(0);
210 Size core_cutoff = 24;
216 while ( ( native_itr != native_last ) && (redesign_itr != redesign_last ) ) {
223 core::pack::task::TaskFactoryOP task_factory(
new core::pack::task::TaskFactory );
224 std::set< Size > design_set;
235 Size const nres( native_pose.total_residue() );
241 if ( ! native_pose.residue(*it).is_protein() ) {
242 native_sequence[ *it ] = chemical::aa_unk;
246 native_sequence[ *it ] = native_pose.residue( *it ).aa();
247 n_native[ native_pose.residue(*it).aa() ]++;
250 if ( num_neighbors[*it] >= core_cutoff ) {
251 n_native_core[ native_pose.residue(*it).aa() ]++;
255 if ( num_neighbors[*it] < surface_exposed_cutoff ) {
256 n_native_surface[ native_pose.residue(*it).aa() ]++;
266 if ( redesign_pose.residue( *it ).is_protein() ) {
270 n_designed[ redesign_pose.residue(*it).aa() ]++;
272 if ( num_neighbors[*it] >= core_cutoff ) { n_designed_core[ redesign_pose.residue(*it).aa() ]++; }
273 if ( num_neighbors[*it] < surface_exposed_cutoff ) { n_designed_surface[ redesign_pose.residue(*it).aa() ]++; }
276 if ( native_sequence[ *it ] == redesign_pose.residue(*it).aa() ) {
277 n_correct[ redesign_pose.residue(*it).aa() ]++;
279 if ( num_neighbors[*it] >= core_cutoff ) {
280 n_correct_core[ redesign_pose.residue(*it).aa() ]++;
281 n_correct_total_core++;
283 if ( num_neighbors[*it] < surface_exposed_cutoff ) {
284 n_correct_surface[ redesign_pose.residue(*it).aa() ]++;
285 n_correct_total_surface++;
291 sub_matrix( native_pose.residue(*it).aa(), redesign_pose.residue(*it).aa() )++;
297 native_itr++; redesign_itr++;
304 outputFile <<
"Residue\tNo.correct core\tNo.native core\tNo.designed core\tNo.correct/ No.native core\tNo.correct/ No.designed core\t"
305 <<
"No.correct\tNo.native\tNo.designed\tNo.correct/ No.native\tNo.correct/ No.designed\t"
306 <<
"Residue\tNo.correct surface\tNo.native surface\tNo.designed surface\tNo.correct/ No.native\tNo.correct/ No.designed" << std::endl;
309 for (
Size ii = 1;
ii <= chemical::num_canonical_aas; ++
ii ) {
311 outputFile << chemical::name_from_aa( chemical::AA(
ii) ) <<
"\t"
312 << n_correct_core[
ii ] <<
"\t" << n_native_core[
ii ] <<
"\t" << n_designed_core[
ii ] <<
"\t";
314 if ( n_native_core[
ii] != 0 ) outputFile <<
F(4,2, (
float)n_correct_core[
ii]/n_native_core[
ii] ) <<
"\t";
315 else outputFile <<
"---\t";
316 if ( n_designed_core[ii] != 0 ) outputFile <<
F(4,2, (
float)n_correct_core[ii]/n_designed_core[ii] ) <<
"\t";
317 else outputFile <<
"---\t";
323 outputFile << n_correct[
ii ] <<
"\t" << n_native[
ii ] <<
"\t" << n_designed[
ii ] <<
"\t";
324 if ( n_native[ii] != 0 ) outputFile <<
F(4,2, (
float)n_correct[ii]/n_native[ii] ) <<
"\t";
325 else outputFile <<
"---\t";
326 if ( n_designed[ii] != 0 ) outputFile <<
F(4,2, (
float)n_correct[ii]/n_designed[ii] ) <<
"\t";
327 else outputFile <<
"---\t";
333 outputFile << chemical::name_from_aa( chemical::AA(ii) ) <<
"\t"
334 << n_correct_surface[
ii ] <<
"\t" << n_native_surface[
ii ] <<
"\t" << n_designed_surface[
ii ] <<
"\t";
336 if ( n_native_surface[ii] != 0 ) outputFile <<
F(4,2, (
float)n_correct_surface[ii]/n_native_surface[ii] ) <<
"\t";
337 else outputFile <<
"---\t";
338 if ( n_designed_surface[ii] != 0 ) outputFile <<
F(4,2, (
float)n_correct_surface[ii]/n_designed_surface[ii] ) <<
"\t";
339 else outputFile <<
"---\t";
345 outputFile << std::endl;
349 outputFile <<
"Total\t"
350 << n_correct_total_core <<
"\t" << n_total_core <<
"\t\t" <<
F(5,3, (
float)n_correct_total_core/n_total_core ) <<
"\t\t"
351 << n_correct_total <<
"\t" << n_total <<
"\t\t" <<
F(5,3, (
float)n_correct_total/n_total ) <<
"\t\tTotal\t"
352 << n_correct_total_surface <<
"\t" << n_total_surface <<
"\t\t" <<
F(5,3, (
float)n_correct_total_surface/n_total_surface )
360 matrixFile <<
"AA_TYPE" <<
"\t" ;
361 for (
Size ii = 1;
ii <= chemical::num_canonical_aas; ++
ii ) {
362 matrixFile <<
"nat_"<<chemical::name_from_aa( chemical::AA(
ii) ) <<
"\t";
364 matrixFile<<std::endl;
367 for (
Size ii = 1;
ii <= chemical::num_canonical_aas; ++
ii ) {
368 matrixFile <<
"sub_" << chemical::name_from_aa( chemical::AA(
ii) );
369 for (
Size jj = 1; jj <= chemical::num_canonical_aas; ++jj ) {
371 matrixFile<<
"\t" << sub_matrix( jj,
ii );
373 matrixFile << std::endl;
403 int main(
int argc,
char* argv[] ) {
429 std::vector< FileName > native_pdb_file_names;
431 std::ifstream native_data( native_pdb_list_file_name.c_str() );
432 std::string native_line;
433 if ( !native_data.good() ) {
436 while (
getline( native_data, native_line ) ) {
437 native_pdb_file_names.push_back( FileName( native_line ) );
443 std::vector< FileName > redesign_pdb_file_names;
445 std::ifstream redesign_data( redesign_pdb_list_file_name.c_str() );
446 std::string redesign_line;
447 if ( !redesign_data.good() ) {
450 while (
getline( redesign_data, redesign_line ) ) {
451 redesign_pdb_file_names.push_back( FileName( redesign_line ) );
453 redesign_data.close();
456 if ( native_pdb_file_names.size() != redesign_pdb_file_names.size() ) {
457 utility_exit_with_message(
"Size of native pdb list file: " + native_pdb_list_file_name +
" does not equal size of redesign pdb list: " + redesign_pdb_list_file_name +
"!\n" );
464 std::vector< FileName >::iterator native_pdb( native_pdb_file_names.begin() ), native_last_pdb(native_pdb_file_names.end());
465 std::vector< FileName >::iterator redesign_pdb( redesign_pdb_file_names.begin() ), redesign_last_pdb(redesign_pdb_file_names.end());
467 while ( ( native_pdb != native_last_pdb ) && ( redesign_pdb != redesign_last_pdb ) ) {
477 TR <<
"Reading in poses " << *native_pdb <<
" and " << *redesign_pdb << std::endl;
479 core::import_pose::pose_from_pdb( native_pose, *native_pdb );
480 core::import_pose::pose_from_pdb( redesign_pose, *redesign_pdb );
482 native_poses.push_back( native_pose ); redesign_poses.push_back( redesign_pose );
483 native_pdb++; redesign_pdb++;
488 TR <<
"Measuring rotamer recovery" << std::endl;
491 TR <<
"Measuring sequence recovery" << std::endl;
495 std::cout <<
"caught exception " << e.
msg() << std::endl;
#define utility_exit_with_message(m)
Exit with file + line + message.
#define utility_exit_with_message_status(m, s)
Exit with file + line + message + status.
core::pack::task::TaskFactoryOP setup_tf(core::pack::task::TaskFactoryOP task_factory_)
load custom TaskOperations according to an xml-like utility::tag file
virtual std::string const msg() const
Automatic hidden index key for integer options.
BooleanOptionKey const native_sequence("usec::native_sequence")
FileOptionKey const native_pdb_list("sequence_recovery::native_pdb_list")
utility::keys::KeyLookup< KeyType >::const_iterator const_iterator
Key collection iterators.
vector0: std::vector with assert-checked bounds
void measure_rotamer_recovery(utility::vector1< core::pose::Pose > &, utility::vector1< core::pose::Pose > &)
std::istream & getline(std::istream &stream, Fstring &s)
Get Line from Stream.
void init(int argc, char *argv[])
Command line init() version.
BooleanOptionKey const user("options:user")
utility::keys::lookup::end< KeyType > const end
Automatic hidden index key for file options.
IntegerOptionKey const se_cutoff("sequence_recovery::se_cutoff")
int main(int argc, char *argv[])
Automatic hidden index key for string options.
Platform independent operations on files (except I/O)
BooleanOptionKey const rotamer_recovery("sequence_recovery::rotamer_recovery")
File name class supporting Windows and UN*X/Linux format names.
StringOptionKey const seq_recov_filename("sequence_recovery::seq_recov_filename")
common derived classes for thrown exceptions
StringOptionKey const sub_matrix_filename("sequence_recovery::sub_matrix_filename")
FileOptionKey const redesign_pdb_list("sequence_recovery::redesign_pdb_list")
Automatic hidden index key for boolean options.
rule< Scanner, options_closure::context_t > options
FileOptionKey const parse_taskops_file("sequence_recovery::parse_taskops_file")
general-purpose store for any reference-count derived object
Output file stream wrapper for uncompressed and compressed files.
void measure_sequence_recovery(utility::vector1< core::pose::Pose > &native_poses, utility::vector1< core::pose::Pose > &redesign_poses)
iterates over all designed positions and determines identity to native. outputs recoveries to file...
void fill_num_neighbors(pose::Pose &pose, utility::vector1< core::Size > &num_nbs)
helper method which uses the tenA nb graph in the pose object to fill a vector with nb counts ...
static THREAD_LOCAL basic::Tracer TR("sequence_recovery")
ocstream cout(std::cout)
Wrapper around std::cout.
vector1: std::vector with 1-based indexing
Class for handling user debug/warnings/errors. Use instance of this class instead of 'std::cout' for ...
bool file_exists(std::string const &path)
Does File Exist?
std::set< Size > fill_designable_set(pose::Pose &pose, pack::task::TaskFactoryOP &tf)
return the set of residues that are designable based given pose
ozstream: Output file stream wrapper for uncompressed and compressed files
void init_usage_prompt(std::string exe)
rule< Scanner, option_closure::context_t > option
FArray2D: Fortran-Compatible 2D Array.