15 #include <core/types.hh>
16 #include <core/chemical/AA.hh>
17 #include <core/conformation/Residue.hh>
18 #include <core/chemical/ResidueTypeSet.hh>
19 #include <core/chemical/VariantType.hh>
20 #include <core/chemical/util.hh>
21 #include <core/chemical/ChemicalManager.hh>
22 #include <core/import_pose/import_pose.hh>
23 #include <core/scoring/ScoreFunction.hh>
24 #include <core/scoring/ScoreFunctionFactory.hh>
25 #include <core/sequence/Sequence.hh>
26 #include <core/sequence/util.hh>
28 #include <core/kinematics/FoldTree.hh>
29 #include <core/kinematics/tree/Atom.hh>
30 #include <core/id/AtomID_Map.hh>
31 #include <core/id/AtomID.hh>
32 #include <core/id/DOF_ID.hh>
33 #include <core/kinematics/AtomTree.hh>
34 #include <core/kinematics/Jump.hh>
35 #include <core/kinematics/MoveMap.hh>
36 #include <core/pose/PDBInfo.hh>
37 #include <protocols/farna/util.hh>
38 #include <protocols/viewer/viewers.hh>
39 #include <core/pose/Pose.hh>
40 #include <core/pose/util.hh>
41 #include <core/pose/rna/util.hh>
44 #include <core/io/pdb/pose_io.hh>
65 #include <basic/options/keys/out.OptionKeys.gen.hh>
66 #include <basic/options/keys/score.OptionKeys.gen.hh>
67 #include <basic/options/keys/in.OptionKeys.gen.hh>
73 using namespace basic;
74 using namespace protocols;
75 using namespace basic::options::OptionKeys;
79 using io::pdb::dump_pdb;
103 utility::
vector1<
sequence::SequenceOP > const & sequences_from_alignment )
106 using namespace basic::options::OptionKeys;
109 for (
Size n=1; n <= sequences_from_alignment.size(); n++ ) sequences.push_back( sequences_from_alignment[n]->sequence() );
111 Size const alignment_length = sequences[1].size();
116 for (
Size i = 1; i <= alignment_length; i++ ) {
118 bool found_a_gap(
false );
119 for (
Size n = 1; n <= sequences.size(); n++ ) {
120 if ( sequences[n][i-1] ==
'-' ) {
126 if ( !found_a_gap ) {
127 sequence_mask( i ) =
true;
134 bool look_for_fishy_gaps(
false );
136 if ( look_for_fishy_gaps ) {
137 Size const look_for_gap( 4 );
138 for (
int i = 1; i <=
int(alignment_length); i++ ) {
140 if ( sequence_mask(i) == false )
continue;
142 bool found_gap_before(
false ), found_gap_after(
false );
143 for (
int offset = -1 * (
int)look_for_gap; offset < 0; ++offset ) {
144 if ( i+offset > 1 && sequence_mask( i+offset ) ==
false ) {
145 found_gap_before =
true;
149 if ( !found_gap_before )
continue;
151 for (
int offset = 1;
Size(offset) <= look_for_gap; offset ++ ) {
152 if ( i+offset <=
int(alignment_length) && sequence_mask( i+offset ) == false ) {
153 found_gap_after =
true;
157 if ( !found_gap_after )
continue;
159 std::cout <<
"MASK: Region that is not nominally gapped but looks fishy: " << i << std::endl;
160 sequence_mask( i ) =
false;
168 for (
Size i = 1; i <= alignment_length; i++ ) {
169 if ( sequences[1][i-1] ==
'-' )
continue;
171 out << sequence_mask(i);
182 std::string
const & sequence_from_alignment ){
184 for (
Size i = 1; i <= sequence_from_alignment.size(); i++ ) {
185 if ( sequence_from_alignment[ i-1 ] !=
'-' ) {
187 mapping[ i ] =
count;
197 std::string s_out(
"" );
198 for (
Size i = 0; i < s.size(); i++ ) {
199 if ( s[i] !=
'-' ) s_out += s[i];
208 std::string
const & target_sequence_from_alignment,
209 std::string
const & template_sequence_from_alignment,
215 using namespace core::chemical;
216 using namespace core::conformation;
218 using namespace protocols::farna;
219 using namespace utility;
228 Size const alignment_length = target_sequence_from_alignment.size();
229 runtime_assert( alignment_length == template_sequence_from_alignment.size() );
234 std::map <Size, Size> template_alignment2sequence, target_alignment2sequence;
239 std::string target_sequence;
241 for (
Size i = 1; i <= alignment_length; i++ ) {
243 if ( !sequence_mask( i ) )
continue;
245 Size const pdb_number( template_alignment2sequence[i] );
247 if ( i == 1 || ( sequence_mask( i-1 ) && pdb_number > 1 && !template_pose.fold_tree().is_cutpoint( pdb_number-1 ) ) ) {
248 pose.append_residue_by_bond( template_pose.residue( pdb_number ) );
250 pose.append_residue_by_jump( template_pose.residue( pdb_number ), pose.total_residue() );
256 target_sequence += target_sequence_from_alignment[i-1];
257 working_res.push_back( target_alignment2sequence[i] + offset);
261 PDBInfoOP pdb_info(
new PDBInfo( pose ) );
262 pdb_info->set_numbering( working_res );
263 pose.pdb_info( pdb_info );
265 std::string current_sequence = pose.sequence();
266 std::cout <<
"TARGET: " << target_sequence << std::endl;
267 std::cout <<
"CURRENT: " << current_sequence << std::endl;
277 for (
Size i = 1; i <= target_sequence.size(); i++ ) {
279 char const new_seq = target_sequence[i-1];
280 if ( pose::rna::mutate_position( pose, i, new_seq ) ) {
281 changed_pos.push_back( i );
282 changed_pos_working.push_back( working_res[i] );
298 std::string
const & template_file_name ){
300 Size which_sequence( 0 );
302 for (
Size n=1; n<=sequences_from_alignment.size(); n++ ) {
303 if ( sequences_from_alignment[n]->
id() == template_file_name ) {
311 std::string
const template_sequence = template_pose.sequence();
312 if ( which_sequence == 0 ) {
313 for (
Size n=1; n<=sequences_from_alignment.size(); n++ ) {
314 if ( sequences_from_alignment[n]->ungapped_sequence() == template_sequence ) {
319 if ( which_sequence > 0 )
std::cout <<
"Using " << sequences_from_alignment[which_sequence]->id() <<
" from alignment FASTA file, as its sequence corresponds to input template PDB!" << std::endl;
322 if ( which_sequence == 0 )
utility_exit_with_message(
"Could not figure out which sequence in fasta file was the template based on name or on sequence!!" );
324 if ( which_sequence == 1 )
std::cout <<
"WARNING! WARNING! Assuming template corresponds to first sequence in the alignment FASTA file " << sequences_from_alignment[which_sequence]->id() <<
". But that first sequence should be the target sequence!" << std::endl;
328 return which_sequence;
338 using namespace basic::options::OptionKeys;
339 using namespace core::scoring;
347 core::chemical::ResidueTypeSetCOP rsd_set = core::chemical::ChemicalManager::get_instance()->residue_type_set( core::chemical::FA_STANDARD );
348 core::import_pose::pose_from_pdb(
pose, *rsd_set, template_file );
349 core::pose::rna::figure_out_reasonable_rna_fold_tree(
pose );
353 std::string template_sequence_from_alignment, target_sequence_from_alignment;
358 template_sequence_from_alignment =
pose.sequence();
359 target_sequence_from_alignment =
option[
seq ]();
360 if ( target_sequence_from_alignment.size() != template_sequence_from_alignment.size() )
utility_exit_with_message(
"Input sequence from -seq should match size of template pose specified by -s!" );
364 std::string
const fasta_file (
option[ in::file::fasta ]()[1] );
368 template_sequence_from_alignment = sequences_from_alignment[ which_sequence ]->sequence();
369 target_sequence_from_alignment = sequences_from_alignment[ 1 ]->sequence();
376 setup_mask( sequence_mask, sequences_from_alignment );
381 template_sequence_from_alignment,
382 sequence_mask,
option[ seq_offset ]() );
383 core::pose::rna::virtualize_5prime_phosphates(
pose );
385 std::string
outfile(
"threaded.pdb" );
388 std::cout <<
"Outputting: " << outfile << std::endl;
389 pose.dump_pdb( outfile );
408 main(
int argc,
char * argv [] )
414 NEW_OPT(
seq,
"target sequence (can include dashes). Must specify either this or -fasta. Length of this sequence must exactly equal length of template pose specified by -s",
"");
415 NEW_OPT( seq_offset,
"Integer to add to all residue numbers in output PDB", 0 );
416 NEW_OPT( sequence_mask_file,
"Output name for sequence mask file (not in use at the moment)",
"sequence_mask.txt" );
427 protocols::viewer::viewer_main(
my_main );
429 std::cout <<
"caught exception " << e.
msg() << std::endl;
#define utility_exit_with_message(m)
Exit with file + line + message.
virtual std::string const msg() const
int main(int argc, char *argv[])
void setup_mask(ObjexxFCL::FArray1D_bool &sequence_mask, utility::vector1< sequence::SequenceOP > const &sequences_from_alignment)
void init(int argc, char *argv[])
Command line init() version.
BooleanOptionKey const user("options:user")
#define runtime_assert(_Expression)
Assert that the condition holds. Evaluated for both debug and release builds.
Conversions between degrees and radians.
common derived classes for thrown exceptions
FArray1D< bool > FArray1D_bool
Input file stream wrapper for uncompressed and compressed files.
std::vector with 1-based indexing
std::string make_tag_with_dashes(utility::vector1< int > res_vector, char const delimiter)
Compactifies vectors of ints: 1 2 3 9 10 11 to "1-3 9-11".
rule< Scanner, options_closure::context_t > options
#define OPT_KEY(type, key)
Output file stream wrapper for uncompressed and compressed files.
void close()
Close the ofstream and reset the state.
std::string remove_dashes(std::string const &s)
#define NEW_OPT(akey, help, adef)
void setup_alignment_map(std::map< Size, Size > &mapping, std::string const &sequence_from_alignment)
Size figure_out_which_sequence_is_template(utility::vector1< sequence::SequenceOP > const &sequences_from_alignment, pose::Pose const &template_pose, std::string const &template_file_name)
ocstream cout(std::cout)
Wrapper around std::cout.
BooleanOptionKey const exit("options:exit")
vector1: std::vector with 1-based indexing
xyzMatrix: Fast 3x3 xyz matrix template
std::string make_tag(utility::vector1< int > res_vector)
ozstream: Output file stream wrapper for uncompressed and compressed files
Some std::string helper functions.
void prepare_threaded_model(pose::Pose &pose, std::string const &target_sequence_from_alignment, std::string const &template_sequence_from_alignment, ObjexxFCL::FArray1D_bool &sequence_mask, Size offset=0)
Fast (x,y,z)-coordinate numeric vector.
rule< Scanner, option_closure::context_t > option
size_type size() const
Active Array Size.
FArray1D: Fortran-Compatible 1D Array.
numeric::xyzMatrix< Real > Matrix