Rosetta
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
MolFileIOReader.cc
Go to the documentation of this file.
1 // -*- mode:c++;tab-width:2;indent-tabs-mode:t;show-trailing-whitespace:t;rm-trailing-spaces:t -*-
2 // vi: set ts=2 noet:
3 //
4 // (c) Copyright Rosetta Commons Member Institutions.
5 // (c) This file is part of the Rosetta software suite and is made available under license.
6 // (c) The Rosetta software is developed by the contributing members of the Rosetta Commons.
7 // (c) For more information, see http://www.rosettacommons.org. Questions about this can be
8 // (c) addressed to University of Washington UW TechTransfer, email: license@u.washington.edu.
9 
10 /// @file src/core/chemical/sdf/MolFileIOReader.cc
11 /// @author Rocco Moretti (rmorettiase@gmail.com)
12 
16 
19 
21 
22 #include <basic/Tracer.hh>
23 #include <utility/string_util.hh>
24 #include <utility/file/FileName.hh>
25 #include <utility/io/izstream.hh>
26 
27 #include <ObjexxFCL/string.functions.hh>
28 
29 namespace core {
30 namespace chemical {
31 namespace sdf {
32 
33 static THREAD_LOCAL basic::Tracer TR( "core.io.sdf.MolFileIOReader" );
34 
36 {}
37 
39 {}
40 
41 /// @details This has to take a filename, as autodetection of file type may require reopening the file
42 
43 utility::vector1< MolFileIOMoleculeOP > MolFileIOReader::parse_file( std::string const & filename, std::string type /*= "" */, core::Size n_entries /* = 0 */ ) {
44 
45  utility::io::izstream file( filename );
46  if ( ! file ) {
47  TR << "Error: cannot open file " << filename << std::endl;
48  utility_exit_with_message( "Error opening " + filename );
49  }
50  if ( type == "" ) {
51  utility::file::FileName fn( filename );
52  std::string ext( fn.extension() ); // The fn.extension() machinery should seamlessly ignore the .gz ending.
53  ObjexxFCL::lowercase( ext );
54  if ( ext == ".mol" || ext == ".sdf" || ext == ".pdb" || ext == ".mol2" || ext == ".params" ) {
55  type = ext.substr(1); // Everything from the second character to the end of the string.
56  } else {
57  TR << "Extension of '" << ext << "' for file '" << filename << "' not recognized - attempting autodetection of type." << std::endl;
58  utility::io::izstream test_type( filename );
59  std::string line;
60  bool M_END_line = false;
61  bool dollars_line = false;
62  bool ATOM_line = false;
63  bool BOND_line = false;
64  bool Tripos_line = false;
65  for ( getline(test_type, line); test_type; getline(test_type, line) ) {
66  if ( utility::startswith(line, "M END") ) { M_END_line = true; }
67  if ( utility::startswith(line, "$$$$") ) { dollars_line = true; }
68  if ( utility::startswith(line, "ATOM") ) { ATOM_line = true; }
69  if ( utility::startswith(line, "BOND") ) { BOND_line = true; }
70  if ( utility::startswith(line, "@<TRIPOS>") ) { Tripos_line = true; }
71  }
72  if ( M_END_line || dollars_line ) {
73  if ( ATOM_line || BOND_line || Tripos_line ) {
74  TR.Warning << "Warning: Ambiguous filetype for loading " << filename << ", assuming SDF." << std::endl;
75  }
76  type = "sdf";
77  } else if ( Tripos_line ) {
78  if ( ATOM_line || BOND_line || M_END_line || dollars_line ) {
79  TR.Warning << "Warning: Ambiguous filetype for loading " << filename << ", assuming mol2." << std::endl;
80  }
81  type = "mol2";
82  } else if ( ATOM_line && BOND_line ) {
83  if ( Tripos_line || M_END_line || dollars_line ) {
84  TR.Warning << "Warning: Ambiguous filetype for loading " << filename << ", assuming Rosetta params." << std::endl;
85  }
86  type = "params";
87  } else if ( ATOM_line && ! BOND_line ) {
88  if ( Tripos_line || M_END_line || dollars_line ) {
89  TR.Warning << "Warning: Ambiguous filetype for loading " << filename << ", assuming PDB." << std::endl;
90  }
91  type = "pdb";
92  } else {
93  TR.Error << "ERROR: Unable to autodetermine filetype of molecule file " << filename << std::endl;
94  utility_exit_with_message( "Can't determine filetype for file " + filename );
95  }
96  } // if/else extension recognized
97  } // type = ""
98  return parse_file( file, type, n_entries ); // file will be closed on destruction.
99 }
100 
101 utility::vector1< MolFileIOMoleculeOP > MolFileIOReader::parse_file( std::istream & file, std::string type, core::Size n_entries /*=0*/ ) {
102  ObjexxFCL::lowercase( type );
103  utility::vector1< MolFileIOMoleculeOP > molecules;
104 
105  if ( type == "mol" || type == "sdf" ) {
106  SDFParser parser;
107  molecules = parser.parse( file, n_entries );
108  } else if ( type == "mol2" ) {
109  TR.Error << "Loading of mol2 files via this method not currently supported." << std::endl;
110  } else if ( type == "pdb" ) {
111  TR.Error << "Loading of pdb files via this method not currently supported." << std::endl;
112  } else if ( type == "params" ) {
113  TR.Error << "Loading of params files via this method not currently supported." << std::endl;
114  } else {
115  utility_exit_with_message( "Do not know how to handle molecule file of type " + type );
116  }
117 
118  if ( ! molecules.size() ) {
119  TR.Error << "Error: Stream contained no recognized molecules!" << std::endl;
120  }
121  return molecules;
122 }
123 
124 
125 ResidueTypeOP convert_to_ResidueType( utility::vector1< MolFileIOMoleculeOP > molfile_data,
126  std::string atom_type_tag,
127  std::string elements_tag,
128  std::string mm_atom_type_tag) {
129 
130  AtomTypeSetCOP atom_types( ChemicalManager::get_instance()->atom_type_set( atom_type_tag ) );
131  ElementSetCOP elements( ChemicalManager::get_instance()->element_set( elements_tag ) );
132  MMAtomTypeSetCOP mm_atom_types( ChemicalManager::get_instance()->mm_atom_type_set( mm_atom_type_tag ) );
133  return convert_to_ResidueType(molfile_data, atom_types, elements, mm_atom_types);
134 }
135 
136 ResidueTypeOP convert_to_ResidueType( utility::vector1< MolFileIOMoleculeOP > molfile_data,
137  AtomTypeSetCOP atom_types,
138  ElementSetCOP element_type_set,
139  MMAtomTypeSetCOP mm_atom_types) {
140 
141  if ( molfile_data.size() == 0 ) {
142  // This indicates a bad call, rather than bad data - early error
143  utility_exit_with_message("ERROR: Cannot convert an empty vector of molecules to a ResidueType.");
144  }
145 
146  std::map< core::chemical::sdf::AtomIndex, std::string > index_name_map;
147  ResidueTypeOP restype = molfile_data[1]->convert_to_ResidueType(index_name_map, atom_types, element_type_set, mm_atom_types);
148  if ( ! restype ) {
149  TR.Info << "Could not load molecule '" << molfile_data[1]->name() << "' as a residue type." << std::endl;
150  return ResidueTypeOP(0);
151  }
152 
153  if ( molfile_data.size() > 1 ) {
155  for ( core::Size ii(1); ii <= molfile_data.size(); ++ii ) {
156  std::map< std::string, core::Vector > location_map;
157  for ( std::map< core::chemical::sdf::AtomIndex, std::string >::const_iterator itr( index_name_map.begin() ), itr_end( index_name_map.end() );
158  itr != itr_end; ++itr ) {
159  MolFileIOAtomOP atom = molfile_data[ ii ]->atom_index( itr->first );
160  if ( atom ) {
161  location_map[ itr->second ] = atom->position();
162  }
163  }
164  rotlib->add_rotamer( location_map );
165  }
166  restype->rotamer_library_specification( rotlib );
167  }
168 
169  return restype;
170 }
171 
172 utility::vector1< ResidueTypeOP >
173 convert_to_ResidueTypes( utility::vector1< MolFileIOMoleculeOP > molfile_data,
174  bool load_rotamers,
175  std::string atom_type_tag,
176  std::string elements_tag,
177  std::string mm_atom_type_tag) {
178 
179  AtomTypeSetCOP atom_types( ChemicalManager::get_instance()->atom_type_set( atom_type_tag ) );
180  ElementSetCOP elements( ChemicalManager::get_instance()->element_set( elements_tag ) );
181  MMAtomTypeSetCOP mm_atom_types( ChemicalManager::get_instance()->mm_atom_type_set( mm_atom_type_tag ) );
182  return convert_to_ResidueTypes(molfile_data, load_rotamers, atom_types, elements, mm_atom_types);
183 }
184 
185 utility::vector1< ResidueTypeOP >
186 convert_to_ResidueTypes( utility::vector1< MolFileIOMoleculeOP > molfile_data,
187  bool load_rotamers,
188  AtomTypeSetCOP atom_types,
189  ElementSetCOP element_types,
190  MMAtomTypeSetCOP mm_atom_types) {
191 
192  std::map< std::string, core::Size > name_index_map;
193  utility::vector1< utility::vector1< MolFileIOMoleculeOP > > separated_molecules;
194  std::string previous_entry("");
195 
196  for ( core::Size ii(1); ii <= molfile_data.size(); ++ii ) {
197  if ( ! load_rotamers ) {
198  separated_molecules.resize( separated_molecules.size() + 1 );
199  separated_molecules[ separated_molecules.size() ].push_back( molfile_data[ii] );
200  } else if ( previous_entry != "" && molfile_data[ii]->name() == "" ) {
201  // If we have an empty name, assume it's a rotamer of the previous item (but only if there is a previous one)
202  separated_molecules[ name_index_map[ previous_entry ] ].push_back( molfile_data[ii] );
203  } else if ( name_index_map.find( molfile_data[ii]->name() ) != name_index_map.end() ) {
204  // Existing name
205  separated_molecules[ name_index_map[ molfile_data[ii]->name() ] ].push_back( molfile_data[ii] );
206  previous_entry = molfile_data[ii]->name();
207  } else {
208  // Brand new name
209  separated_molecules.resize( separated_molecules.size() + 1 );
210  separated_molecules[ separated_molecules.size() ].push_back( molfile_data[ii] );
211  name_index_map[ molfile_data[ii]->name() ] = separated_molecules.size();
212  previous_entry = molfile_data[ii]->name();
213  }
214  }
215 
216  utility::vector1< ResidueTypeOP > residue_types;
217  for ( core::Size jj(1); jj <= separated_molecules.size(); ++jj ) {
218  TR.Debug << "Converting " << separated_molecules[jj][1]->name() << std::endl;
219  ResidueTypeOP restype;
220  try {
221  restype = convert_to_ResidueType(separated_molecules[jj], atom_types, element_types, mm_atom_types);
222  } catch( utility::excn::EXCN_Msg_Exception & e ) {
223  TR << ">>>>>> Skipping " << separated_molecules[jj][1]->name() << " due to Error: " << e.msg() << std::endl;
224  continue;
225  } catch (...) {
226  TR << ">>>>>> Skipping " << separated_molecules[jj][1]->name() << " due to unspecified error!" << std::endl;
227  continue;
228  }
229  if ( restype ) {
230  residue_types.push_back( restype );
231  }
232  }
233 
234  return residue_types;
235 }
236 
237 
238 }
239 }
240 }
ResidueTypeOP convert_to_ResidueType(utility::vector1< MolFileIOMoleculeOP > molfile_data, std::string atom_type_tag, std::string elements_tag, std::string mm_atom_type_tag)
Convert the vector of MolFileIOMolecules into a single residue type, using multiple entries as rotame...
utility::vector1< ResidueTypeOP > convert_to_ResidueTypes(utility::vector1< MolFileIOMoleculeOP > molfile_data, bool load_rotamers, std::string atom_type_tag, std::string elements_tag, std::string mm_atom_type_tag)
Convert the vector of MolFileIOMolecules into multiple residue types If load_rotamers is false...
utility::pointer::shared_ptr< MolFileIOAtom > MolFileIOAtomOP
utility::pointer::shared_ptr< ResidueType > ResidueTypeOP
utility::vector1< MolFileIOMoleculeOP > parse_file(std::string const &filename, std::string type="", core::Size n_entries=0)
parse file, with the possibility of type autodetection.
utility::pointer::shared_ptr< StoredRotamerLibrarySpecification > StoredRotamerLibrarySpecificationOP
Method declarations and simple accessors/getters for ResidueType.
platform::Size Size
Definition: types.hh:30
A class which stores atom coordinates for a rotamer library. Internally, this is stored as a list of ...
Chemical manager class header.
std::string filename(core::Size batch_id)
Definition: HedgeArchive.cc:87
utility::pointer::shared_ptr< MMAtomTypeSet const > MMAtomTypeSetCOP
The StoredRotamerLibrarySpecification class says to build a rotamer library from a set of stored coor...
utility::vector1< MolFileIOMoleculeOP > parse(std::istream &filein, core::Size n_entries=0)
parse the given input stream. n_entries are the maximum number of entries to parse - of zero parse al...
Definition: SDFParser.cc:38
static THREAD_LOCAL basic::Tracer TR("core.chemical.sdf.CtabV2000Parser")
utility::pointer::shared_ptr< ElementSet const > ElementSetCOP
utility::pointer::shared_ptr< AtomTypeSet const > AtomTypeSetCOP