Rosetta
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
cif_reader.cc
Go to the documentation of this file.
1 // -*- mode:c++;tab-width:2;indent-tabs-mode:t;show-trailing-whitespace:t;rm-trailing-spaces:t -*-
2 // vi: set ts=2 noet:
3 //
4 // (c) Copyright Rosetta Commons Member Institutions.
5 // (c) This file is part of the Rosetta software suite and is made available under license.
6 // (c) The Rosetta software is developed by the contributing members of the Rosetta Commons.
7 // (c) For more information, see http://www.rosettacommons.org. Questions about this can be
8 // (c) addressed to University of Washington UW TechTransfer, email: license@u.washington.edu.
9 
10 /// @file core/io/mmcif/StructFileReader.hh
11 /// @author Andy Watkins (andy.watkins2@gmail.com)
12 
13 
14 // Unit headers
17 
18 // Package headers
19 #include <core/io/pdb/pdb_reader.hh> // TODO: Pull out pseudo-duplicated code and move to sfr_storage.cc.
20 
21 // When you move PDBReader and PoseUnbuilder, take these.
22 #include <core/pose/Pose.hh>
26 #include <core/chemical/Patch.hh>
28 #include <utility/io/ozstream.hh>
29 #include <utility/string_util.hh>
30 #include <utility/io/izstream.hh>
31 #include <numeric/random/random.hh>
32 
33 #include <core/pose/PDBInfo.hh>
34 #include <core/io/pdb/Field.hh>
37 //#include <core/io/pdb/file_data_fixup.hh>
38 #include <core/io/StructFileRep.hh>
39 
41 
42 #include <core/io/Remarks.hh>
43 
44 // Project headers
45 #include <core/types.hh>
46 
47 // Basic headers
48 #include <basic/options/option.hh>
49 #include <basic/options/keys/chemical.OptionKeys.gen.hh>
50 #include <basic/options/keys/run.OptionKeys.gen.hh>
51 #include <basic/options/keys/in.OptionKeys.gen.hh>
52 #include <basic/options/keys/mp.OptionKeys.gen.hh>
53 #include <basic/options/keys/inout.OptionKeys.gen.hh>
54 #include <basic/options/keys/out.OptionKeys.gen.hh>
55 #include <basic/options/keys/packing.OptionKeys.gen.hh>
56 #include <basic/Tracer.hh>
57 
58 // Numeric headers
59 #include <numeric/xyzVector.hh>
60 
61 // Utility headers
62 #include <utility/string_constants.hh>
63 #include <utility/vector1.hh>
64 #include <utility/tools/make_map.hh>
65 
66 // Numeric headers
67 #include <numeric/xyzVector.hh>
68 
69 // External headers
70 #include <ObjexxFCL/string.functions.hh>
71 #include <ObjexxFCL/format.hh>
72 #include <cifparse/CifFile.h>
73 typedef utility::pointer::shared_ptr< CifFile > CifFileOP;
74 
75 // C++ headers
76 #include <cstdlib>
77 #include <cstdio>
78 #include <algorithm>
79 
80 
81 static THREAD_LOCAL basic::Tracer TR( "core.io.mmcif.cif_reader" );
82 
83 // When you move PDBReader and PoseUnbuilder bring these
84 
85 using basic::T;
86 using basic::Error;
87 using basic::Warning;
88 
89 
90 namespace core {
91 namespace io {
92 namespace mmcif {
93 
94 
96 
97  // NO TER OR END
98  // INSERTION CODE DEFAULTS TO '?' SO TURN IT INTO A SPACE!.
99 
100  StructFileRepOP sfr( new StructFileRep );
101 
102  std::map<char, ChainAtoms> atom_chain_map;
103  std::vector< char > chain_list; // preserve order
104  std::map<char, Size> chain_to_idx;
105 
106  std::map<std::pair<Size, Size>, char> modelchain_to_chain;
107  std::string const & chain_letters( utility::UPPERCASE_ALPHANUMERICS );
108  for ( Size i = 0; i < chain_letters.size(); ++i ) {
109  modelchain_to_chain[std::pair<Size, Size>(0, i)] = chain_letters[i];
110  modelchain_to_chain[std::pair<Size, Size>(1, i)] = chain_letters[i];
111  }
112  Size modelidx = 1;
113  bool modeltags_present = false;
114 
115 
116  sfr->header() = HeaderInformationOP( new HeaderInformation() );
117 
118  bool read_pdb_header = options.read_pdb_header();
119 
120  Block& block = cifFile->GetBlock( cifFile->GetFirstBlockName() );
121 
122  // "header information", i.e., is from the Title Section of the PDB file.
123  if ( read_pdb_header ) {
124  if ( block.IsTablePresent( "citation" ) ) {
125  ISTable& citation = block.GetTable( "citation" );
126 
127  sfr->header()->store_title( citation( 0, "title" ) );
128  }
129 
130  if ( block.IsTablePresent( "entry" ) ) {
131  ISTable& entry = block.GetTable( "entry" );
132  sfr->header()->store_idCode( entry( 0, "id" ) );
133  }
134 
135  if ( block.IsTablePresent( "entity" ) ) {
136  ISTable& entity = block.GetTable("entity");
137  for ( Size i = 0; i <= entity.GetLastRowIndex(); ++i ) {
138  sfr->header()->store_compound( entity( i, "pdbx_description" ) );
139  }
140  }
141 
142  if ( block.IsTablePresent( "keywords" ) ) {
143  ISTable& keywords = block.GetTable( "struct_keywords" );
144  sfr->header()->store_classification( keywords( 0, "pdbx_keywords" ) );
145  sfr->header()->store_keywords( keywords( 0, "text" ) );
146  }
147 
148  if ( block.IsTablePresent( "database_PDB_rev" ) ) {
149  ISTable& database_PDB_rev = block.GetTable( "database_PDB_rev" );
150  sfr->header()->store_deposition_date( database_PDB_rev( 0, "date_original" ) );
151  }
152 
153  if ( block.IsTablePresent( "exptl" ) ) {
154  ISTable& exptl = block.GetTable( "exptl" );
155  sfr->header()->store_experimental_techniques( exptl( 0, "method" ) );
156  }
157 
158  sfr->header()->finalize_parse();
159  }
160 
161 
162  // We cannot support REMARKs yet because these are stored in DIVERSE places.
163  // There isn't a coherent "REMARKs" object. AMW TODO
164 
165  // HETNAM
166  if ( block.IsTablePresent( "chem_comp" ) ) {
167  ISTable& chem_comp = block.GetTable("chem_comp");
168  for ( Size i = 0; i <= chem_comp.GetLastRowIndex(); ++i ) {
169  // TODO: Aah! Duplicated code!
170  std::string name = chem_comp( i, "name" );
171  string const & hetID( chem_comp( i, "id" ) );
172 
173  // If the hetID is found in the map of Rosetta-allowed carbohydrate 3-letter codes....
176  pdb::store_base_residue_type_name_in_sfr( name, *sfr ); // TEMP
177  } else {
178  // Search through current list of HETNAM records: append or create records as needed.
179  bool record_found( false );
180  Size const n_heterogen_names( sfr->heterogen_names().size() );
181  for ( uint i( 1 ); i <= n_heterogen_names; ++i ) {
182  // If a record already exists with this hetID, this is a continuation line; append.
183  if ( hetID == sfr->heterogen_names()[ i ].first ) {
184  sfr->heterogen_names()[ i ].second.append( ObjexxFCL::rstripped_whitespace( name ) );
185  record_found = true;
186  break;
187  }
188  }
189  if ( ! record_found ) {
190  // Non-carbohydrate heterogen names are simply stored in the standard PDB way.
192  sfr->heterogen_names().push_back( make_pair( hetID, name ) );
193  }
194  }
195  }
196  }
197 
198  // LINK
199  if ( block.IsTablePresent( "struct_conn" ) ) {
200  ISTable& struct_conn = block.GetTable("struct_conn");
201  for ( Size i = 0; i < struct_conn.GetLastRowIndex(); ++i ) {
202 
203  if ( struct_conn( i, "conn_type_id" ) == "disulf" ) {
204  SSBondInformation ssbond;
205  utility::vector1< SSBondInformation > ssbonds;
206 
207  // Others: conn_type_id for "covale" etc--alert JWL!
208 
209  // Extract values from record fields.
210  //ssbond.name1 = struct_conn( i, "ptnr1_label_atom_id" );
211  ssbond.resName1 = struct_conn( i, "ptnr1_label_comp_id" );
212  ssbond.chainID1 = struct_conn( i, "ptnr1_label_asym_id" )[0];
213  ssbond.resSeq1 = atof( struct_conn( i, "ptnr1_label_seq_id" ).c_str() );
214  ssbond.iCode1 = struct_conn( i, "pdbx_ptnr1_PDB_ins_code" )[0] == '?' ? ' ' : struct_conn( i, "pdbx_ptnr1_PDB_ins_code" )[0];
215 
216  ssbond.resID1 = ssbond.resSeq1 + ssbond.iCode1 + ssbond.chainID1;
217 
218  //ssbond.name2 = struct_conn( i, "ptnr2_label_atom_id" );
219  ssbond.resName2 = struct_conn( i, "ptnr2_label_comp_id" );
220  ssbond.chainID2 = struct_conn( i, "ptnr2_label_asym_id" )[0];
221  ssbond.resSeq2 = atof( struct_conn( i, "ptnr2_label_seq_id" ).c_str() );
222  ssbond.iCode2 = struct_conn( i, "pdbx_ptnr2_PDB_ins_code" )[0] == '?' ? ' ' : struct_conn( i, "pdbx_ptnr2_PDB_ins_code" )[0];
223 
224  ssbond.resID2 = ssbond.resSeq2 + ssbond.iCode2 + ssbond.chainID2;
225 
226  ssbond.length = atof( struct_conn( i, "pdbx_dist_value" ).c_str() ); // bond length
227 
228  // If key is found in the links map, add this new linkage information to the links already keyed to this residue.
229  if ( sfr->ssbond_map().count( ssbond.resID1 ) ) {
230  ssbonds = sfr->ssbond_map()[ ssbond.resID1 ];
231  }
232  ssbonds.push_back( ssbond );
233 
234  sfr->ssbond_map()[ ssbond.resID1 ] = ssbonds;
235 
236  if ( TR.Debug.visible() ) {
237  TR.Debug << "SSBOND record information stored successfully." << std::endl;
238  }
239 
240  } else {
241  LinkInformation link;
242  utility::vector1< LinkInformation > links;
243 
244  // Others: conn_type_id for "covale" etc--alert JWL!
245 
246  // Extract values from record fields.
247  link.name1 = struct_conn( i, "ptnr1_label_atom_id" );
248  link.resName1 = struct_conn( i, "ptnr1_label_comp_id" );
249  link.chainID1 = struct_conn( i, "ptnr1_label_asym_id" )[0];
250  link.resSeq1 = atof( struct_conn( i, "ptnr1_label_seq_id" ).c_str() );
251  link.iCode1 = struct_conn( i, "pdbx_ptnr1_PDB_ins_code" )[0] == '?' ? ' ' : struct_conn( i, "pdbx_ptnr1_PDB_ins_code" )[0];
252 
253  link.resID1 = link.resSeq1 + link.iCode1 + link.chainID1;
254 
255  link.name2 = struct_conn( i, "ptnr2_label_atom_id" );
256  link.resName2 = struct_conn( i, "ptnr2_label_comp_id" );
257  link.chainID2 = struct_conn( i, "ptnr2_label_asym_id" )[0];
258  link.resSeq2 = atof( struct_conn( i, "ptnr2_label_seq_id" ).c_str() );
259  link.iCode2 = struct_conn( i, "pdbx_ptnr2_PDB_ins_code" )[0] == '?' ? ' ' : struct_conn( i, "pdbx_ptnr2_PDB_ins_code" )[0];
260 
261  link.resID2 = link.resSeq2 + link.iCode2 + link.chainID2;
262 
263  link.length = atof( struct_conn( i, "pdbx_dist_value" ).c_str() ); // bond length
264 
265  // If key is found in the links map, add this new linkage information to the links already keyed to this residue.
266  if ( sfr->link_map().count( link.resID1 ) ) {
267  links = sfr->link_map()[ link.resID1 ];
268  }
269  links.push_back( link );
270 
271  sfr->link_map()[ link.resID1 ] = links;
272 
273  if ( TR.Debug.visible() ) {
274  TR.Debug << "LINK record information stored successfully." << std::endl;
275  }
276  }
277  }
278  }
279 
280  // CRYST1
281  if ( block.IsTablePresent( "cell" ) ) {
282  ISTable& cell = block.GetTable("cell");
283  CrystInfo ci;
284  ci.A( atof( cell(0, "length_a" ).c_str() ) );
285  ci.B( atof( cell(0, "length_b" ).c_str() ) );
286  ci.C( atof( cell(0, "length_c" ).c_str() ) );
287  ci.alpha( atof( cell(0, "angle_alpha" ).c_str() ) );
288  ci.beta( atof( cell(0, "angle_beta" ).c_str() ) );
289  ci.gamma( atof( cell(0, "angle_gamma" ).c_str() ) );
290  ISTable& symmetry = block.GetTable("symmetry");
291  ci.spacegroup( symmetry( 0, "space_group_name_H-M" ) );
292  sfr->crystinfo() = ci;
293  }
294 
295  // ATOM/HETATM
296 
297  std::string last_model = "";
298  if ( block.IsTablePresent( "atom_site" ) ) {
299  ISTable& atom_site = block.GetTable("atom_site");
300  for ( Size i = 0; i <= atom_site.GetLastRowIndex(); ++i ) {
301  AtomInformation ai;
302 
303  std::string temp_model = atom_site( i, "pdbx_PDB_model_num" );
304  temp_model = ObjexxFCL::strip_whitespace( temp_model );
305 
306  // PUT OBEY ENDMDL RECOGNITION HERE!!!!!!!
307  if ( last_model != "" && last_model != temp_model ) {
308  break;
309  }
310  last_model = temp_model;
311 
312  // store the serial number as the filename, which will become the PDBInfo name of the pose
313  sfr->modeltag() = temp_model;
314  if ( options.new_chain_order() ) {
315  if ( modeltags_present ) {
316  if ( options.obey_ENDMDL() ) {
317  // That's enough records for now.
318  break;
319  }
320  // second model... all chains should be present...
321  for ( Size model_idx=2; model_idx*chain_to_idx.size()<chain_letters.size(); ++model_idx ) {
322  for ( Size chain_idx=1; chain_idx <= chain_to_idx.size(); ++chain_idx ) {
323  TR << "REARRANGE CHAINS " << model_idx << " " << chain_idx << " ";
324  TR << (model_idx-1)*chain_to_idx.size()+chain_idx << std::endl;
325  modelchain_to_chain[std::pair<Size, Size>(model_idx, chain_idx)] =
326  chain_letters[(model_idx-1)*chain_to_idx.size() + chain_idx - 1];
327  }
328  }
329  ++modelidx;
330  if ( modelidx > 8 ) utility_exit_with_message("quitting: too many MODELs");
331  } else {
332  modeltags_present = true;
333  }
334  }
335 
336 
337  ai.isHet = ( atom_site( i, "group_PDB" ) == "HETATM" );
338  ai.serial = atoi( atom_site( i, "id" ).c_str() );
339  ai.name = atom_site( i, "auth_atom_id" );
340  ai.altLoc = 0;
341  if ( atom_site( i, "label_alt_id" ).size() > 0 ) {
342  ai.altLoc = atom_site( i, "label_alt_id" )[ 0 ];
343  }
344 
345  ai.resName = atom_site( i, "auth_comp_id" );
346  ai.chainID = ' ';
347  if ( atom_site( i, "auth_asym_id" ).size() > 0 ) ai.chainID = atom_site( i, "auth_asym_id" )[0];
348  if ( options.new_chain_order() ) {
349  if ( atom_site( i, "auth_asym_id" ).size() > 0 ) {
350  char chainid = atom_site( i, "auth_asym_id" )[0];
351  if ( chain_to_idx.find(chainid) == chain_to_idx.end() ) {
352  chain_to_idx[chainid] = chain_to_idx.size()+1;
353  TR << "found new chain " << chainid << " " << chain_to_idx.size() << std::endl;
354  }
355  ai.chainID = modelchain_to_chain[std::pair<Size, Size>(modelidx, chain_to_idx[chainid])];
356  }
357  }
358 
359  ai.resSeq = atoi( atom_site( i, "auth_seq_id" ).c_str() );
360  ai.iCode = ' ';
361  if ( atom_site( i, "pdbx_PDB_ins_code" ).size() > 0 && atom_site( i, "pdbx_PDB_ins_code" )[0] != '?' ) ai.iCode = atom_site( i, "pdbx_PDB_ins_code" )[0];
362 
363  // how can you check properly if something will successfully convert to a number !?!?!?
364  bool force_no_occupancy = false;
365  if ( atom_site( i, "Cartn_x" ) == " nan" ) {
366  ai.x =0.0;
367  force_no_occupancy=true;
368  } else {
369  ai.x = atof( atom_site( i, "Cartn_x" ).c_str() );
370  }
371  if ( atom_site( i, "Cartn_y" ) == " nan" ) {
372  ai.y =0.0;
373  force_no_occupancy=true;
374  } else {
375  ai.y = atof( atom_site( i, "Cartn_y" ).c_str() );
376  }
377  if ( atom_site( i, "Cartn_z" ) == " nan" ) {
378  ai.z =0.0;
379  force_no_occupancy=true;
380  } else {
381  ai.z = atof( atom_site( i, "Cartn_z" ).c_str() );
382  }
383 
384  // check that the occupancy column actually exists. If it doesn't, assume full occupancy.
385  // otherwise read it.
386  if ( atom_site( i, "occupancy" ) == " " ) {
387  ai.occupancy = 1.0;
388  } else {
389  ai.occupancy = atof( atom_site( i, "occupancy" ).c_str() );
390  }
391  if ( force_no_occupancy ) ai.occupancy = -1.0;
392 
393  ai.temperature = atof( atom_site( i, "B_iso_or_equiv" ).c_str() );
394  ai.segmentID = " ";
395  ai.element = atom_site( i, "type_symbol" );
396  ai.terCount = 0;
397 
398  atom_chain_map[ai.chainID].push_back(ai);
399  if ( std::find( chain_list.begin(), chain_list.end(), ai.chainID ) == chain_list.end() ) {
400  chain_list.push_back( ai.chainID );
401  }
402  }
403  }
404 
405 
406  for ( Size i=0; i< chain_list.size(); ++i ) { // std::vector
407  sfr->chains().push_back( atom_chain_map.find( chain_list[i] )->second );
408  }
409 
410  return sfr;
411 }
412 
413 } // namespace mmcif
414 } // namespace io
415 } // namespace core
void store_base_residue_type_name_in_sfr(std::string const &text_field, StructFileRep &sfr)
Parse .pdb HETNAM text field to extract full resID and convert into SFR data.
Definition: pdb_reader.cc:438
StructFileRep class. Hold data created from PDB file.
pose information so it's not loose in the pose
utility::pointer::shared_ptr< CifFile > CifFileOP
Definition: import_pose.cc:65
Declarations for StructFileRep and related classes.
Real gamma() const
Definition: CrystInfo.hh:58
utility::pointer::shared_ptr< HeaderInformation > HeaderInformationOP
Method declarations for CarbohydrateInfoManager.
Real A() const
Definition: CrystInfo.hh:47
Function declarations for reading of .pdb files.
platform::Size Size
Definition: types.hh:30
StructFileRepOP create_sfr_from_cif_file_op(CifFileOP cifFile, StructFileReaderOptions const &options)
Definition: cif_reader.cc:95
A class that contains information for individual atoms.
static THREAD_LOCAL basic::Tracer TR("core.io.mmcif.cif_reader")
rosetta project type declarations
static bool is_valid_sugar_code(std::string const &code)
Is the given 3-letter code a valid Rosetta/IUPAC code for carbohydrates?
Information stored in the header records http://www.wwpdb.org/documentation/format32/sect2.html HEADER PEPTIDASE 13-JAN-98 1A2Z.
std::string strip_whitespace(std::string const &name)
Real C() const
Definition: CrystInfo.hh:51
Inter-residue chemical bond connection point class declaration.
Information stored in the HEADER record in the PDB format.
static THREAD_LOCAL basic::Tracer TR("core.io.pdb.HeaderInformation")
Real alpha() const
Definition: CrystInfo.hh:54
Real B() const
Definition: CrystInfo.hh:49
Real beta() const
Definition: CrystInfo.hh:56
platform::uint uint
Definition: types.hh:32
Method declarations and simple accessor definitions for the Residue class.
A class for defining atom parameters, known as atom_types.
A structure for storing information from .pdb SSBOND records.
std::string spacegroup() const
Definition: CrystInfo.hh:61
utility::pointer::shared_ptr< CifFile > CifFileOP
Definition: cif_reader.cc:73
bool isHet
For now, all member names have the same names as fields in PDB standard.
conformation container
utility::pointer::shared_ptr< StructFileRep > StructFileRepOP
Definitions for the Field data structures and related helper function declarations.
Pose class.