28 #include <utility/io/ozstream.hh>
29 #include <utility/string_util.hh>
30 #include <utility/io/izstream.hh>
31 #include <numeric/random/random.hh>
48 #include <basic/options/option.hh>
49 #include <basic/options/keys/chemical.OptionKeys.gen.hh>
50 #include <basic/options/keys/run.OptionKeys.gen.hh>
51 #include <basic/options/keys/in.OptionKeys.gen.hh>
52 #include <basic/options/keys/mp.OptionKeys.gen.hh>
53 #include <basic/options/keys/inout.OptionKeys.gen.hh>
54 #include <basic/options/keys/out.OptionKeys.gen.hh>
55 #include <basic/options/keys/packing.OptionKeys.gen.hh>
56 #include <basic/Tracer.hh>
59 #include <numeric/xyzVector.hh>
62 #include <utility/string_constants.hh>
63 #include <utility/vector1.hh>
64 #include <utility/tools/make_map.hh>
67 #include <numeric/xyzVector.hh>
70 #include <ObjexxFCL/string.functions.hh>
71 #include <ObjexxFCL/format.hh>
72 #include <cifparse/CifFile.h>
/// @brief Shared-pointer alias for CifFile (the type pulled in via cifparse/CifFile.h).
73 typedef utility::pointer::shared_ptr< CifFile >
CifFileOP;
/// @brief File-local tracer constructed with the channel name "core.io.mmcif.cif_reader".
81 static THREAD_LOCAL basic::Tracer
TR(
"core.io.mmcif.cif_reader" );
// Atom records grouped by the (possibly re-lettered) chain ID they end up with.
102 std::map<char, ChainAtoms> atom_chain_map;
// Chain IDs in order of first appearance; drives the order chains are emitted in.
103 std::vector< char > chain_list;
// Index handed out to each distinct auth_asym_id letter the first time it is seen.
104 std::map<char, Size> chain_to_idx;
// (model index, chain index) -> chain letter to assign to the atom.
106 std::map<std::pair<Size, Size>,
char> modelchain_to_chain;
107 std::string const & chain_letters( utility::UPPERCASE_ALPHANUMERICS );
// Seed model indices 0 and 1 so that chain index i maps to chain_letters[i].
// NOTE(review): these seed keys are 0-based in i, whereas the indices stored in
// chain_to_idx further down are assigned as size()+1 and the entries added for
// later models use chain_idx - 1. Confirm the lookup for model 1 picks the
// intended letter.
108 for (
Size i = 0; i < chain_letters.size(); ++i ) {
109 modelchain_to_chain[std::pair<Size, Size>(0, i)] = chain_letters[i];
110 modelchain_to_chain[std::pair<Size, Size>(1, i)] = chain_letters[i];
// Becomes true once a model boundary has been handled in the atom_site loop.
113 bool modeltags_present =
false;
// Only the first data block of the CIF file is read.
120 Block& block = cifFile->GetBlock( cifFile->GetFirstBlockName() );
// Optionally fill the header record from the descriptive mmCIF categories.
// Each category is only read when IsTablePresent() reports it in the block.
123 if ( read_pdb_header ) {
// citation.title (row 0) -> header title.
124 if ( block.IsTablePresent(
"citation" ) ) {
125 ISTable& citation = block.GetTable(
"citation" );
127 sfr->header()->store_title( citation( 0,
"title" ) );
// entry.id (row 0) -> ID code.
130 if ( block.IsTablePresent(
"entry" ) ) {
131 ISTable& entry = block.GetTable(
"entry" );
132 sfr->header()->store_idCode( entry( 0,
"id" ) );
// entity.pdbx_description -> one compound entry per entity row.
135 if ( block.IsTablePresent(
"entity" ) ) {
136 ISTable& entity = block.GetTable(
"entity");
137 for (
Size i = 0; i <= entity.GetLastRowIndex(); ++i ) {
138 sfr->header()->store_compound( entity( i,
"pdbx_description" ) );
// struct_keywords (row 0) -> classification and keyword text.
// The guard must name the same category that is fetched on the next line; it
// previously tested for "keywords", so the guard and the fetched table disagreed.
142 if ( block.IsTablePresent(
"struct_keywords" ) ) {
143 ISTable& keywords = block.GetTable(
"struct_keywords" );
144 sfr->header()->store_classification( keywords( 0,
"pdbx_keywords" ) );
145 sfr->header()->store_keywords( keywords( 0,
"text" ) );
// database_PDB_rev.date_original (row 0) -> deposition date.
148 if ( block.IsTablePresent(
"database_PDB_rev" ) ) {
149 ISTable& database_PDB_rev = block.GetTable(
"database_PDB_rev" );
150 sfr->header()->store_deposition_date( database_PDB_rev( 0,
"date_original" ) );
// exptl.method (row 0) -> experimental technique(s).
153 if ( block.IsTablePresent(
"exptl" ) ) {
154 ISTable& exptl = block.GetTable(
"exptl" );
155 sfr->header()->store_experimental_techniques( exptl( 0,
"method" ) );
// Called once after all of the header fields above have been stored.
158 sfr->header()->finalize_parse();
// Build the heterogen-name list from the chem_comp category: one (id, name)
// pair per component ID, appending to an existing pair when the ID repeats.
166 if ( block.IsTablePresent(
"chem_comp" ) ) {
167 ISTable& chem_comp = block.GetTable(
"chem_comp");
168 for (
Size i = 0; i <= chem_comp.GetLastRowIndex(); ++i ) {
171 string const & hetID( chem_comp( i,
"id" ) );
// NOTE(review): `name` is used below but is not defined in the lines shown
// here; confirm where it comes from. The same holds for where record_found
// is set to true.
179 bool record_found(
false );
180 Size const n_heterogen_names( sfr->heterogen_names().size() );
// Scan the existing entries (indexed from 1) for this component ID.
// NOTE(review): this inner index `i` shadows the row index `i` of the
// enclosing loop; a distinct name would be clearer.
181 for (
uint i( 1 ); i <= n_heterogen_names; ++i ) {
183 if ( hetID == sfr->heterogen_names()[ i ].first ) {
// Known ID: extend its stored text with the right-stripped name.
184 sfr->heterogen_names()[ i ].second.append( ObjexxFCL::rstripped_whitespace( name ) );
// Unknown ID: start a new entry.
189 if ( ! record_found ) {
192 sfr->heterogen_names().push_back( make_pair( hetID, name ) );
// Walk the struct_conn category. Rows whose conn_type_id is "disulf" are stored
// as SSBOND-style records in sfr->ssbond_map(), keyed by the first partner's
// residue ID.
199 if ( block.IsTablePresent(
"struct_conn" ) ) {
200 ISTable& struct_conn = block.GetTable(
"struct_conn");
// GetLastRowIndex() is used as an inclusive bound by the entity, chem_comp and
// atom_site loops in this reader; the bound here was exclusive, which left the
// final struct_conn row unread.
201 for (
Size i = 0; i <= struct_conn.GetLastRowIndex(); ++i ) {
203 if ( struct_conn( i,
"conn_type_id" ) ==
"disulf" ) {
205 utility::vector1< SSBondInformation > ssbonds;
// First partner. Only the first character of the asym ID is kept as the
// chain ID, and an insertion code of '?' is mapped to a blank.
// NOTE(review): these are label_* identifiers, while the atom_site loop
// reads auth_* identifiers; confirm the two numbering schemes are meant
// to be mixed.
211 ssbond.
resName1 = struct_conn( i,
"ptnr1_label_comp_id" );
212 ssbond.
chainID1 = struct_conn( i,
"ptnr1_label_asym_id" )[0];
213 ssbond.
resSeq1 = atof( struct_conn( i,
"ptnr1_label_seq_id" ).c_str() );
214 ssbond.
iCode1 = struct_conn( i,
"pdbx_ptnr1_PDB_ins_code" )[0] ==
'?' ?
' ' : struct_conn( i,
"pdbx_ptnr1_PDB_ins_code" )[0];
// Second partner, read the same way.
219 ssbond.
resName2 = struct_conn( i,
"ptnr2_label_comp_id" );
220 ssbond.
chainID2 = struct_conn( i,
"ptnr2_label_asym_id" )[0];
221 ssbond.
resSeq2 = atof( struct_conn( i,
"ptnr2_label_seq_id" ).c_str() );
222 ssbond.
iCode2 = struct_conn( i,
"pdbx_ptnr2_PDB_ins_code" )[0] ==
'?' ?
' ' : struct_conn( i,
"pdbx_ptnr2_PDB_ins_code" )[0];
// Bond length taken from pdbx_dist_value.
226 ssbond.
length = atof( struct_conn( i,
"pdbx_dist_value" ).c_str() );
// Append to any records already stored under this residue ID, then write
// the combined list back.
229 if ( sfr->ssbond_map().count( ssbond.
resID1 ) ) {
230 ssbonds = sfr->ssbond_map()[ ssbond.
resID1 ];
232 ssbonds.push_back( ssbond );
234 sfr->ssbond_map()[ ssbond.
resID1 ] = ssbonds;
236 if (
TR.Debug.visible() ) {
237 TR.Debug <<
"SSBOND record information stored successfully." << std::endl;
// Store a struct_conn row as a LINK-style record in sfr->link_map(), keyed by
// the first partner's residue ID.
242 utility::vector1< LinkInformation > links;
// First partner: atom name, residue name, chain (first character of the asym
// ID only), residue number and insertion code ('?' is mapped to a blank).
// NOTE(review): these are label_* identifiers, while the atom_site loop reads
// auth_* identifiers; confirm the two numbering schemes are meant to be mixed.
247 link.
name1 = struct_conn( i,
"ptnr1_label_atom_id" );
248 link.
resName1 = struct_conn( i,
"ptnr1_label_comp_id" );
249 link.
chainID1 = struct_conn( i,
"ptnr1_label_asym_id" )[0];
250 link.
resSeq1 = atof( struct_conn( i,
"ptnr1_label_seq_id" ).c_str() );
251 link.
iCode1 = struct_conn( i,
"pdbx_ptnr1_PDB_ins_code" )[0] ==
'?' ?
' ' : struct_conn( i,
"pdbx_ptnr1_PDB_ins_code" )[0];
// Second partner, read the same way.
255 link.
name2 = struct_conn( i,
"ptnr2_label_atom_id" );
256 link.
resName2 = struct_conn( i,
"ptnr2_label_comp_id" );
257 link.
chainID2 = struct_conn( i,
"ptnr2_label_asym_id" )[0];
258 link.
resSeq2 = atof( struct_conn( i,
"ptnr2_label_seq_id" ).c_str() );
259 link.
iCode2 = struct_conn( i,
"pdbx_ptnr2_PDB_ins_code" )[0] ==
'?' ?
' ' : struct_conn( i,
"pdbx_ptnr2_PDB_ins_code" )[0];
// Bond length taken from pdbx_dist_value.
263 link.
length = atof( struct_conn( i,
"pdbx_dist_value" ).c_str() );
// Append to any records already stored under this residue ID, then write the
// combined list back.
266 if ( sfr->link_map().count( link.
resID1 ) ) {
267 links = sfr->link_map()[ link.
resID1 ];
269 links.push_back( link );
271 sfr->link_map()[ link.
resID1 ] = links;
273 if (
TR.Debug.visible() ) {
274 TR.Debug <<
"LINK record information stored successfully." << std::endl;
// Unit-cell parameters: the three edge lengths and three angles come from row 0
// of the cell category, the space-group name from row 0 of the symmetry
// category. The result is stored in sfr->crystinfo().
281 if ( block.IsTablePresent(
"cell" ) ) {
282 ISTable& cell = block.GetTable(
"cell");
284 ci.
A( atof( cell(0,
"length_a" ).c_str() ) );
285 ci.
B( atof( cell(0,
"length_b" ).c_str() ) );
286 ci.
C( atof( cell(0,
"length_c" ).c_str() ) );
287 ci.
alpha( atof( cell(0,
"angle_alpha" ).c_str() ) );
288 ci.
beta( atof( cell(0,
"angle_beta" ).c_str() ) );
289 ci.
gamma( atof( cell(0,
"angle_gamma" ).c_str() ) );
// NOTE(review): unlike the other categories read in this function, the symmetry
// table is fetched without an IsTablePresent() guard. Confirm what GetTable()
// does when a file has a cell category but no symmetry category.
290 ISTable& symmetry = block.GetTable(
"symmetry");
291 ci.
spacegroup( symmetry( 0,
"space_group_name_H-M" ) );
292 sfr->crystinfo() = ci;
// Walk every atom_site row. The first part of the loop body tracks model
// boundaries through the pdbx_PDB_model_num column.
298 if ( block.IsTablePresent(
"atom_site" ) ) {
299 ISTable& atom_site = block.GetTable(
"atom_site");
300 for (
Size i = 0; i <= atom_site.GetLastRowIndex(); ++i ) {
303 std::string temp_model = atom_site( i,
"pdbx_PDB_model_num" );
// A model boundary is a row whose model number differs from a previously
// recorded, non-empty one.
307 if ( last_model !=
"" && last_model != temp_model ) {
310 last_model = temp_model;
313 sfr->modeltag() = temp_model;
315 if ( modeltags_present ) {
// Extend modelchain_to_chain for models 2 and up: chain k of model m takes
// letter (m-1)*nchains + k - 1, for as many whole models as fit into
// chain_letters.
// NOTE(review): if chain_to_idx is empty when this runs, the outer
// condition (model_idx * 0 < chain_letters.size()) never becomes false.
321 for (
Size model_idx=2; model_idx*chain_to_idx.size()<chain_letters.size(); ++model_idx ) {
322 for (
Size chain_idx=1; chain_idx <= chain_to_idx.size(); ++chain_idx ) {
323 TR <<
"REARRANGE CHAINS " << model_idx <<
" " << chain_idx <<
" ";
324 TR << (model_idx-1)*chain_to_idx.size()+chain_idx << std::endl;
325 modelchain_to_chain[std::pair<Size, Size>(model_idx, chain_idx)] =
326 chain_letters[(model_idx-1)*chain_to_idx.size() + chain_idx - 1];
// Hard limit on the number of models; exits with an error beyond it.
330 if ( modelidx > 8 ) utility_exit_with_message(
"quitting: too many MODELs");
332 modeltags_present =
true;
// Fill the per-atom record from the current atom_site row.
// isHet is true exactly when group_PDB reads "HETATM".
337 ai.
isHet = ( atom_site( i,
"group_PDB" ) ==
"HETATM" );
338 ai.
serial = atoi( atom_site( i,
"id" ).c_str() );
339 ai.
name = atom_site( i,
"auth_atom_id" );
// Alternate-location flag: first character of label_alt_id, when non-empty.
341 if ( atom_site( i,
"label_alt_id" ).size() > 0 ) {
342 ai.
altLoc = atom_site( i,
"label_alt_id" )[ 0 ];
345 ai.
resName = atom_site( i,
"auth_comp_id" );
// Chain ID: only the first character of auth_asym_id is kept.
347 if ( atom_site( i,
"auth_asym_id" ).size() > 0 ) ai.
chainID = atom_site( i,
"auth_asym_id" )[0];
// Re-letter the chain: every distinct auth_asym_id letter receives the next
// 1-based index the first time it is seen, and (modelidx, index) then selects
// the chain letter from modelchain_to_chain.
349 if ( atom_site( i,
"auth_asym_id" ).size() > 0 ) {
350 char chainid = atom_site( i,
"auth_asym_id" )[0];
351 if ( chain_to_idx.find(chainid) == chain_to_idx.end() ) {
// Take the new index before operator[] inserts the key. Written as a single
// assignment, the relative order of the insertion and the size() call is
// unspecified before C++17, so the stored index could come out one too high.
352 Size const new_chain_idx = chain_to_idx.size() + 1;
chain_to_idx[chainid] = new_chain_idx;
353 TR <<
"found new chain " << chainid <<
" " << chain_to_idx.size() << std::endl;
355 ai.
chainID = modelchain_to_chain[std::pair<Size, Size>(modelidx, chain_to_idx[chainid])];
// Residue number from auth_seq_id.
359 ai.
resSeq = atoi( atom_site( i,
"auth_seq_id" ).c_str() );
// Insertion code: first character of pdbx_PDB_ins_code, unless the field is
// empty or starts with '?'.
361 if ( atom_site( i,
"pdbx_PDB_ins_code" ).size() > 0 && atom_site( i,
"pdbx_PDB_ins_code" )[0] !=
'?' ) ai.
iCode = atom_site( i,
"pdbx_PDB_ins_code" )[0];
// Coordinates. A field that compares equal to the nan literal below marks the
// atom so that its occupancy is forced to -1.0 further down.
// NOTE(review): the comparison is an exact string match, leading whitespace
// included; confirm the parser really returns the value in that form.
364 bool force_no_occupancy =
false;
365 if ( atom_site( i,
"Cartn_x" ) ==
" nan" ) {
367 force_no_occupancy=
true;
369 ai.
x = atof( atom_site( i,
"Cartn_x" ).c_str() );
371 if ( atom_site( i,
"Cartn_y" ) ==
" nan" ) {
373 force_no_occupancy=
true;
375 ai.
y = atof( atom_site( i,
"Cartn_y" ).c_str() );
377 if ( atom_site( i,
"Cartn_z" ) ==
" nan" ) {
379 force_no_occupancy=
true;
381 ai.
z = atof( atom_site( i,
"Cartn_z" ).c_str() );
// Occupancy. A blank field is special-cased; what that branch assigns is not
// visible in the lines shown here.
386 if ( atom_site( i,
"occupancy" ) ==
" " ) {
389 ai.
occupancy = atof( atom_site( i,
"occupancy" ).c_str() );
391 if ( force_no_occupancy ) ai.
occupancy = -1.0;
// B-factor and element symbol.
393 ai.
temperature = atof( atom_site( i,
"B_iso_or_equiv" ).c_str() );
395 ai.
element = atom_site( i,
"type_symbol" );
// File the atom under its chain and remember the order in which chains first
// appear.
398 atom_chain_map[ai.
chainID].push_back(ai);
399 if ( std::find( chain_list.begin(), chain_list.end(), ai.
chainID ) == chain_list.end() ) {
400 chain_list.push_back( ai.
chainID );
// Hand the per-chain atom lists to the StructFileRep in first-appearance order.
// Every ID in chain_list was added right after an insertion into atom_chain_map
// under the same key, so find() is expected to succeed here.
406 for (
Size i=0; i< chain_list.size(); ++i ) {
407 sfr->chains().push_back( atom_chain_map.find( chain_list[i] )->second );
void store_base_residue_type_name_in_sfr(std::string const &text_field, StructFileRep &sfr)
Parse .pdb HETNAM text field to extract full resID and convert into SFR data.
StructFileRep class. Holds data created from a PDB file.
pose information so it's not loose in the pose
utility::pointer::shared_ptr< CifFile > CifFileOP
Declarations for StructFileRep and related classes.
utility::pointer::shared_ptr< HeaderInformation > HeaderInformationOP
Method declarations for CarbohydrateInfoManager.
bool read_pdb_header() const
Function declarations for reading of .pdb files.
StructFileRepOP create_sfr_from_cif_file_op(CifFileOP cifFile, StructFileReaderOptions const &options)
static THREAD_LOCAL basic::Tracer TR("core.io.mmcif.cif_reader")
rosetta project type declarations
static bool is_valid_sugar_code(std::string const &code)
Is the given 3-letter code a valid Rosetta/IUPAC code for carbohydrates?
bool new_chain_order() const
std::string strip_whitespace(std::string const &name)
Inter-residue chemical bond connection point class declaration.
static THREAD_LOCAL basic::Tracer TR("core.io.pdb.HeaderInformation")
Method declarations and simple accessor definitions for the Residue class.
A class for defining atom parameters, known as atom_types.
std::string spacegroup() const
utility::pointer::shared_ptr< CifFile > CifFileOP
utility::pointer::shared_ptr< StructFileRep > StructFileRepOP
Definitions for the Field data structures and related helper function declarations.